├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── build.sh ├── src ├── ucg.c ├── ucg.h └── ucg_tables.h └── tests ├── build_and_run.sh ├── test_data.c └── test_runner.c /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | fail-fast: false 15 | 16 | matrix: 17 | os: [ubuntu-latest, windows-latest] 18 | build_type: [Release] 19 | c_compiler: [gcc, clang, cl] 20 | include: 21 | - os: windows-latest 22 | c_compiler: cl 23 | cpp_compiler: cl 24 | - os: ubuntu-latest 25 | c_compiler: gcc 26 | cpp_compiler: g++ 27 | - os: ubuntu-latest 28 | c_compiler: clang 29 | cpp_compiler: clang++ 30 | exclude: 31 | - os: windows-latest 32 | c_compiler: gcc 33 | - os: windows-latest 34 | c_compiler: clang 35 | - os: ubuntu-latest 36 | c_compiler: cl 37 | 38 | steps: 39 | - uses: actions/checkout@v4 40 | 41 | - name: Set reusable strings 42 | # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. 43 | id: strings 44 | shell: bash 45 | run: | 46 | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" 47 | 48 | - name: Configure CMake 49 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
50 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 51 | run: > 52 | cmake -B ${{ steps.strings.outputs.build-output-dir }} 53 | -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} 54 | -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} 55 | -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 56 | -S ${{ github.workspace }} 57 | 58 | - name: Build 59 | # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 60 | run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} 61 | 62 | - name: Test 63 | working-directory: ${{ steps.strings.outputs.build-output-dir }} 64 | # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 65 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 66 | run: ctest --build-config ${{ matrix.build_type }} --verbose 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | lib/ 3 | tests/test_runner 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15...3.29) 2 | 3 | project( 4 | UCG 5 | VERSION 0.1 6 | LANGUAGES C) 7 | 8 | include(CTest) 9 | 10 | if (MSVC) 11 | add_compile_options(/W4 /WX) 12 | else() 13 | add_compile_options(-Wall -Wextra -pedantic -Werror) 14 | endif() 15 | 16 | add_library(ucg src/ucg.c src/ucg.h) 17 | 18 | add_executable(test_runner tests/test_runner.c) 19 | 20 | target_link_libraries(test_runner PRIVATE ucg) 21 | add_test(NAME All_Tests COMMAND test_runner) 22 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024 Feoramund 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UCG 2 | 3 | ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/Feoramund/ucg/ci.yml) 4 | 5 | **UCG** **C**ounts **G**raphemes. That is its one job, and it does it well. 6 | 7 | In short, UCG is a C99 implementation of grapheme segmentation from Unicode® 8 | Standard Annex #29 and width calculation from Annex #11. 9 | 10 | More simply put, it counts how many runes and graphemes are in UTF-8 encoded 11 | text and calculates how many monospace cells wide the text should be. Runes are 12 | Unicode codepoints, as opposed to individual bytes. Graphemes are individual 13 | units of a writing system, which can be composed of many runes, such as 14 | combining diacritics or emoji modifiers. 15 | 16 | It is designed to be simple and easy to use for other projects that need to 17 | segment text by grapheme cluster boundaries or calculate monospace width for 18 | terminal output. 19 | 20 | ## Conformity 21 | 22 | UCG passes the official Unicode test suite comprising 1,187 grapheme-based and 23 | 3,648 emoji-based test cases. It has been implemented based on the Unicode 24 | version 15.1.0 specification, which was published in 2023. 25 | 26 | ## Performance 27 | 28 | UCG has not been thoroughly optimized. There are opportunities for it, but it's 29 | able to parse a file of nearly a million random Unicode codepoints in ~0.05 30 | seconds on my machine with `O2` optimization using the Clang C compiler. It 31 | parses a million random 7-bit ASCII characters in about 0.01 seconds. This is 32 | hopefully fast enough for general-purpose use. 33 | 34 | ## Building 35 | 36 | UCG has no external dependencies. Drop `src/ucg.c`, `src/ucg.h`, and `src/ucg_tables.h` right into your project and include `ucg.h`. 37 | 38 | ## License 39 | 40 | UCG is licensed under the permissive BSD-3-Clause license. 
41 | 42 | ## References 43 | 44 | - https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries 45 | - https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table 46 | - https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules 47 | - https://www.unicode.org/reports/tr29/#Conformance 48 | - https://www.unicode.org/reports/tr11/ 49 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # Build the UCG library. 3 | set -xe 4 | mkdir -p lib 5 | cc -c src/ucg.c -o lib/ucg.o -std=c99 -pedantic-errors -Wall -Werror 6 | ar rcs lib/libucg.a lib/ucg.o 7 | ranlib lib/libucg.a 8 | set +xe 9 | -------------------------------------------------------------------------------- /src/ucg.c: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Feoramund 3 | * SPDX-License-Identifier: BSD-3-Clause 4 | */ 5 | #include 6 | #include 7 | 8 | #include "ucg.h" 9 | #include "ucg_tables.h" 10 | 11 | #define UCG_TABLE_LEN(t) (sizeof(t) / sizeof(ucg_rune)) 12 | 13 | #define ZERO_WIDTH_SPACE 0x200B 14 | #define ZERO_WIDTH_NON_JOINER 0x200C 15 | #define ZERO_WIDTH_JOINER 0x200D 16 | #define WORD_JOINER 0x2060 17 | 18 | void *ucg_default_malloc(intptr_t size, void *ctx) 19 | { 20 | (void)ctx; 21 | return malloc(size); 22 | } 23 | 24 | void *ucg_default_realloc(void* ptr, intptr_t old_size, intptr_t new_size, void *ctx) 25 | { 26 | (void)ctx; 27 | (void)old_size; 28 | return realloc(ptr, new_size); 29 | } 30 | 31 | void ucg_default_free(void *ptr, intptr_t size, void *ctx) 32 | { 33 | (void)ctx; 34 | (void)size; 35 | free(ptr); 36 | } 37 | 38 | const ucg_allocator ucg_default_allocator = { 39 | ucg_default_malloc, 40 | ucg_default_realloc, 41 | ucg_default_free, 42 | NULL, 43 | }; 44 | 45 | ucg_rune ucg_decode_rune(const uint8_t* str, ucg_int strlen, 
ucg_int* byte_iterator) { 46 | assert(str != NULL); 47 | assert(byte_iterator != NULL); 48 | 49 | const uint8_t* end = str + strlen; 50 | const uint8_t* c = str + *byte_iterator; 51 | 52 | if (c >= end) { 53 | return UCG_EOF; 54 | } 55 | 56 | *byte_iterator += 1; 57 | 58 | if (*c <= 0x7F) { 59 | return *c; 60 | } else { 61 | ucg_rune rune = 0; 62 | uint8_t first_byte = *c; 63 | 64 | // Check for well-formedness on the first byte. 65 | if (0x80 <= first_byte && first_byte < 0xC1) { return UCG_INVALID_RUNE; } 66 | else if (0xF5 <= first_byte && first_byte < 0xFF) { return UCG_INVALID_RUNE; } 67 | 68 | ucg_int more; 69 | if ((first_byte & 0xF8) == 0xF0) { more = 3; rune |= (first_byte & 0x07) << 18; } 70 | else if ((first_byte & 0xF0) == 0xE0) { more = 2; rune |= (first_byte & 0x0F) << 12; } 71 | else if ((first_byte & 0xE0) == 0xC0) { more = 1; rune |= (first_byte & 0x1F) << 6; } 72 | else { return UCG_INVALID_RUNE; } 73 | 74 | c += 1; 75 | if (c == end) { return UCG_EXPECTED_MORE_BYTES; } 76 | 77 | // Check for well-formedness on the second byte. 
78 | if (first_byte == 0xE0 && *c < 0xA0) { return UCG_INVALID_RUNE; } 79 | else if (first_byte == 0xED && *c > 0x9F) { return UCG_INVALID_RUNE; } 80 | else if (first_byte == 0xF0 && *c < 0x90) { return UCG_INVALID_RUNE; } 81 | else if (first_byte == 0xF4 && *c > 0x8F) { return UCG_INVALID_RUNE; } 82 | 83 | c -= 1; 84 | 85 | for (more -= 1; more >= 0; more -= 1) { 86 | c += 1; *byte_iterator += 1; 87 | if (c == end) { return UCG_EXPECTED_MORE_BYTES; } 88 | rune |= (*c & 0x3F) << (more * 6); 89 | } 90 | 91 | return rune; 92 | } 93 | } 94 | 95 | ucg_int ucg_binary_search(ucg_rune value, const ucg_rune* table, ucg_int length, ucg_int stride) { 96 | assert(table != NULL); 97 | assert(length > 0); 98 | assert(stride > 0); 99 | 100 | ucg_int n = length; 101 | ucg_int t = 0; 102 | for (/**/; n > 1; /**/) { 103 | ucg_int m = n / 2; 104 | ucg_int p = t + m * stride; 105 | if (value >= table[p]) { 106 | t = p; 107 | n = n - m; 108 | } else { 109 | n = m; 110 | } 111 | } 112 | if (n != 0 && value >= table[t]) { 113 | return t; 114 | } 115 | return -1; 116 | } 117 | 118 | // 119 | // The procedures below are accurate as of Unicode 15.1.0. 
120 | // 121 | 122 | bool ucg_is_control(ucg_rune r) { 123 | if (r <= 0x1F || (0x7F <= r && r <= 0x9F)) { 124 | return true; 125 | } 126 | return false; 127 | } 128 | 129 | // Emoji_Modifier 130 | bool ucg_is_emoji_modifier(ucg_rune r) { 131 | return 0x1F3FB <= r && r <= 0x1F3FF; 132 | } 133 | 134 | // Regional_Indicator 135 | bool ucg_is_regional_indicator(ucg_rune r) { 136 | return 0x1F1E6 <= r && r <= 0x1F1FF; 137 | } 138 | 139 | // General_Category=Enclosing_Mark 140 | bool ucg_is_enclosing_mark(ucg_rune r) { 141 | switch (r) { 142 | case 0x0488: 143 | case 0x0489: 144 | case 0x1ABE: 145 | return true; 146 | } 147 | 148 | if (0x20DD <= r && r <= 0x20E0) { return true; } 149 | if (0x20E2 <= r && r <= 0x20E4) { return true; } 150 | if (0xA670 <= r && r <= 0xA672) { return true; } 151 | 152 | return false; 153 | } 154 | 155 | // Prepended_Concatenation_Mark 156 | bool ucg_is_prepended_concatenation_mark(ucg_rune r) { 157 | switch (r) { 158 | case 0x006DD: 159 | case 0x0070F: 160 | case 0x008E2: 161 | case 0x110BD: 162 | case 0x110CD: 163 | return true; 164 | } 165 | 166 | if (0x00600 <= r && r <= 0x00605) { return true; } 167 | if (0x00890 <= r && r <= 0x00891) { return true; } 168 | 169 | return false; 170 | } 171 | 172 | // General_Category=Spacing_Mark 173 | bool ucg_is_spacing_mark(ucg_rune r) { 174 | intptr_t p = ucg_binary_search(r, ucg_spacing_mark_ranges, UCG_TABLE_LEN(ucg_spacing_mark_ranges)/2, 2); 175 | if (p >= 0 && ucg_spacing_mark_ranges[p] <= r && r <= ucg_spacing_mark_ranges[p+1]) { 176 | return true; 177 | } 178 | return false; 179 | } 180 | 181 | // General_Category=Nonspacing_Mark 182 | bool ucg_is_nonspacing_mark(ucg_rune r) { 183 | intptr_t p = ucg_binary_search(r, ucg_nonspacing_mark_ranges, UCG_TABLE_LEN(ucg_nonspacing_mark_ranges)/2, 2); 184 | if (p >= 0 && ucg_nonspacing_mark_ranges[p] <= r && r <= ucg_nonspacing_mark_ranges[p+1]) { 185 | return true; 186 | } 187 | return false; 188 | } 189 | 190 | // Extended_Pictographic 191 | bool 
// Hangul_Syllable_Type=LV_Syllable
//
// Looks `r` up in `ucg_hangul_syllable_lv_singlets`, a table of individual
// runes searched with a stride of 1. The search yields the index of the
// greatest entry that is not above `r` (or a negative value), so an exact
// match still has to be confirmed.
bool ucg_is_hangul_syllable_lv(ucg_rune r) {
	intptr_t found = ucg_binary_search(
		r,
		ucg_hangul_syllable_lv_singlets,
		UCG_TABLE_LEN(ucg_hangul_syllable_lv_singlets),
		1);

	if (found < 0) {
		return false;
	}
	return ucg_hangul_syllable_lv_singlets[found] == r;
}
false; 240 | } 241 | 242 | 243 | // Indic_Syllabic_Category=Consonant_Preceding_Repha 244 | bool ucg_is_indic_consonant_preceding_repha(ucg_rune r) { 245 | switch (r) { 246 | case 0x00D4E: 247 | case 0x11941: 248 | case 0x11D46: 249 | case 0x11F02: 250 | return true; 251 | } 252 | return false; 253 | } 254 | 255 | // Indic_Syllabic_Category=Consonant_Prefixed 256 | bool ucg_is_indic_consonant_prefixed(ucg_rune r) { 257 | switch (r) { 258 | case 0x1193F: 259 | case 0x11A3A: 260 | return true; 261 | } 262 | 263 | if (0x111C2 <= r && r <= 0x111C3) { return true; } 264 | if (0x11A84 <= r && r <= 0x11A89) { return true; } 265 | 266 | return false; 267 | } 268 | 269 | // Indic_Conjunct_Break=Linker 270 | bool ucg_is_indic_conjunct_break_linker(ucg_rune r) { 271 | switch (r) { 272 | case 0x094D: 273 | case 0x09CD: 274 | case 0x0ACD: 275 | case 0x0B4D: 276 | case 0x0C4D: 277 | case 0x0D4D: 278 | return true; 279 | } 280 | return false; 281 | } 282 | 283 | // Indic_Conjunct_Break=Consonant 284 | bool ucg_is_indic_conjunct_break_consonant(ucg_rune r) { 285 | intptr_t p = ucg_binary_search(r, ucg_indic_conjunct_break_consonant_ranges, UCG_TABLE_LEN(ucg_indic_conjunct_break_consonant_ranges)/2, 2); 286 | if (p >= 0 && ucg_indic_conjunct_break_consonant_ranges[p] <= r && r <= ucg_indic_conjunct_break_consonant_ranges[p+1]) { 287 | return true; 288 | } 289 | return false; 290 | } 291 | 292 | // Indic_Conjunct_Break=Extend 293 | bool ucg_is_indic_conjunct_break_extend(ucg_rune r) { 294 | intptr_t p = ucg_binary_search(r, ucg_indic_conjunct_break_extend_ranges, UCG_TABLE_LEN(ucg_indic_conjunct_break_extend_ranges)/2, 2); 295 | if (p >= 0 && ucg_indic_conjunct_break_extend_ranges[p] <= r && r <= ucg_indic_conjunct_break_extend_ranges[p+1]) { 296 | return true; 297 | } 298 | return false; 299 | } 300 | 301 | 302 | /* 303 | ``` 304 | Indic_Syllabic_Category = Consonant_Preceding_Repha, or 305 | Indic_Syllabic_Category = Consonant_Prefixed, or 306 | Prepended_Concatenation_Mark = Yes 
307 | ``` 308 | */ 309 | bool ucg_is_gcb_prepend_class(ucg_rune r) { 310 | return ucg_is_indic_consonant_preceding_repha(r) || ucg_is_indic_consonant_prefixed(r) || ucg_is_prepended_concatenation_mark(r); 311 | } 312 | 313 | /* 314 | ``` 315 | Grapheme_Extend = Yes, or 316 | Emoji_Modifier = Yes 317 | 318 | This includes: 319 | General_Category = Nonspacing_Mark 320 | General_Category = Enclosing_Mark 321 | U+200C ZERO WIDTH NON-JOINER 322 | 323 | plus a few General_Category = Spacing_Mark needed for canonical equivalence. 324 | ``` 325 | */ 326 | bool ucg_is_gcb_extend_class(ucg_rune r) { 327 | return ucg_is_grapheme_extend(r) || ucg_is_emoji_modifier(r); 328 | } 329 | 330 | // Return values: 331 | // 332 | // - 2 if East_Asian_Width=F or W, or 333 | // - 0 if non-printable / zero-width, or 334 | // - 1 in all other cases. 335 | // 336 | ucg_int ucg_normalized_east_asian_width(ucg_rune r) { 337 | if (ucg_is_control(r)) { 338 | return 0; 339 | } else if (r <= 0x10FF) { 340 | // Easy early out for low runes. 341 | return 1; 342 | } 343 | 344 | switch (r) { 345 | // This is a different interpretation of the BOM which occurs in the middle of text. 346 | case 0xFEFF: /* ZERO_WIDTH_NO_BREAK_SPACE */ 347 | case ZERO_WIDTH_SPACE: 348 | case ZERO_WIDTH_NON_JOINER: 349 | case ZERO_WIDTH_JOINER: 350 | case WORD_JOINER: 351 | return 0; 352 | } 353 | 354 | intptr_t p = ucg_binary_search(r, ucg_normalized_east_asian_width_ranges, UCG_TABLE_LEN(ucg_normalized_east_asian_width_ranges)/3, 3); 355 | if (p >= 0 && ucg_normalized_east_asian_width_ranges[p] <= r && r <= ucg_normalized_east_asian_width_ranges[p+1]) { 356 | return (ucg_int)ucg_normalized_east_asian_width_ranges[p+2]; 357 | } 358 | return 1; 359 | } 360 | 361 | // 362 | // End of Unicode 15.1.0 block. 
363 | // 364 | 365 | enum grapheme_cluster_sequence { 366 | None, 367 | Indic, 368 | Emoji, 369 | Regional, 370 | }; 371 | 372 | typedef struct { 373 | ucg_grapheme* graphemes; 374 | ucg_int rune_count; 375 | ucg_int grapheme_count; 376 | ucg_int width; 377 | 378 | ucg_rune last_rune; 379 | bool last_rune_breaks_forward; 380 | 381 | ucg_int last_width; 382 | ucg_int last_grapheme_count; 383 | 384 | bool bypass_next_rune; 385 | 386 | ucg_int regional_indicator_counter; 387 | 388 | enum grapheme_cluster_sequence current_sequence; 389 | bool continue_sequence; 390 | } ucg_decoder_state; 391 | 392 | 393 | ucg_int ucg_grapheme_count( 394 | const uint8_t* str, 395 | ucg_int str_len, 396 | 397 | ucg_int* out_runes, 398 | ucg_int* out_graphemes, 399 | ucg_int* out_width 400 | ) { 401 | return ucg_decode_grapheme_clusters(NULL, str, str_len, NULL, out_runes, out_graphemes, out_width); 402 | } 403 | 404 | void _ucg_decode_grapheme_clusters_deferred_step( 405 | ucg_allocator* allocator, 406 | ucg_decoder_state* state, 407 | ucg_int byte_index, 408 | ucg_rune this_rune 409 | ) { 410 | // "Break at the start and end of text, unless the text is empty." 
411 | // 412 | // GB1: sot ÷ Any 413 | // GB2: Any ÷ eot 414 | if (state->rune_count == 0 && state->grapheme_count == 0) { 415 | state->grapheme_count += 1; 416 | } 417 | 418 | if (state->grapheme_count > state->last_grapheme_count) { 419 | state->width += ucg_normalized_east_asian_width(this_rune); 420 | 421 | if (allocator != NULL) { 422 | state->graphemes = (ucg_grapheme*)allocator->realloc( 423 | state->graphemes, 424 | sizeof(ucg_grapheme) * (state->grapheme_count), 425 | sizeof(ucg_grapheme) * (1 + state->grapheme_count), 426 | allocator->ctx); 427 | 428 | ucg_grapheme append = { 429 | byte_index, 430 | state->rune_count, 431 | state->width - state->last_width, 432 | }; 433 | 434 | state->graphemes[state->grapheme_count - 1] = append; 435 | } 436 | 437 | state->last_grapheme_count = state->grapheme_count; 438 | state->last_width = state->width; 439 | } 440 | 441 | state->last_rune = this_rune; 442 | state->rune_count += 1; 443 | 444 | if (!state->continue_sequence) { 445 | state->current_sequence = None; 446 | state->regional_indicator_counter = 0; 447 | } 448 | state->continue_sequence = false; 449 | } 450 | 451 | ucg_int ucg_decode_grapheme_clusters( 452 | ucg_allocator* allocator, 453 | const uint8_t* str, 454 | ucg_int str_len, 455 | 456 | ucg_grapheme** out_graphemes, 457 | ucg_int* out_rune_count, 458 | ucg_int* out_grapheme_count, 459 | ucg_int* out_width 460 | ) { 461 | // The following procedure implements text segmentation by breaking on 462 | // Grapheme Cluster Boundaries[1], using the values[2] and rules[3] from 463 | // the Unicode® Standard Annex #29, entitled: 464 | // 465 | // UNICODE TEXT SEGMENTATION 466 | // 467 | // Version: Unicode 15.1.0 468 | // Date: 2023-08-16 469 | // Revision: 43 470 | // 471 | // This procedure is conformant[4] to UAX29-C1-1, otherwise known as the 472 | // extended, non-legacy ruleset. 473 | // 474 | // Please see the references for more information. 
475 | // 476 | // 477 | // [1]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries 478 | // [2]: https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table 479 | // [3]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules 480 | // [4]: https://www.unicode.org/reports/tr29/#Conformance 481 | 482 | // Additionally, this procedure takes into account Standard Annex #11, 483 | // in order to estimate how visually wide the string will appear on a 484 | // monospaced display. This can only ever be a rough guess, as this tends 485 | // to be an implementation detail relating to which fonts are being used, 486 | // how codepoints are interpreted and drawn, if codepoint sequences are 487 | // interpreted correctly, and et cetera. 488 | // 489 | // For example, a program may not properly interpret an emoji modifier 490 | // sequence and print the component glyphs instead of one whole glyph. 491 | // 492 | // See here for more information: https://www.unicode.org/reports/tr11/ 493 | // 494 | // NOTE: There is no explicit mention of what to do with zero-width spaces 495 | // as far as grapheme cluster segmentation goes, therefore this 496 | // implementation may count and return graphemes with a `width` of zero. 497 | // 498 | // Treat them as any other space. 499 | assert(allocator == NULL || out_graphemes != NULL); 500 | 501 | ucg_decoder_state state = {0}; 502 | 503 | #define UCG_DEFERRED_DECODE_STEP() (_ucg_decode_grapheme_clusters_deferred_step(allocator, &state, byte_index, this_rune)) 504 | 505 | for (ucg_int byte_index = 0, byte_iterator = 0; byte_index < str_len; byte_index = byte_iterator) { 506 | ucg_rune this_rune = ucg_decode_rune(str, str_len, &byte_iterator); 507 | if (this_rune < 0) { 508 | // There was a Unicode parsing error; bail out. 
509 | if (out_graphemes != NULL) { *out_graphemes = state.graphemes; } 510 | if (out_rune_count != NULL) { *out_rune_count = state.rune_count; } 511 | if (out_grapheme_count != NULL) { *out_grapheme_count = state.grapheme_count; } 512 | if (out_width != NULL) { *out_width = state.width; } 513 | 514 | // Return the error. 515 | return (ucg_int)this_rune; 516 | } 517 | 518 | // "Do not break between a CR and LF. Otherwise, break before and after controls." 519 | // 520 | // GB3: CR × LF 521 | // GB4: (Control | CR | LF) ÷ 522 | // GB5: ÷ (Control | CR | LF) 523 | if (this_rune == '\n' && state.last_rune == '\r') { 524 | state.last_rune_breaks_forward = false; 525 | state.bypass_next_rune = false; 526 | UCG_DEFERRED_DECODE_STEP(); continue; 527 | } 528 | 529 | if (ucg_is_control(this_rune)) { 530 | state.grapheme_count += 1; 531 | state.last_rune_breaks_forward = true; 532 | state.bypass_next_rune = true; 533 | UCG_DEFERRED_DECODE_STEP(); continue; 534 | } 535 | 536 | // (This check is for rules that work forwards, instead of backwards.) 537 | if (state.bypass_next_rune) { 538 | if (state.last_rune_breaks_forward) { 539 | state.grapheme_count += 1; 540 | state.last_rune_breaks_forward = false; 541 | } 542 | 543 | state.bypass_next_rune = false; 544 | UCG_DEFERRED_DECODE_STEP(); continue; 545 | } 546 | 547 | // (Optimization 1: Prevent low runes from proceeding further.) 548 | // 549 | // * 0xA9 and 0xAE are in the Extended_Pictographic range, 550 | // which is checked later in GB11. 551 | if (this_rune != 0xA9 && this_rune != 0xAE && this_rune <= 0x2FF) { 552 | state.grapheme_count += 1; 553 | UCG_DEFERRED_DECODE_STEP(); continue; 554 | } 555 | 556 | // (Optimization 2: Check if the rune is in the Hangul space before getting specific.) 557 | if (0x1100 <= this_rune && this_rune <= 0xD7FB) { 558 | // "Do not break Hangul syllable sequences." 
559 | // 560 | // GB6: L × (L | V | LV | LVT) 561 | // GB7: (LV | V) × (V | T) 562 | // GB8: (LVT | T) × T 563 | if (ucg_is_hangul_syllable_leading(this_rune) || 564 | ucg_is_hangul_syllable_lv(this_rune) || 565 | ucg_is_hangul_syllable_lvt(this_rune)) 566 | { 567 | if (!ucg_is_hangul_syllable_leading(state.last_rune)) { 568 | state.grapheme_count += 1; 569 | } 570 | UCG_DEFERRED_DECODE_STEP(); continue; 571 | } 572 | 573 | if (ucg_is_hangul_syllable_vowel(this_rune)) { 574 | if (ucg_is_hangul_syllable_leading(state.last_rune) || 575 | ucg_is_hangul_syllable_vowel(state.last_rune) || 576 | ucg_is_hangul_syllable_lv(state.last_rune)) 577 | { 578 | UCG_DEFERRED_DECODE_STEP(); continue; 579 | } 580 | state.grapheme_count += 1; 581 | UCG_DEFERRED_DECODE_STEP(); continue; 582 | } 583 | 584 | if (ucg_is_hangul_syllable_trailing(this_rune)) { 585 | if (ucg_is_hangul_syllable_trailing(state.last_rune) || 586 | ucg_is_hangul_syllable_lvt(state.last_rune) || 587 | ucg_is_hangul_syllable_lv(state.last_rune) || 588 | ucg_is_hangul_syllable_vowel(state.last_rune)) 589 | { 590 | UCG_DEFERRED_DECODE_STEP(); continue; 591 | } 592 | state.grapheme_count += 1; 593 | UCG_DEFERRED_DECODE_STEP(); continue; 594 | } 595 | } 596 | 597 | // "Do not break before extending characters or ZWJ." 598 | // 599 | // GB9: × (Extend | ZWJ) 600 | if (this_rune == ZERO_WIDTH_JOINER) { 601 | state.continue_sequence = true; 602 | UCG_DEFERRED_DECODE_STEP(); continue; 603 | } 604 | 605 | if (ucg_is_gcb_extend_class(this_rune)) { 606 | // (Support for GB9c.) 
607 | if (state.current_sequence == Indic) { 608 | if (ucg_is_indic_conjunct_break_extend(this_rune) && ( 609 | ucg_is_indic_conjunct_break_linker(state.last_rune) || 610 | ucg_is_indic_conjunct_break_consonant(state.last_rune) )) 611 | { 612 | state.continue_sequence = true; 613 | UCG_DEFERRED_DECODE_STEP(); continue; 614 | } 615 | 616 | if (ucg_is_indic_conjunct_break_linker(this_rune) && ( 617 | ucg_is_indic_conjunct_break_linker(state.last_rune) || 618 | ucg_is_indic_conjunct_break_extend(state.last_rune) || 619 | ucg_is_indic_conjunct_break_consonant(state.last_rune) )) 620 | { 621 | state.continue_sequence = true; 622 | UCG_DEFERRED_DECODE_STEP(); continue; 623 | } 624 | 625 | UCG_DEFERRED_DECODE_STEP(); continue; 626 | } 627 | 628 | // (Support for GB11.) 629 | if (state.current_sequence == Emoji && ( 630 | ucg_is_gcb_extend_class(state.last_rune) || 631 | ucg_is_emoji_extended_pictographic(state.last_rune) )) 632 | { 633 | state.continue_sequence = true; 634 | } 635 | 636 | UCG_DEFERRED_DECODE_STEP(); continue; 637 | } 638 | 639 | // _The GB9a and GB9b rules only apply to extended grapheme clusters:_ 640 | // "Do not break before SpacingMarks, or after Prepend characters." 641 | // 642 | // GB9a: × SpacingMark 643 | // GB9b: Prepend × 644 | if (ucg_is_spacing_mark(this_rune)) { 645 | UCG_DEFERRED_DECODE_STEP(); continue; 646 | } 647 | 648 | if (ucg_is_gcb_prepend_class(this_rune)) { 649 | state.grapheme_count += 1; 650 | state.bypass_next_rune = true; 651 | UCG_DEFERRED_DECODE_STEP(); continue; 652 | } 653 | 654 | // _The GB9c rule only applies to extended grapheme clusters:_ 655 | // "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker." 
656 | // 657 | // GB9c: \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant} 658 | if (ucg_is_indic_conjunct_break_consonant(this_rune)) { 659 | if (state.current_sequence == Indic) { 660 | if (state.last_rune == ZERO_WIDTH_JOINER || 661 | ucg_is_indic_conjunct_break_linker(state.last_rune)) 662 | { 663 | state.continue_sequence = true; 664 | } else { 665 | state.grapheme_count += 1; 666 | } 667 | } else { 668 | state.grapheme_count += 1; 669 | state.current_sequence = Indic; 670 | state.continue_sequence = true; 671 | } 672 | UCG_DEFERRED_DECODE_STEP(); continue; 673 | } 674 | 675 | if (ucg_is_indic_conjunct_break_extend(this_rune)) { 676 | if (state.current_sequence == Indic) { 677 | if (ucg_is_indic_conjunct_break_consonant(state.last_rune) || 678 | ucg_is_indic_conjunct_break_linker(state.last_rune)) 679 | { 680 | state.continue_sequence = true; 681 | } else { 682 | state.grapheme_count += 1; 683 | } 684 | } 685 | UCG_DEFERRED_DECODE_STEP(); continue; 686 | } 687 | 688 | if (ucg_is_indic_conjunct_break_linker(this_rune)) { 689 | if (state.current_sequence == Indic) { 690 | if (ucg_is_indic_conjunct_break_extend(state.last_rune) || 691 | ucg_is_indic_conjunct_break_linker(state.last_rune)) 692 | { 693 | state.continue_sequence = true; 694 | } else { 695 | state.grapheme_count += 1; 696 | } 697 | } 698 | UCG_DEFERRED_DECODE_STEP(); continue; 699 | } 700 | 701 | // 702 | // (Curiously, there is no GB10.) 703 | // 704 | 705 | // "Do not break within emoji modifier sequences or emoji zwj sequences." 
706 | // 707 | // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} 708 | if (ucg_is_emoji_extended_pictographic(this_rune)) { 709 | if (state.current_sequence != Emoji || state.last_rune != ZERO_WIDTH_JOINER) { 710 | state.grapheme_count += 1; 711 | } 712 | state.current_sequence = Emoji; 713 | state.continue_sequence = true; 714 | UCG_DEFERRED_DECODE_STEP(); continue; 715 | } 716 | 717 | // "Do not break within emoji flag sequences. 718 | // That is, do not break between regional indicator (RI) symbols 719 | // if there is an odd number of RI characters before the break point." 720 | // 721 | // GB12: sot (RI RI)* RI × RI 722 | // GB13: [^RI] (RI RI)* RI × RI 723 | if (ucg_is_regional_indicator(this_rune)) { 724 | if ((state.regional_indicator_counter & 1) == 0) { 725 | state.grapheme_count += 1; 726 | } 727 | 728 | state.current_sequence = Regional; 729 | state.continue_sequence = true; 730 | state.regional_indicator_counter += 1; 731 | 732 | UCG_DEFERRED_DECODE_STEP(); continue; 733 | } 734 | 735 | // "Otherwise, break everywhere." 
736 | // 737 | // GB999: Any ÷ Any 738 | state.grapheme_count += 1; 739 | UCG_DEFERRED_DECODE_STEP(); 740 | } 741 | 742 | #undef UCG_DEFERRED_DECODE_STEP 743 | 744 | if (out_graphemes != NULL) { *out_graphemes = state.graphemes; } 745 | if (out_rune_count != NULL) { *out_rune_count = state.rune_count; } 746 | if (out_grapheme_count != NULL) { *out_grapheme_count = state.grapheme_count; } 747 | if (out_width != NULL) { *out_width = state.width; } 748 | 749 | return 0; 750 | } 751 | -------------------------------------------------------------------------------- /src/ucg.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Feoramund 3 | * SPDX-License-Identifier: BSD-3-Clause 4 | */ 5 | #ifndef _UCG_INCLUDED 6 | #define _UCG_INCLUDED 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #include <stdbool.h> 13 | #include <stddef.h> 14 | #include <stdint.h> 15 | 16 | /* For the sake of compatibility, the general-purpose integers used throughout 17 | * this library are defined to be at least 32 bits wide but may be larger. */ 18 | typedef int_fast32_t ucg_int; 19 | 20 | /* This is UCG's custom allocator structure, allowing you to specify precisely 21 | * how `ucg_decode_grapheme_clusters` allocates its memory. 22 | * 23 | * An arena or stack allocator that supports fast realloc is recommended. */ 24 | typedef struct { 25 | void* (*malloc)(intptr_t size, void* ctx); 26 | void* (*realloc)(void* ptr, intptr_t old_size, intptr_t new_size, void* ctx); 27 | void (*free)(void* ptr, intptr_t size, void* ctx); 28 | 29 | void* ctx; 30 | } ucg_allocator; 31 | 32 | void *ucg_default_malloc(intptr_t size, void *ctx); 33 | void *ucg_default_realloc(void* ptr, intptr_t old_size, intptr_t new_size, void *ctx); 34 | void ucg_default_free(void *ptr, intptr_t size, void *ctx); 35 | 36 | /* This is the default allocator. Pass this if you don't mind using malloc. 
*/ 37 | extern const ucg_allocator ucg_default_allocator; 38 | 39 | /* This is the data that is allocated when an allocator is passed to 40 | * ucg_decode_grapheme_clusters. */ 41 | typedef struct { 42 | ucg_int byte_index; 43 | ucg_int rune_index; 44 | ucg_int width; 45 | } ucg_grapheme; 46 | 47 | /* This procedure merely counts the runes, graphemes, and width without 48 | * incurring any allocations. It is a thin wrapper over 49 | * ucg_decode_grapheme_clusters that passes NULL for an allocator. */ 50 | ucg_int ucg_grapheme_count( 51 | const uint8_t* str, 52 | ucg_int str_len, 53 | 54 | ucg_int* out_runes, 55 | ucg_int* out_graphemes, 56 | ucg_int* out_width); 57 | 58 | /* This is the heart of the library. If you want specific information about each 59 | * grapheme, you can pass an allocator and a pointer to a `ucg_grapheme*` variable 60 | * that will receive the allocated data. 61 | * 62 | * Otherwise, if NULL is passed for an allocator, no allocation will occur, and 63 | * no data will be written to `out_graphemes`. 64 | * 65 | * Each `out_` argument may be NULL to signify that you do not want the data, 66 | * except for `out_graphemes`. The presence of an allocator is used to determine 67 | * whether or not to allocate the extra data. Therefore, if an allocator is 68 | * passed, `out_graphemes` must point to a valid `ucg_grapheme*` variable. 69 | * 70 | * The return value is 0 on success or negative (one of the error values below) 71 | * if there was trouble parsing the string as UTF-8. */ 72 | ucg_int ucg_decode_grapheme_clusters( 73 | ucg_allocator* allocator, 74 | const uint8_t* str, 75 | ucg_int str_len, 76 | 77 | ucg_grapheme** out_graphemes, 78 | ucg_int* out_rune_count, 79 | ucg_int* out_grapheme_count, 80 | ucg_int* out_width); 81 | 82 | 83 | /* These procedures are part of how UCG decodes graphemes, and as such, they are 84 | * made public here in the event that they are useful. 
*/ 85 | 86 | typedef ucg_int ucg_rune; 87 | 88 | #define UCG_EOF (-1) 89 | #define UCG_EXPECTED_MORE_BYTES (-2) 90 | #define UCG_INVALID_RUNE (-3) 91 | 92 | /* This procedure decodes a byte string and returns a valid Unicode codepoint or 93 | * one of the errors above. The byte iterator is increased as needed while 94 | * reading the string. */ 95 | ucg_rune ucg_decode_rune(const uint8_t* str, ucg_int str_len, ucg_int* byte_iterator); 96 | 97 | /* The following procedures all return true or false based on whether a Unicode 98 | * codepoint fits into a certain class. */ 99 | 100 | bool ucg_is_control (ucg_rune r); 101 | bool ucg_is_emoji_modifier (ucg_rune r); 102 | bool ucg_is_regional_indicator (ucg_rune r); 103 | bool ucg_is_enclosing_mark (ucg_rune r); 104 | bool ucg_is_prepended_concatenation_mark (ucg_rune r); 105 | bool ucg_is_spacing_mark (ucg_rune r); 106 | bool ucg_is_nonspacing_mark (ucg_rune r); 107 | bool ucg_is_emoji_extended_pictographic (ucg_rune r); 108 | bool ucg_is_grapheme_extend (ucg_rune r); 109 | bool ucg_is_hangul_syllable_leading (ucg_rune r); 110 | bool ucg_is_hangul_syllable_vowel (ucg_rune r); 111 | bool ucg_is_hangul_syllable_trailing (ucg_rune r); 112 | bool ucg_is_hangul_syllable_lv (ucg_rune r); 113 | bool ucg_is_hangul_syllable_lvt (ucg_rune r); 114 | bool ucg_is_indic_consonant_preceding_repha (ucg_rune r); 115 | bool ucg_is_indic_consonant_prefixed (ucg_rune r); 116 | bool ucg_is_indic_conjunct_break_linker (ucg_rune r); 117 | bool ucg_is_indic_conjunct_break_consonant (ucg_rune r); 118 | bool ucg_is_indic_conjunct_break_extend (ucg_rune r); 119 | bool ucg_is_gcb_prepend_class (ucg_rune r); 120 | bool ucg_is_gcb_extend_class (ucg_rune r); 121 | 122 | #ifdef __cplusplus 123 | } 124 | #endif 125 | 126 | #endif /* _UCG_INCLUDED */ 127 | -------------------------------------------------------------------------------- /src/ucg_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
SPDX-FileCopyrightText: (c) 2024 Feoramund 3 | * SPDX-License-Identifier: BSD-3-Clause 4 | */ 5 | #ifndef _UCG_TABLES_INCLUDED 6 | #define _UCG_TABLES_INCLUDED 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #include "ucg.h" 13 | 14 | // 15 | // The tables below are accurate as of Unicode 15.1.0. 16 | // 17 | 18 | static const ucg_rune ucg_spacing_mark_ranges[] = { 19 | 0x0903, 0x0903, 20 | 0x093B, 0x093B, 21 | 0x093E, 0x0940, 22 | 0x0949, 0x094C, 23 | 0x094E, 0x094F, 24 | 0x0982, 0x0983, 25 | 0x09BE, 0x09C0, 26 | 0x09C7, 0x09C8, 27 | 0x09CB, 0x09CC, 28 | 0x09D7, 0x09D7, 29 | 0x0A03, 0x0A03, 30 | 0x0A3E, 0x0A40, 31 | 0x0A83, 0x0A83, 32 | 0x0ABE, 0x0AC0, 33 | 0x0AC9, 0x0AC9, 34 | 0x0ACB, 0x0ACC, 35 | 0x0B02, 0x0B03, 36 | 0x0B3E, 0x0B3E, 37 | 0x0B40, 0x0B40, 38 | 0x0B47, 0x0B48, 39 | 0x0B4B, 0x0B4C, 40 | 0x0B57, 0x0B57, 41 | 0x0BBE, 0x0BBF, 42 | 0x0BC1, 0x0BC2, 43 | 0x0BC6, 0x0BC8, 44 | 0x0BCA, 0x0BCC, 45 | 0x0BD7, 0x0BD7, 46 | 0x0C01, 0x0C03, 47 | 0x0C41, 0x0C44, 48 | 0x0C82, 0x0C83, 49 | 0x0CBE, 0x0CBE, 50 | 0x0CC0, 0x0CC4, 51 | 0x0CC7, 0x0CC8, 52 | 0x0CCA, 0x0CCB, 53 | 0x0CD5, 0x0CD6, 54 | 0x0CF3, 0x0CF3, 55 | 0x0D02, 0x0D03, 56 | 0x0D3E, 0x0D40, 57 | 0x0D46, 0x0D48, 58 | 0x0D4A, 0x0D4C, 59 | 0x0D57, 0x0D57, 60 | 0x0D82, 0x0D83, 61 | 0x0DCF, 0x0DD1, 62 | 0x0DD8, 0x0DDF, 63 | 0x0DF2, 0x0DF3, 64 | 0x0F3E, 0x0F3F, 65 | 0x0F7F, 0x0F7F, 66 | 0x102B, 0x102C, 67 | 0x1031, 0x1031, 68 | 0x1038, 0x1038, 69 | 0x103B, 0x103C, 70 | 0x1056, 0x1057, 71 | 0x1062, 0x1064, 72 | 0x1067, 0x106D, 73 | 0x1083, 0x1084, 74 | 0x1087, 0x108C, 75 | 0x108F, 0x108F, 76 | 0x109A, 0x109C, 77 | 0x1715, 0x1715, 78 | 0x1734, 0x1734, 79 | 0x17B6, 0x17B6, 80 | 0x17BE, 0x17C5, 81 | 0x17C7, 0x17C8, 82 | 0x1923, 0x1926, 83 | 0x1929, 0x192B, 84 | 0x1930, 0x1931, 85 | 0x1933, 0x1938, 86 | 0x1A19, 0x1A1A, 87 | 0x1A55, 0x1A55, 88 | 0x1A57, 0x1A57, 89 | 0x1A61, 0x1A61, 90 | 0x1A63, 0x1A64, 91 | 0x1A6D, 0x1A72, 92 | 0x1B04, 0x1B04, 93 | 0x1B35, 0x1B35, 94 | 0x1B3B, 0x1B3B, 95 | 0x1B3D, 
0x1B41, 96 | 0x1B43, 0x1B44, 97 | 0x1B82, 0x1B82, 98 | 0x1BA1, 0x1BA1, 99 | 0x1BA6, 0x1BA7, 100 | 0x1BAA, 0x1BAA, 101 | 0x1BE7, 0x1BE7, 102 | 0x1BEA, 0x1BEC, 103 | 0x1BEE, 0x1BEE, 104 | 0x1BF2, 0x1BF3, 105 | 0x1C24, 0x1C2B, 106 | 0x1C34, 0x1C35, 107 | 0x1CE1, 0x1CE1, 108 | 0x1CF7, 0x1CF7, 109 | 0x302E, 0x302F, 110 | 0xA823, 0xA824, 111 | 0xA827, 0xA827, 112 | 0xA880, 0xA881, 113 | 0xA8B4, 0xA8C3, 114 | 0xA952, 0xA953, 115 | 0xA983, 0xA983, 116 | 0xA9B4, 0xA9B5, 117 | 0xA9BA, 0xA9BB, 118 | 0xA9BE, 0xA9C0, 119 | 0xAA2F, 0xAA30, 120 | 0xAA33, 0xAA34, 121 | 0xAA4D, 0xAA4D, 122 | 0xAA7B, 0xAA7B, 123 | 0xAA7D, 0xAA7D, 124 | 0xAAEB, 0xAAEB, 125 | 0xAAEE, 0xAAEF, 126 | 0xAAF5, 0xAAF5, 127 | 0xABE3, 0xABE4, 128 | 0xABE6, 0xABE7, 129 | 0xABE9, 0xABEA, 130 | 0xABEC, 0xABEC, 131 | 0x11000, 0x11000, 132 | 0x11002, 0x11002, 133 | 0x11082, 0x11082, 134 | 0x110B0, 0x110B2, 135 | 0x110B7, 0x110B8, 136 | 0x1112C, 0x1112C, 137 | 0x11145, 0x11146, 138 | 0x11182, 0x11182, 139 | 0x111B3, 0x111B5, 140 | 0x111BF, 0x111C0, 141 | 0x111CE, 0x111CE, 142 | 0x1122C, 0x1122E, 143 | 0x11232, 0x11233, 144 | 0x11235, 0x11235, 145 | 0x112E0, 0x112E2, 146 | 0x11302, 0x11303, 147 | 0x1133E, 0x1133F, 148 | 0x11341, 0x11344, 149 | 0x11347, 0x11348, 150 | 0x1134B, 0x1134D, 151 | 0x11357, 0x11357, 152 | 0x11362, 0x11363, 153 | 0x11435, 0x11437, 154 | 0x11440, 0x11441, 155 | 0x11445, 0x11445, 156 | 0x114B0, 0x114B2, 157 | 0x114B9, 0x114B9, 158 | 0x114BB, 0x114BE, 159 | 0x114C1, 0x114C1, 160 | 0x115AF, 0x115B1, 161 | 0x115B8, 0x115BB, 162 | 0x115BE, 0x115BE, 163 | 0x11630, 0x11632, 164 | 0x1163B, 0x1163C, 165 | 0x1163E, 0x1163E, 166 | 0x116AC, 0x116AC, 167 | 0x116AE, 0x116AF, 168 | 0x116B6, 0x116B6, 169 | 0x11720, 0x11721, 170 | 0x11726, 0x11726, 171 | 0x1182C, 0x1182E, 172 | 0x11838, 0x11838, 173 | 0x11930, 0x11935, 174 | 0x11937, 0x11938, 175 | 0x1193D, 0x1193D, 176 | 0x11940, 0x11940, 177 | 0x11942, 0x11942, 178 | 0x119D1, 0x119D3, 179 | 0x119DC, 0x119DF, 180 | 0x119E4, 0x119E4, 181 | 0x11A39, 0x11A39, 
182 | 0x11A57, 0x11A58, 183 | 0x11A97, 0x11A97, 184 | 0x11C2F, 0x11C2F, 185 | 0x11C3E, 0x11C3E, 186 | 0x11CA9, 0x11CA9, 187 | 0x11CB1, 0x11CB1, 188 | 0x11CB4, 0x11CB4, 189 | 0x11D8A, 0x11D8E, 190 | 0x11D93, 0x11D94, 191 | 0x11D96, 0x11D96, 192 | 0x11EF5, 0x11EF6, 193 | 0x11F03, 0x11F03, 194 | 0x11F34, 0x11F35, 195 | 0x11F3E, 0x11F3F, 196 | 0x11F41, 0x11F41, 197 | 0x16F51, 0x16F87, 198 | 0x16FF0, 0x16FF1, 199 | 0x1D165, 0x1D166, 200 | 0x1D16D, 0x1D172, 201 | }; 202 | 203 | static const ucg_rune ucg_nonspacing_mark_ranges[] = { 204 | 0x0300, 0x036F, 205 | 0x0483, 0x0487, 206 | 0x0591, 0x05BD, 207 | 0x05BF, 0x05BF, 208 | 0x05C1, 0x05C2, 209 | 0x05C4, 0x05C5, 210 | 0x05C7, 0x05C7, 211 | 0x0610, 0x061A, 212 | 0x064B, 0x065F, 213 | 0x0670, 0x0670, 214 | 0x06D6, 0x06DC, 215 | 0x06DF, 0x06E4, 216 | 0x06E7, 0x06E8, 217 | 0x06EA, 0x06ED, 218 | 0x0711, 0x0711, 219 | 0x0730, 0x074A, 220 | 0x07A6, 0x07B0, 221 | 0x07EB, 0x07F3, 222 | 0x07FD, 0x07FD, 223 | 0x0816, 0x0819, 224 | 0x081B, 0x0823, 225 | 0x0825, 0x0827, 226 | 0x0829, 0x082D, 227 | 0x0859, 0x085B, 228 | 0x0898, 0x089F, 229 | 0x08CA, 0x08E1, 230 | 0x08E3, 0x0902, 231 | 0x093A, 0x093A, 232 | 0x093C, 0x093C, 233 | 0x0941, 0x0948, 234 | 0x094D, 0x094D, 235 | 0x0951, 0x0957, 236 | 0x0962, 0x0963, 237 | 0x0981, 0x0981, 238 | 0x09BC, 0x09BC, 239 | 0x09C1, 0x09C4, 240 | 0x09CD, 0x09CD, 241 | 0x09E2, 0x09E3, 242 | 0x09FE, 0x09FE, 243 | 0x0A01, 0x0A02, 244 | 0x0A3C, 0x0A3C, 245 | 0x0A41, 0x0A42, 246 | 0x0A47, 0x0A48, 247 | 0x0A4B, 0x0A4D, 248 | 0x0A51, 0x0A51, 249 | 0x0A70, 0x0A71, 250 | 0x0A75, 0x0A75, 251 | 0x0A81, 0x0A82, 252 | 0x0ABC, 0x0ABC, 253 | 0x0AC1, 0x0AC5, 254 | 0x0AC7, 0x0AC8, 255 | 0x0ACD, 0x0ACD, 256 | 0x0AE2, 0x0AE3, 257 | 0x0AFA, 0x0AFF, 258 | 0x0B01, 0x0B01, 259 | 0x0B3C, 0x0B3C, 260 | 0x0B3F, 0x0B3F, 261 | 0x0B41, 0x0B44, 262 | 0x0B4D, 0x0B4D, 263 | 0x0B55, 0x0B56, 264 | 0x0B62, 0x0B63, 265 | 0x0B82, 0x0B82, 266 | 0x0BC0, 0x0BC0, 267 | 0x0BCD, 0x0BCD, 268 | 0x0C00, 0x0C00, 269 | 0x0C04, 0x0C04, 270 | 0x0C3C, 
0x0C3C, 271 | 0x0C3E, 0x0C40, 272 | 0x0C46, 0x0C48, 273 | 0x0C4A, 0x0C4D, 274 | 0x0C55, 0x0C56, 275 | 0x0C62, 0x0C63, 276 | 0x0C81, 0x0C81, 277 | 0x0CBC, 0x0CBC, 278 | 0x0CBF, 0x0CBF, 279 | 0x0CC6, 0x0CC6, 280 | 0x0CCC, 0x0CCD, 281 | 0x0CE2, 0x0CE3, 282 | 0x0D00, 0x0D01, 283 | 0x0D3B, 0x0D3C, 284 | 0x0D41, 0x0D44, 285 | 0x0D4D, 0x0D4D, 286 | 0x0D62, 0x0D63, 287 | 0x0D81, 0x0D81, 288 | 0x0DCA, 0x0DCA, 289 | 0x0DD2, 0x0DD4, 290 | 0x0DD6, 0x0DD6, 291 | 0x0E31, 0x0E31, 292 | 0x0E34, 0x0E3A, 293 | 0x0E47, 0x0E4E, 294 | 0x0EB1, 0x0EB1, 295 | 0x0EB4, 0x0EBC, 296 | 0x0EC8, 0x0ECE, 297 | 0x0F18, 0x0F19, 298 | 0x0F35, 0x0F35, 299 | 0x0F37, 0x0F37, 300 | 0x0F39, 0x0F39, 301 | 0x0F71, 0x0F7E, 302 | 0x0F80, 0x0F84, 303 | 0x0F86, 0x0F87, 304 | 0x0F8D, 0x0F97, 305 | 0x0F99, 0x0FBC, 306 | 0x0FC6, 0x0FC6, 307 | 0x102D, 0x1030, 308 | 0x1032, 0x1037, 309 | 0x1039, 0x103A, 310 | 0x103D, 0x103E, 311 | 0x1058, 0x1059, 312 | 0x105E, 0x1060, 313 | 0x1071, 0x1074, 314 | 0x1082, 0x1082, 315 | 0x1085, 0x1086, 316 | 0x108D, 0x108D, 317 | 0x109D, 0x109D, 318 | 0x135D, 0x135F, 319 | 0x1712, 0x1714, 320 | 0x1732, 0x1733, 321 | 0x1752, 0x1753, 322 | 0x1772, 0x1773, 323 | 0x17B4, 0x17B5, 324 | 0x17B7, 0x17BD, 325 | 0x17C6, 0x17C6, 326 | 0x17C9, 0x17D3, 327 | 0x17DD, 0x17DD, 328 | 0x180B, 0x180D, 329 | 0x180F, 0x180F, 330 | 0x1885, 0x1886, 331 | 0x18A9, 0x18A9, 332 | 0x1920, 0x1922, 333 | 0x1927, 0x1928, 334 | 0x1932, 0x1932, 335 | 0x1939, 0x193B, 336 | 0x1A17, 0x1A18, 337 | 0x1A1B, 0x1A1B, 338 | 0x1A56, 0x1A56, 339 | 0x1A58, 0x1A5E, 340 | 0x1A60, 0x1A60, 341 | 0x1A62, 0x1A62, 342 | 0x1A65, 0x1A6C, 343 | 0x1A73, 0x1A7C, 344 | 0x1A7F, 0x1A7F, 345 | 0x1AB0, 0x1ABD, 346 | 0x1ABF, 0x1ACE, 347 | 0x1B00, 0x1B03, 348 | 0x1B34, 0x1B34, 349 | 0x1B36, 0x1B3A, 350 | 0x1B3C, 0x1B3C, 351 | 0x1B42, 0x1B42, 352 | 0x1B6B, 0x1B73, 353 | 0x1B80, 0x1B81, 354 | 0x1BA2, 0x1BA5, 355 | 0x1BA8, 0x1BA9, 356 | 0x1BAB, 0x1BAD, 357 | 0x1BE6, 0x1BE6, 358 | 0x1BE8, 0x1BE9, 359 | 0x1BED, 0x1BED, 360 | 0x1BEF, 0x1BF1, 361 | 
0x1C2C, 0x1C33, 362 | 0x1C36, 0x1C37, 363 | 0x1CD0, 0x1CD2, 364 | 0x1CD4, 0x1CE0, 365 | 0x1CE2, 0x1CE8, 366 | 0x1CED, 0x1CED, 367 | 0x1CF4, 0x1CF4, 368 | 0x1CF8, 0x1CF9, 369 | 0x1DC0, 0x1DFF, 370 | 0x20D0, 0x20DC, 371 | 0x20E1, 0x20E1, 372 | 0x20E5, 0x20F0, 373 | 0x2CEF, 0x2CF1, 374 | 0x2D7F, 0x2D7F, 375 | 0x2DE0, 0x2DFF, 376 | 0x302A, 0x302D, 377 | 0x3099, 0x309A, 378 | 0xA66F, 0xA66F, 379 | 0xA674, 0xA67D, 380 | 0xA69E, 0xA69F, 381 | 0xA6F0, 0xA6F1, 382 | 0xA802, 0xA802, 383 | 0xA806, 0xA806, 384 | 0xA80B, 0xA80B, 385 | 0xA825, 0xA826, 386 | 0xA82C, 0xA82C, 387 | 0xA8C4, 0xA8C5, 388 | 0xA8E0, 0xA8F1, 389 | 0xA8FF, 0xA8FF, 390 | 0xA926, 0xA92D, 391 | 0xA947, 0xA951, 392 | 0xA980, 0xA982, 393 | 0xA9B3, 0xA9B3, 394 | 0xA9B6, 0xA9B9, 395 | 0xA9BC, 0xA9BD, 396 | 0xA9E5, 0xA9E5, 397 | 0xAA29, 0xAA2E, 398 | 0xAA31, 0xAA32, 399 | 0xAA35, 0xAA36, 400 | 0xAA43, 0xAA43, 401 | 0xAA4C, 0xAA4C, 402 | 0xAA7C, 0xAA7C, 403 | 0xAAB0, 0xAAB0, 404 | 0xAAB2, 0xAAB4, 405 | 0xAAB7, 0xAAB8, 406 | 0xAABE, 0xAABF, 407 | 0xAAC1, 0xAAC1, 408 | 0xAAEC, 0xAAED, 409 | 0xAAF6, 0xAAF6, 410 | 0xABE5, 0xABE5, 411 | 0xABE8, 0xABE8, 412 | 0xABED, 0xABED, 413 | 0xFB1E, 0xFB1E, 414 | 0xFE00, 0xFE0F, 415 | 0xFE20, 0xFE2F, 416 | 0x101FD, 0x101FD, 417 | 0x102E0, 0x102E0, 418 | 0x10376, 0x1037A, 419 | 0x10A01, 0x10A03, 420 | 0x10A05, 0x10A06, 421 | 0x10A0C, 0x10A0F, 422 | 0x10A38, 0x10A3A, 423 | 0x10A3F, 0x10A3F, 424 | 0x10AE5, 0x10AE6, 425 | 0x10D24, 0x10D27, 426 | 0x10EAB, 0x10EAC, 427 | 0x10EFD, 0x10EFF, 428 | 0x10F46, 0x10F50, 429 | 0x10F82, 0x10F85, 430 | 0x11001, 0x11001, 431 | 0x11038, 0x11046, 432 | 0x11070, 0x11070, 433 | 0x11073, 0x11074, 434 | 0x1107F, 0x11081, 435 | 0x110B3, 0x110B6, 436 | 0x110B9, 0x110BA, 437 | 0x110C2, 0x110C2, 438 | 0x11100, 0x11102, 439 | 0x11127, 0x1112B, 440 | 0x1112D, 0x11134, 441 | 0x11173, 0x11173, 442 | 0x11180, 0x11181, 443 | 0x111B6, 0x111BE, 444 | 0x111C9, 0x111CC, 445 | 0x111CF, 0x111CF, 446 | 0x1122F, 0x11231, 447 | 0x11234, 0x11234, 448 | 0x11236, 0x11237, 449 
| 0x1123E, 0x1123E, 450 | 0x11241, 0x11241, 451 | 0x112DF, 0x112DF, 452 | 0x112E3, 0x112EA, 453 | 0x11300, 0x11301, 454 | 0x1133B, 0x1133C, 455 | 0x11340, 0x11340, 456 | 0x11366, 0x1136C, 457 | 0x11370, 0x11374, 458 | 0x11438, 0x1143F, 459 | 0x11442, 0x11444, 460 | 0x11446, 0x11446, 461 | 0x1145E, 0x1145E, 462 | 0x114B3, 0x114B8, 463 | 0x114BA, 0x114BA, 464 | 0x114BF, 0x114C0, 465 | 0x114C2, 0x114C3, 466 | 0x115B2, 0x115B5, 467 | 0x115BC, 0x115BD, 468 | 0x115BF, 0x115C0, 469 | 0x115DC, 0x115DD, 470 | 0x11633, 0x1163A, 471 | 0x1163D, 0x1163D, 472 | 0x1163F, 0x11640, 473 | 0x116AB, 0x116AB, 474 | 0x116AD, 0x116AD, 475 | 0x116B0, 0x116B5, 476 | 0x116B7, 0x116B7, 477 | 0x1171D, 0x1171F, 478 | 0x11722, 0x11725, 479 | 0x11727, 0x1172B, 480 | 0x1182F, 0x11837, 481 | 0x11839, 0x1183A, 482 | 0x1193B, 0x1193C, 483 | 0x1193E, 0x1193E, 484 | 0x11943, 0x11943, 485 | 0x119D4, 0x119D7, 486 | 0x119DA, 0x119DB, 487 | 0x119E0, 0x119E0, 488 | 0x11A01, 0x11A0A, 489 | 0x11A33, 0x11A38, 490 | 0x11A3B, 0x11A3E, 491 | 0x11A47, 0x11A47, 492 | 0x11A51, 0x11A56, 493 | 0x11A59, 0x11A5B, 494 | 0x11A8A, 0x11A96, 495 | 0x11A98, 0x11A99, 496 | 0x11C30, 0x11C36, 497 | 0x11C38, 0x11C3D, 498 | 0x11C3F, 0x11C3F, 499 | 0x11C92, 0x11CA7, 500 | 0x11CAA, 0x11CB0, 501 | 0x11CB2, 0x11CB3, 502 | 0x11CB5, 0x11CB6, 503 | 0x11D31, 0x11D36, 504 | 0x11D3A, 0x11D3A, 505 | 0x11D3C, 0x11D3D, 506 | 0x11D3F, 0x11D45, 507 | 0x11D47, 0x11D47, 508 | 0x11D90, 0x11D91, 509 | 0x11D95, 0x11D95, 510 | 0x11D97, 0x11D97, 511 | 0x11EF3, 0x11EF4, 512 | 0x11F00, 0x11F01, 513 | 0x11F36, 0x11F3A, 514 | 0x11F40, 0x11F40, 515 | 0x11F42, 0x11F42, 516 | 0x13440, 0x13440, 517 | 0x13447, 0x13455, 518 | 0x16AF0, 0x16AF4, 519 | 0x16B30, 0x16B36, 520 | 0x16F4F, 0x16F4F, 521 | 0x16F8F, 0x16F92, 522 | 0x16FE4, 0x16FE4, 523 | 0x1BC9D, 0x1BC9E, 524 | 0x1CF00, 0x1CF2D, 525 | 0x1CF30, 0x1CF46, 526 | 0x1D167, 0x1D169, 527 | 0x1D17B, 0x1D182, 528 | 0x1D185, 0x1D18B, 529 | 0x1D1AA, 0x1D1AD, 530 | 0x1D242, 0x1D244, 531 | 0x1DA00, 0x1DA36, 532 | 
0x1DA3B, 0x1DA6C, 533 | 0x1DA75, 0x1DA75, 534 | 0x1DA84, 0x1DA84, 535 | 0x1DA9B, 0x1DA9F, 536 | 0x1DAA1, 0x1DAAF, 537 | 0x1E000, 0x1E006, 538 | 0x1E008, 0x1E018, 539 | 0x1E01B, 0x1E021, 540 | 0x1E023, 0x1E024, 541 | 0x1E026, 0x1E02A, 542 | 0x1E08F, 0x1E08F, 543 | 0x1E130, 0x1E136, 544 | 0x1E2AE, 0x1E2AE, 545 | 0x1E2EC, 0x1E2EF, 546 | 0x1E4EC, 0x1E4EF, 547 | 0x1E8D0, 0x1E8D6, 548 | 0x1E944, 0x1E94A, 549 | 0xE0100, 0xE01EF, 550 | }; 551 | 552 | static const ucg_rune ucg_emoji_extended_pictographic_ranges[] = { 553 | 0x00A9, 0x00A9, 554 | 0x00AE, 0x00AE, 555 | 0x203C, 0x203C, 556 | 0x2049, 0x2049, 557 | 0x2122, 0x2122, 558 | 0x2139, 0x2139, 559 | 0x2194, 0x2199, 560 | 0x21A9, 0x21AA, 561 | 0x231A, 0x231B, 562 | 0x2328, 0x2328, 563 | 0x2388, 0x2388, 564 | 0x23CF, 0x23CF, 565 | 0x23E9, 0x23EC, 566 | 0x23ED, 0x23EE, 567 | 0x23EF, 0x23EF, 568 | 0x23F0, 0x23F0, 569 | 0x23F1, 0x23F2, 570 | 0x23F3, 0x23F3, 571 | 0x23F8, 0x23FA, 572 | 0x24C2, 0x24C2, 573 | 0x25AA, 0x25AB, 574 | 0x25B6, 0x25B6, 575 | 0x25C0, 0x25C0, 576 | 0x25FB, 0x25FE, 577 | 0x2600, 0x2601, 578 | 0x2602, 0x2603, 579 | 0x2604, 0x2604, 580 | 0x2605, 0x2605, 581 | 0x2607, 0x260D, 582 | 0x260E, 0x260E, 583 | 0x260F, 0x2610, 584 | 0x2611, 0x2611, 585 | 0x2612, 0x2612, 586 | 0x2614, 0x2615, 587 | 0x2616, 0x2617, 588 | 0x2618, 0x2618, 589 | 0x2619, 0x261C, 590 | 0x261D, 0x261D, 591 | 0x261E, 0x261F, 592 | 0x2620, 0x2620, 593 | 0x2621, 0x2621, 594 | 0x2622, 0x2623, 595 | 0x2624, 0x2625, 596 | 0x2626, 0x2626, 597 | 0x2627, 0x2629, 598 | 0x262A, 0x262A, 599 | 0x262B, 0x262D, 600 | 0x262E, 0x262E, 601 | 0x262F, 0x262F, 602 | 0x2630, 0x2637, 603 | 0x2638, 0x2639, 604 | 0x263A, 0x263A, 605 | 0x263B, 0x263F, 606 | 0x2640, 0x2640, 607 | 0x2641, 0x2641, 608 | 0x2642, 0x2642, 609 | 0x2643, 0x2647, 610 | 0x2648, 0x2653, 611 | 0x2654, 0x265E, 612 | 0x265F, 0x265F, 613 | 0x2660, 0x2660, 614 | 0x2661, 0x2662, 615 | 0x2663, 0x2663, 616 | 0x2664, 0x2664, 617 | 0x2665, 0x2666, 618 | 0x2667, 0x2667, 619 | 0x2668, 0x2668, 620 | 
0x2669, 0x267A, 621 | 0x267B, 0x267B, 622 | 0x267C, 0x267D, 623 | 0x267E, 0x267E, 624 | 0x267F, 0x267F, 625 | 0x2680, 0x2685, 626 | 0x2690, 0x2691, 627 | 0x2692, 0x2692, 628 | 0x2693, 0x2693, 629 | 0x2694, 0x2694, 630 | 0x2695, 0x2695, 631 | 0x2696, 0x2697, 632 | 0x2698, 0x2698, 633 | 0x2699, 0x2699, 634 | 0x269A, 0x269A, 635 | 0x269B, 0x269C, 636 | 0x269D, 0x269F, 637 | 0x26A0, 0x26A1, 638 | 0x26A2, 0x26A6, 639 | 0x26A7, 0x26A7, 640 | 0x26A8, 0x26A9, 641 | 0x26AA, 0x26AB, 642 | 0x26AC, 0x26AF, 643 | 0x26B0, 0x26B1, 644 | 0x26B2, 0x26BC, 645 | 0x26BD, 0x26BE, 646 | 0x26BF, 0x26C3, 647 | 0x26C4, 0x26C5, 648 | 0x26C6, 0x26C7, 649 | 0x26C8, 0x26C8, 650 | 0x26C9, 0x26CD, 651 | 0x26CE, 0x26CE, 652 | 0x26CF, 0x26CF, 653 | 0x26D0, 0x26D0, 654 | 0x26D1, 0x26D1, 655 | 0x26D2, 0x26D2, 656 | 0x26D3, 0x26D3, 657 | 0x26D4, 0x26D4, 658 | 0x26D5, 0x26E8, 659 | 0x26E9, 0x26E9, 660 | 0x26EA, 0x26EA, 661 | 0x26EB, 0x26EF, 662 | 0x26F0, 0x26F1, 663 | 0x26F2, 0x26F3, 664 | 0x26F4, 0x26F4, 665 | 0x26F5, 0x26F5, 666 | 0x26F6, 0x26F6, 667 | 0x26F7, 0x26F9, 668 | 0x26FA, 0x26FA, 669 | 0x26FB, 0x26FC, 670 | 0x26FD, 0x26FD, 671 | 0x26FE, 0x2701, 672 | 0x2702, 0x2702, 673 | 0x2703, 0x2704, 674 | 0x2705, 0x2705, 675 | 0x2708, 0x270C, 676 | 0x270D, 0x270D, 677 | 0x270E, 0x270E, 678 | 0x270F, 0x270F, 679 | 0x2710, 0x2711, 680 | 0x2712, 0x2712, 681 | 0x2714, 0x2714, 682 | 0x2716, 0x2716, 683 | 0x271D, 0x271D, 684 | 0x2721, 0x2721, 685 | 0x2728, 0x2728, 686 | 0x2733, 0x2734, 687 | 0x2744, 0x2744, 688 | 0x2747, 0x2747, 689 | 0x274C, 0x274C, 690 | 0x274E, 0x274E, 691 | 0x2753, 0x2755, 692 | 0x2757, 0x2757, 693 | 0x2763, 0x2763, 694 | 0x2764, 0x2764, 695 | 0x2765, 0x2767, 696 | 0x2795, 0x2797, 697 | 0x27A1, 0x27A1, 698 | 0x27B0, 0x27B0, 699 | 0x27BF, 0x27BF, 700 | 0x2934, 0x2935, 701 | 0x2B05, 0x2B07, 702 | 0x2B1B, 0x2B1C, 703 | 0x2B50, 0x2B50, 704 | 0x2B55, 0x2B55, 705 | 0x3030, 0x3030, 706 | 0x303D, 0x303D, 707 | 0x3297, 0x3297, 708 | 0x3299, 0x3299, 709 | 0x1F000, 0x1F003, 710 | 0x1F004, 0x1F004, 
711 | 0x1F005, 0x1F0CE, 712 | 0x1F0CF, 0x1F0CF, 713 | 0x1F0D0, 0x1F0FF, 714 | 0x1F10D, 0x1F10F, 715 | 0x1F12F, 0x1F12F, 716 | 0x1F16C, 0x1F16F, 717 | 0x1F170, 0x1F171, 718 | 0x1F17E, 0x1F17F, 719 | 0x1F18E, 0x1F18E, 720 | 0x1F191, 0x1F19A, 721 | 0x1F1AD, 0x1F1E5, 722 | 0x1F201, 0x1F202, 723 | 0x1F203, 0x1F20F, 724 | 0x1F21A, 0x1F21A, 725 | 0x1F22F, 0x1F22F, 726 | 0x1F232, 0x1F23A, 727 | 0x1F23C, 0x1F23F, 728 | 0x1F249, 0x1F24F, 729 | 0x1F250, 0x1F251, 730 | 0x1F252, 0x1F2FF, 731 | 0x1F300, 0x1F30C, 732 | 0x1F30D, 0x1F30E, 733 | 0x1F30F, 0x1F30F, 734 | 0x1F310, 0x1F310, 735 | 0x1F311, 0x1F311, 736 | 0x1F312, 0x1F312, 737 | 0x1F313, 0x1F315, 738 | 0x1F316, 0x1F318, 739 | 0x1F319, 0x1F319, 740 | 0x1F31A, 0x1F31A, 741 | 0x1F31B, 0x1F31B, 742 | 0x1F31C, 0x1F31C, 743 | 0x1F31D, 0x1F31E, 744 | 0x1F31F, 0x1F320, 745 | 0x1F321, 0x1F321, 746 | 0x1F322, 0x1F323, 747 | 0x1F324, 0x1F32C, 748 | 0x1F32D, 0x1F32F, 749 | 0x1F330, 0x1F331, 750 | 0x1F332, 0x1F333, 751 | 0x1F334, 0x1F335, 752 | 0x1F336, 0x1F336, 753 | 0x1F337, 0x1F34A, 754 | 0x1F34B, 0x1F34B, 755 | 0x1F34C, 0x1F34F, 756 | 0x1F350, 0x1F350, 757 | 0x1F351, 0x1F37B, 758 | 0x1F37C, 0x1F37C, 759 | 0x1F37D, 0x1F37D, 760 | 0x1F37E, 0x1F37F, 761 | 0x1F380, 0x1F393, 762 | 0x1F394, 0x1F395, 763 | 0x1F396, 0x1F397, 764 | 0x1F398, 0x1F398, 765 | 0x1F399, 0x1F39B, 766 | 0x1F39C, 0x1F39D, 767 | 0x1F39E, 0x1F39F, 768 | 0x1F3A0, 0x1F3C4, 769 | 0x1F3C5, 0x1F3C5, 770 | 0x1F3C6, 0x1F3C6, 771 | 0x1F3C7, 0x1F3C7, 772 | 0x1F3C8, 0x1F3C8, 773 | 0x1F3C9, 0x1F3C9, 774 | 0x1F3CA, 0x1F3CA, 775 | 0x1F3CB, 0x1F3CE, 776 | 0x1F3CF, 0x1F3D3, 777 | 0x1F3D4, 0x1F3DF, 778 | 0x1F3E0, 0x1F3E3, 779 | 0x1F3E4, 0x1F3E4, 780 | 0x1F3E5, 0x1F3F0, 781 | 0x1F3F1, 0x1F3F2, 782 | 0x1F3F3, 0x1F3F3, 783 | 0x1F3F4, 0x1F3F4, 784 | 0x1F3F5, 0x1F3F5, 785 | 0x1F3F6, 0x1F3F6, 786 | 0x1F3F7, 0x1F3F7, 787 | 0x1F3F8, 0x1F3FA, 788 | 0x1F400, 0x1F407, 789 | 0x1F408, 0x1F408, 790 | 0x1F409, 0x1F40B, 791 | 0x1F40C, 0x1F40E, 792 | 0x1F40F, 0x1F410, 793 | 0x1F411, 0x1F412, 794 | 
0x1F413, 0x1F413, 795 | 0x1F414, 0x1F414, 796 | 0x1F415, 0x1F415, 797 | 0x1F416, 0x1F416, 798 | 0x1F417, 0x1F429, 799 | 0x1F42A, 0x1F42A, 800 | 0x1F42B, 0x1F43E, 801 | 0x1F43F, 0x1F43F, 802 | 0x1F440, 0x1F440, 803 | 0x1F441, 0x1F441, 804 | 0x1F442, 0x1F464, 805 | 0x1F465, 0x1F465, 806 | 0x1F466, 0x1F46B, 807 | 0x1F46C, 0x1F46D, 808 | 0x1F46E, 0x1F4AC, 809 | 0x1F4AD, 0x1F4AD, 810 | 0x1F4AE, 0x1F4B5, 811 | 0x1F4B6, 0x1F4B7, 812 | 0x1F4B8, 0x1F4EB, 813 | 0x1F4EC, 0x1F4ED, 814 | 0x1F4EE, 0x1F4EE, 815 | 0x1F4EF, 0x1F4EF, 816 | 0x1F4F0, 0x1F4F4, 817 | 0x1F4F5, 0x1F4F5, 818 | 0x1F4F6, 0x1F4F7, 819 | 0x1F4F8, 0x1F4F8, 820 | 0x1F4F9, 0x1F4FC, 821 | 0x1F4FD, 0x1F4FD, 822 | 0x1F4FE, 0x1F4FE, 823 | 0x1F4FF, 0x1F502, 824 | 0x1F503, 0x1F503, 825 | 0x1F504, 0x1F507, 826 | 0x1F508, 0x1F508, 827 | 0x1F509, 0x1F509, 828 | 0x1F50A, 0x1F514, 829 | 0x1F515, 0x1F515, 830 | 0x1F516, 0x1F52B, 831 | 0x1F52C, 0x1F52D, 832 | 0x1F52E, 0x1F53D, 833 | 0x1F546, 0x1F548, 834 | 0x1F549, 0x1F54A, 835 | 0x1F54B, 0x1F54E, 836 | 0x1F54F, 0x1F54F, 837 | 0x1F550, 0x1F55B, 838 | 0x1F55C, 0x1F567, 839 | 0x1F568, 0x1F56E, 840 | 0x1F56F, 0x1F570, 841 | 0x1F571, 0x1F572, 842 | 0x1F573, 0x1F579, 843 | 0x1F57A, 0x1F57A, 844 | 0x1F57B, 0x1F586, 845 | 0x1F587, 0x1F587, 846 | 0x1F588, 0x1F589, 847 | 0x1F58A, 0x1F58D, 848 | 0x1F58E, 0x1F58F, 849 | 0x1F590, 0x1F590, 850 | 0x1F591, 0x1F594, 851 | 0x1F595, 0x1F596, 852 | 0x1F597, 0x1F5A3, 853 | 0x1F5A4, 0x1F5A4, 854 | 0x1F5A5, 0x1F5A5, 855 | 0x1F5A6, 0x1F5A7, 856 | 0x1F5A8, 0x1F5A8, 857 | 0x1F5A9, 0x1F5B0, 858 | 0x1F5B1, 0x1F5B2, 859 | 0x1F5B3, 0x1F5BB, 860 | 0x1F5BC, 0x1F5BC, 861 | 0x1F5BD, 0x1F5C1, 862 | 0x1F5C2, 0x1F5C4, 863 | 0x1F5C5, 0x1F5D0, 864 | 0x1F5D1, 0x1F5D3, 865 | 0x1F5D4, 0x1F5DB, 866 | 0x1F5DC, 0x1F5DE, 867 | 0x1F5DF, 0x1F5E0, 868 | 0x1F5E1, 0x1F5E1, 869 | 0x1F5E2, 0x1F5E2, 870 | 0x1F5E3, 0x1F5E3, 871 | 0x1F5E4, 0x1F5E7, 872 | 0x1F5E8, 0x1F5E8, 873 | 0x1F5E9, 0x1F5EE, 874 | 0x1F5EF, 0x1F5EF, 875 | 0x1F5F0, 0x1F5F2, 876 | 0x1F5F3, 0x1F5F3, 877 | 
0x1F5F4, 0x1F5F9, 878 | 0x1F5FA, 0x1F5FA, 879 | 0x1F5FB, 0x1F5FF, 880 | 0x1F600, 0x1F600, 881 | 0x1F601, 0x1F606, 882 | 0x1F607, 0x1F608, 883 | 0x1F609, 0x1F60D, 884 | 0x1F60E, 0x1F60E, 885 | 0x1F60F, 0x1F60F, 886 | 0x1F610, 0x1F610, 887 | 0x1F611, 0x1F611, 888 | 0x1F612, 0x1F614, 889 | 0x1F615, 0x1F615, 890 | 0x1F616, 0x1F616, 891 | 0x1F617, 0x1F617, 892 | 0x1F618, 0x1F618, 893 | 0x1F619, 0x1F619, 894 | 0x1F61A, 0x1F61A, 895 | 0x1F61B, 0x1F61B, 896 | 0x1F61C, 0x1F61E, 897 | 0x1F61F, 0x1F61F, 898 | 0x1F620, 0x1F625, 899 | 0x1F626, 0x1F627, 900 | 0x1F628, 0x1F62B, 901 | 0x1F62C, 0x1F62C, 902 | 0x1F62D, 0x1F62D, 903 | 0x1F62E, 0x1F62F, 904 | 0x1F630, 0x1F633, 905 | 0x1F634, 0x1F634, 906 | 0x1F635, 0x1F635, 907 | 0x1F636, 0x1F636, 908 | 0x1F637, 0x1F640, 909 | 0x1F641, 0x1F644, 910 | 0x1F645, 0x1F64F, 911 | 0x1F680, 0x1F680, 912 | 0x1F681, 0x1F682, 913 | 0x1F683, 0x1F685, 914 | 0x1F686, 0x1F686, 915 | 0x1F687, 0x1F687, 916 | 0x1F688, 0x1F688, 917 | 0x1F689, 0x1F689, 918 | 0x1F68A, 0x1F68B, 919 | 0x1F68C, 0x1F68C, 920 | 0x1F68D, 0x1F68D, 921 | 0x1F68E, 0x1F68E, 922 | 0x1F68F, 0x1F68F, 923 | 0x1F690, 0x1F690, 924 | 0x1F691, 0x1F693, 925 | 0x1F694, 0x1F694, 926 | 0x1F695, 0x1F695, 927 | 0x1F696, 0x1F696, 928 | 0x1F697, 0x1F697, 929 | 0x1F698, 0x1F698, 930 | 0x1F699, 0x1F69A, 931 | 0x1F69B, 0x1F6A1, 932 | 0x1F6A2, 0x1F6A2, 933 | 0x1F6A3, 0x1F6A3, 934 | 0x1F6A4, 0x1F6A5, 935 | 0x1F6A6, 0x1F6A6, 936 | 0x1F6A7, 0x1F6AD, 937 | 0x1F6AE, 0x1F6B1, 938 | 0x1F6B2, 0x1F6B2, 939 | 0x1F6B3, 0x1F6B5, 940 | 0x1F6B6, 0x1F6B6, 941 | 0x1F6B7, 0x1F6B8, 942 | 0x1F6B9, 0x1F6BE, 943 | 0x1F6BF, 0x1F6BF, 944 | 0x1F6C0, 0x1F6C0, 945 | 0x1F6C1, 0x1F6C5, 946 | 0x1F6C6, 0x1F6CA, 947 | 0x1F6CB, 0x1F6CB, 948 | 0x1F6CC, 0x1F6CC, 949 | 0x1F6CD, 0x1F6CF, 950 | 0x1F6D0, 0x1F6D0, 951 | 0x1F6D1, 0x1F6D2, 952 | 0x1F6D3, 0x1F6D4, 953 | 0x1F6D5, 0x1F6D5, 954 | 0x1F6D6, 0x1F6D7, 955 | 0x1F6D8, 0x1F6DB, 956 | 0x1F6DC, 0x1F6DC, 957 | 0x1F6DD, 0x1F6DF, 958 | 0x1F6E0, 0x1F6E5, 959 | 0x1F6E6, 0x1F6E8, 960 | 
0x1F6E9, 0x1F6E9, 961 | 0x1F6EA, 0x1F6EA, 962 | 0x1F6EB, 0x1F6EC, 963 | 0x1F6ED, 0x1F6EF, 964 | 0x1F6F0, 0x1F6F0, 965 | 0x1F6F1, 0x1F6F2, 966 | 0x1F6F3, 0x1F6F3, 967 | 0x1F6F4, 0x1F6F6, 968 | 0x1F6F7, 0x1F6F8, 969 | 0x1F6F9, 0x1F6F9, 970 | 0x1F6FA, 0x1F6FA, 971 | 0x1F6FB, 0x1F6FC, 972 | 0x1F6FD, 0x1F6FF, 973 | 0x1F774, 0x1F77F, 974 | 0x1F7D5, 0x1F7DF, 975 | 0x1F7E0, 0x1F7EB, 976 | 0x1F7EC, 0x1F7EF, 977 | 0x1F7F0, 0x1F7F0, 978 | 0x1F7F1, 0x1F7FF, 979 | 0x1F80C, 0x1F80F, 980 | 0x1F848, 0x1F84F, 981 | 0x1F85A, 0x1F85F, 982 | 0x1F888, 0x1F88F, 983 | 0x1F8AE, 0x1F8FF, 984 | 0x1F90C, 0x1F90C, 985 | 0x1F90D, 0x1F90F, 986 | 0x1F910, 0x1F918, 987 | 0x1F919, 0x1F91E, 988 | 0x1F91F, 0x1F91F, 989 | 0x1F920, 0x1F927, 990 | 0x1F928, 0x1F92F, 991 | 0x1F930, 0x1F930, 992 | 0x1F931, 0x1F932, 993 | 0x1F933, 0x1F93A, 994 | 0x1F93C, 0x1F93E, 995 | 0x1F93F, 0x1F93F, 996 | 0x1F940, 0x1F945, 997 | 0x1F947, 0x1F94B, 998 | 0x1F94C, 0x1F94C, 999 | 0x1F94D, 0x1F94F, 1000 | 0x1F950, 0x1F95E, 1001 | 0x1F95F, 0x1F96B, 1002 | 0x1F96C, 0x1F970, 1003 | 0x1F971, 0x1F971, 1004 | 0x1F972, 0x1F972, 1005 | 0x1F973, 0x1F976, 1006 | 0x1F977, 0x1F978, 1007 | 0x1F979, 0x1F979, 1008 | 0x1F97A, 0x1F97A, 1009 | 0x1F97B, 0x1F97B, 1010 | 0x1F97C, 0x1F97F, 1011 | 0x1F980, 0x1F984, 1012 | 0x1F985, 0x1F991, 1013 | 0x1F992, 0x1F997, 1014 | 0x1F998, 0x1F9A2, 1015 | 0x1F9A3, 0x1F9A4, 1016 | 0x1F9A5, 0x1F9AA, 1017 | 0x1F9AB, 0x1F9AD, 1018 | 0x1F9AE, 0x1F9AF, 1019 | 0x1F9B0, 0x1F9B9, 1020 | 0x1F9BA, 0x1F9BF, 1021 | 0x1F9C0, 0x1F9C0, 1022 | 0x1F9C1, 0x1F9C2, 1023 | 0x1F9C3, 0x1F9CA, 1024 | 0x1F9CB, 0x1F9CB, 1025 | 0x1F9CC, 0x1F9CC, 1026 | 0x1F9CD, 0x1F9CF, 1027 | 0x1F9D0, 0x1F9E6, 1028 | 0x1F9E7, 0x1F9FF, 1029 | 0x1FA00, 0x1FA6F, 1030 | 0x1FA70, 0x1FA73, 1031 | 0x1FA74, 0x1FA74, 1032 | 0x1FA75, 0x1FA77, 1033 | 0x1FA78, 0x1FA7A, 1034 | 0x1FA7B, 0x1FA7C, 1035 | 0x1FA7D, 0x1FA7F, 1036 | 0x1FA80, 0x1FA82, 1037 | 0x1FA83, 0x1FA86, 1038 | 0x1FA87, 0x1FA88, 1039 | 0x1FA89, 0x1FA8F, 1040 | 0x1FA90, 0x1FA95, 1041 | 0x1FA96, 
0x1FAA8, 1042 | 0x1FAA9, 0x1FAAC, 1043 | 0x1FAAD, 0x1FAAF, 1044 | 0x1FAB0, 0x1FAB6, 1045 | 0x1FAB7, 0x1FABA, 1046 | 0x1FABB, 0x1FABD, 1047 | 0x1FABE, 0x1FABE, 1048 | 0x1FABF, 0x1FABF, 1049 | 0x1FAC0, 0x1FAC2, 1050 | 0x1FAC3, 0x1FAC5, 1051 | 0x1FAC6, 0x1FACD, 1052 | 0x1FACE, 0x1FACF, 1053 | 0x1FAD0, 0x1FAD6, 1054 | 0x1FAD7, 0x1FAD9, 1055 | 0x1FADA, 0x1FADB, 1056 | 0x1FADC, 0x1FADF, 1057 | 0x1FAE0, 0x1FAE7, 1058 | 0x1FAE8, 0x1FAE8, 1059 | 0x1FAE9, 0x1FAEF, 1060 | 0x1FAF0, 0x1FAF6, 1061 | 0x1FAF7, 0x1FAF8, 1062 | 0x1FAF9, 0x1FAFF, 1063 | 0x1FC00, 0x1FFFD, 1064 | }; 1065 | 1066 | static const ucg_rune ucg_grapheme_extend_ranges[] = { 1067 | 0x0300, 0x036F, 1068 | 0x0483, 0x0487, 1069 | 0x0488, 0x0489, 1070 | 0x0591, 0x05BD, 1071 | 0x05BF, 0x05BF, 1072 | 0x05C1, 0x05C2, 1073 | 0x05C4, 0x05C5, 1074 | 0x05C7, 0x05C7, 1075 | 0x0610, 0x061A, 1076 | 0x064B, 0x065F, 1077 | 0x0670, 0x0670, 1078 | 0x06D6, 0x06DC, 1079 | 0x06DF, 0x06E4, 1080 | 0x06E7, 0x06E8, 1081 | 0x06EA, 0x06ED, 1082 | 0x0711, 0x0711, 1083 | 0x0730, 0x074A, 1084 | 0x07A6, 0x07B0, 1085 | 0x07EB, 0x07F3, 1086 | 0x07FD, 0x07FD, 1087 | 0x0816, 0x0819, 1088 | 0x081B, 0x0823, 1089 | 0x0825, 0x0827, 1090 | 0x0829, 0x082D, 1091 | 0x0859, 0x085B, 1092 | 0x0898, 0x089F, 1093 | 0x08CA, 0x08E1, 1094 | 0x08E3, 0x0902, 1095 | 0x093A, 0x093A, 1096 | 0x093C, 0x093C, 1097 | 0x0941, 0x0948, 1098 | 0x094D, 0x094D, 1099 | 0x0951, 0x0957, 1100 | 0x0962, 0x0963, 1101 | 0x0981, 0x0981, 1102 | 0x09BC, 0x09BC, 1103 | 0x09BE, 0x09BE, 1104 | 0x09C1, 0x09C4, 1105 | 0x09CD, 0x09CD, 1106 | 0x09D7, 0x09D7, 1107 | 0x09E2, 0x09E3, 1108 | 0x09FE, 0x09FE, 1109 | 0x0A01, 0x0A02, 1110 | 0x0A3C, 0x0A3C, 1111 | 0x0A41, 0x0A42, 1112 | 0x0A47, 0x0A48, 1113 | 0x0A4B, 0x0A4D, 1114 | 0x0A51, 0x0A51, 1115 | 0x0A70, 0x0A71, 1116 | 0x0A75, 0x0A75, 1117 | 0x0A81, 0x0A82, 1118 | 0x0ABC, 0x0ABC, 1119 | 0x0AC1, 0x0AC5, 1120 | 0x0AC7, 0x0AC8, 1121 | 0x0ACD, 0x0ACD, 1122 | 0x0AE2, 0x0AE3, 1123 | 0x0AFA, 0x0AFF, 1124 | 0x0B01, 0x0B01, 1125 | 0x0B3C, 0x0B3C, 1126 
| 0x0B3E, 0x0B3E, 1127 | 0x0B3F, 0x0B3F, 1128 | 0x0B41, 0x0B44, 1129 | 0x0B4D, 0x0B4D, 1130 | 0x0B55, 0x0B56, 1131 | 0x0B57, 0x0B57, 1132 | 0x0B62, 0x0B63, 1133 | 0x0B82, 0x0B82, 1134 | 0x0BBE, 0x0BBE, 1135 | 0x0BC0, 0x0BC0, 1136 | 0x0BCD, 0x0BCD, 1137 | 0x0BD7, 0x0BD7, 1138 | 0x0C00, 0x0C00, 1139 | 0x0C04, 0x0C04, 1140 | 0x0C3C, 0x0C3C, 1141 | 0x0C3E, 0x0C40, 1142 | 0x0C46, 0x0C48, 1143 | 0x0C4A, 0x0C4D, 1144 | 0x0C55, 0x0C56, 1145 | 0x0C62, 0x0C63, 1146 | 0x0C81, 0x0C81, 1147 | 0x0CBC, 0x0CBC, 1148 | 0x0CBF, 0x0CBF, 1149 | 0x0CC2, 0x0CC2, 1150 | 0x0CC6, 0x0CC6, 1151 | 0x0CCC, 0x0CCD, 1152 | 0x0CD5, 0x0CD6, 1153 | 0x0CE2, 0x0CE3, 1154 | 0x0D00, 0x0D01, 1155 | 0x0D3B, 0x0D3C, 1156 | 0x0D3E, 0x0D3E, 1157 | 0x0D41, 0x0D44, 1158 | 0x0D4D, 0x0D4D, 1159 | 0x0D57, 0x0D57, 1160 | 0x0D62, 0x0D63, 1161 | 0x0D81, 0x0D81, 1162 | 0x0DCA, 0x0DCA, 1163 | 0x0DCF, 0x0DCF, 1164 | 0x0DD2, 0x0DD4, 1165 | 0x0DD6, 0x0DD6, 1166 | 0x0DDF, 0x0DDF, 1167 | 0x0E31, 0x0E31, 1168 | 0x0E34, 0x0E3A, 1169 | 0x0E47, 0x0E4E, 1170 | 0x0EB1, 0x0EB1, 1171 | 0x0EB4, 0x0EBC, 1172 | 0x0EC8, 0x0ECE, 1173 | 0x0F18, 0x0F19, 1174 | 0x0F35, 0x0F35, 1175 | 0x0F37, 0x0F37, 1176 | 0x0F39, 0x0F39, 1177 | 0x0F71, 0x0F7E, 1178 | 0x0F80, 0x0F84, 1179 | 0x0F86, 0x0F87, 1180 | 0x0F8D, 0x0F97, 1181 | 0x0F99, 0x0FBC, 1182 | 0x0FC6, 0x0FC6, 1183 | 0x102D, 0x1030, 1184 | 0x1032, 0x1037, 1185 | 0x1039, 0x103A, 1186 | 0x103D, 0x103E, 1187 | 0x1058, 0x1059, 1188 | 0x105E, 0x1060, 1189 | 0x1071, 0x1074, 1190 | 0x1082, 0x1082, 1191 | 0x1085, 0x1086, 1192 | 0x108D, 0x108D, 1193 | 0x109D, 0x109D, 1194 | 0x135D, 0x135F, 1195 | 0x1712, 0x1714, 1196 | 0x1732, 0x1733, 1197 | 0x1752, 0x1753, 1198 | 0x1772, 0x1773, 1199 | 0x17B4, 0x17B5, 1200 | 0x17B7, 0x17BD, 1201 | 0x17C6, 0x17C6, 1202 | 0x17C9, 0x17D3, 1203 | 0x17DD, 0x17DD, 1204 | 0x180B, 0x180D, 1205 | 0x180F, 0x180F, 1206 | 0x1885, 0x1886, 1207 | 0x18A9, 0x18A9, 1208 | 0x1920, 0x1922, 1209 | 0x1927, 0x1928, 1210 | 0x1932, 0x1932, 1211 | 0x1939, 0x193B, 1212 | 0x1A17, 0x1A18, 
1213 | 0x1A1B, 0x1A1B, 1214 | 0x1A56, 0x1A56, 1215 | 0x1A58, 0x1A5E, 1216 | 0x1A60, 0x1A60, 1217 | 0x1A62, 0x1A62, 1218 | 0x1A65, 0x1A6C, 1219 | 0x1A73, 0x1A7C, 1220 | 0x1A7F, 0x1A7F, 1221 | 0x1AB0, 0x1ABD, 1222 | 0x1ABE, 0x1ABE, 1223 | 0x1ABF, 0x1ACE, 1224 | 0x1B00, 0x1B03, 1225 | 0x1B34, 0x1B34, 1226 | 0x1B35, 0x1B35, 1227 | 0x1B36, 0x1B3A, 1228 | 0x1B3C, 0x1B3C, 1229 | 0x1B42, 0x1B42, 1230 | 0x1B6B, 0x1B73, 1231 | 0x1B80, 0x1B81, 1232 | 0x1BA2, 0x1BA5, 1233 | 0x1BA8, 0x1BA9, 1234 | 0x1BAB, 0x1BAD, 1235 | 0x1BE6, 0x1BE6, 1236 | 0x1BE8, 0x1BE9, 1237 | 0x1BED, 0x1BED, 1238 | 0x1BEF, 0x1BF1, 1239 | 0x1C2C, 0x1C33, 1240 | 0x1C36, 0x1C37, 1241 | 0x1CD0, 0x1CD2, 1242 | 0x1CD4, 0x1CE0, 1243 | 0x1CE2, 0x1CE8, 1244 | 0x1CED, 0x1CED, 1245 | 0x1CF4, 0x1CF4, 1246 | 0x1CF8, 0x1CF9, 1247 | 0x1DC0, 0x1DFF, 1248 | 0x200C, 0x200C, 1249 | 0x20D0, 0x20DC, 1250 | 0x20DD, 0x20E0, 1251 | 0x20E1, 0x20E1, 1252 | 0x20E2, 0x20E4, 1253 | 0x20E5, 0x20F0, 1254 | 0x2CEF, 0x2CF1, 1255 | 0x2D7F, 0x2D7F, 1256 | 0x2DE0, 0x2DFF, 1257 | 0x302A, 0x302D, 1258 | 0x302E, 0x302F, 1259 | 0x3099, 0x309A, 1260 | 0xA66F, 0xA66F, 1261 | 0xA670, 0xA672, 1262 | 0xA674, 0xA67D, 1263 | 0xA69E, 0xA69F, 1264 | 0xA6F0, 0xA6F1, 1265 | 0xA802, 0xA802, 1266 | 0xA806, 0xA806, 1267 | 0xA80B, 0xA80B, 1268 | 0xA825, 0xA826, 1269 | 0xA82C, 0xA82C, 1270 | 0xA8C4, 0xA8C5, 1271 | 0xA8E0, 0xA8F1, 1272 | 0xA8FF, 0xA8FF, 1273 | 0xA926, 0xA92D, 1274 | 0xA947, 0xA951, 1275 | 0xA980, 0xA982, 1276 | 0xA9B3, 0xA9B3, 1277 | 0xA9B6, 0xA9B9, 1278 | 0xA9BC, 0xA9BD, 1279 | 0xA9E5, 0xA9E5, 1280 | 0xAA29, 0xAA2E, 1281 | 0xAA31, 0xAA32, 1282 | 0xAA35, 0xAA36, 1283 | 0xAA43, 0xAA43, 1284 | 0xAA4C, 0xAA4C, 1285 | 0xAA7C, 0xAA7C, 1286 | 0xAAB0, 0xAAB0, 1287 | 0xAAB2, 0xAAB4, 1288 | 0xAAB7, 0xAAB8, 1289 | 0xAABE, 0xAABF, 1290 | 0xAAC1, 0xAAC1, 1291 | 0xAAEC, 0xAAED, 1292 | 0xAAF6, 0xAAF6, 1293 | 0xABE5, 0xABE5, 1294 | 0xABE8, 0xABE8, 1295 | 0xABED, 0xABED, 1296 | 0xFB1E, 0xFB1E, 1297 | 0xFE00, 0xFE0F, 1298 | 0xFE20, 0xFE2F, 1299 | 0xFF9E, 
0xFF9F, 1300 | 0x101FD, 0x101FD, 1301 | 0x102E0, 0x102E0, 1302 | 0x10376, 0x1037A, 1303 | 0x10A01, 0x10A03, 1304 | 0x10A05, 0x10A06, 1305 | 0x10A0C, 0x10A0F, 1306 | 0x10A38, 0x10A3A, 1307 | 0x10A3F, 0x10A3F, 1308 | 0x10AE5, 0x10AE6, 1309 | 0x10D24, 0x10D27, 1310 | 0x10EAB, 0x10EAC, 1311 | 0x10EFD, 0x10EFF, 1312 | 0x10F46, 0x10F50, 1313 | 0x10F82, 0x10F85, 1314 | 0x11001, 0x11001, 1315 | 0x11038, 0x11046, 1316 | 0x11070, 0x11070, 1317 | 0x11073, 0x11074, 1318 | 0x1107F, 0x11081, 1319 | 0x110B3, 0x110B6, 1320 | 0x110B9, 0x110BA, 1321 | 0x110C2, 0x110C2, 1322 | 0x11100, 0x11102, 1323 | 0x11127, 0x1112B, 1324 | 0x1112D, 0x11134, 1325 | 0x11173, 0x11173, 1326 | 0x11180, 0x11181, 1327 | 0x111B6, 0x111BE, 1328 | 0x111C9, 0x111CC, 1329 | 0x111CF, 0x111CF, 1330 | 0x1122F, 0x11231, 1331 | 0x11234, 0x11234, 1332 | 0x11236, 0x11237, 1333 | 0x1123E, 0x1123E, 1334 | 0x11241, 0x11241, 1335 | 0x112DF, 0x112DF, 1336 | 0x112E3, 0x112EA, 1337 | 0x11300, 0x11301, 1338 | 0x1133B, 0x1133C, 1339 | 0x1133E, 0x1133E, 1340 | 0x11340, 0x11340, 1341 | 0x11357, 0x11357, 1342 | 0x11366, 0x1136C, 1343 | 0x11370, 0x11374, 1344 | 0x11438, 0x1143F, 1345 | 0x11442, 0x11444, 1346 | 0x11446, 0x11446, 1347 | 0x1145E, 0x1145E, 1348 | 0x114B0, 0x114B0, 1349 | 0x114B3, 0x114B8, 1350 | 0x114BA, 0x114BA, 1351 | 0x114BD, 0x114BD, 1352 | 0x114BF, 0x114C0, 1353 | 0x114C2, 0x114C3, 1354 | 0x115AF, 0x115AF, 1355 | 0x115B2, 0x115B5, 1356 | 0x115BC, 0x115BD, 1357 | 0x115BF, 0x115C0, 1358 | 0x115DC, 0x115DD, 1359 | 0x11633, 0x1163A, 1360 | 0x1163D, 0x1163D, 1361 | 0x1163F, 0x11640, 1362 | 0x116AB, 0x116AB, 1363 | 0x116AD, 0x116AD, 1364 | 0x116B0, 0x116B5, 1365 | 0x116B7, 0x116B7, 1366 | 0x1171D, 0x1171F, 1367 | 0x11722, 0x11725, 1368 | 0x11727, 0x1172B, 1369 | 0x1182F, 0x11837, 1370 | 0x11839, 0x1183A, 1371 | 0x11930, 0x11930, 1372 | 0x1193B, 0x1193C, 1373 | 0x1193E, 0x1193E, 1374 | 0x11943, 0x11943, 1375 | 0x119D4, 0x119D7, 1376 | 0x119DA, 0x119DB, 1377 | 0x119E0, 0x119E0, 1378 | 0x11A01, 0x11A0A, 1379 | 0x11A33, 
0x11A38, 1380 | 0x11A3B, 0x11A3E, 1381 | 0x11A47, 0x11A47, 1382 | 0x11A51, 0x11A56, 1383 | 0x11A59, 0x11A5B, 1384 | 0x11A8A, 0x11A96, 1385 | 0x11A98, 0x11A99, 1386 | 0x11C30, 0x11C36, 1387 | 0x11C38, 0x11C3D, 1388 | 0x11C3F, 0x11C3F, 1389 | 0x11C92, 0x11CA7, 1390 | 0x11CAA, 0x11CB0, 1391 | 0x11CB2, 0x11CB3, 1392 | 0x11CB5, 0x11CB6, 1393 | 0x11D31, 0x11D36, 1394 | 0x11D3A, 0x11D3A, 1395 | 0x11D3C, 0x11D3D, 1396 | 0x11D3F, 0x11D45, 1397 | 0x11D47, 0x11D47, 1398 | 0x11D90, 0x11D91, 1399 | 0x11D95, 0x11D95, 1400 | 0x11D97, 0x11D97, 1401 | 0x11EF3, 0x11EF4, 1402 | 0x11F00, 0x11F01, 1403 | 0x11F36, 0x11F3A, 1404 | 0x11F40, 0x11F40, 1405 | 0x11F42, 0x11F42, 1406 | 0x13440, 0x13440, 1407 | 0x13447, 0x13455, 1408 | 0x16AF0, 0x16AF4, 1409 | 0x16B30, 0x16B36, 1410 | 0x16F4F, 0x16F4F, 1411 | 0x16F8F, 0x16F92, 1412 | 0x16FE4, 0x16FE4, 1413 | 0x1BC9D, 0x1BC9E, 1414 | 0x1CF00, 0x1CF2D, 1415 | 0x1CF30, 0x1CF46, 1416 | 0x1D165, 0x1D165, 1417 | 0x1D167, 0x1D169, 1418 | 0x1D16E, 0x1D172, 1419 | 0x1D17B, 0x1D182, 1420 | 0x1D185, 0x1D18B, 1421 | 0x1D1AA, 0x1D1AD, 1422 | 0x1D242, 0x1D244, 1423 | 0x1DA00, 0x1DA36, 1424 | 0x1DA3B, 0x1DA6C, 1425 | 0x1DA75, 0x1DA75, 1426 | 0x1DA84, 0x1DA84, 1427 | 0x1DA9B, 0x1DA9F, 1428 | 0x1DAA1, 0x1DAAF, 1429 | 0x1E000, 0x1E006, 1430 | 0x1E008, 0x1E018, 1431 | 0x1E01B, 0x1E021, 1432 | 0x1E023, 0x1E024, 1433 | 0x1E026, 0x1E02A, 1434 | 0x1E08F, 0x1E08F, 1435 | 0x1E130, 0x1E136, 1436 | 0x1E2AE, 0x1E2AE, 1437 | 0x1E2EC, 0x1E2EF, 1438 | 0x1E4EC, 0x1E4EF, 1439 | 0x1E8D0, 0x1E8D6, 1440 | 0x1E944, 0x1E94A, 1441 | 0xE0020, 0xE007F, 1442 | 0xE0100, 0xE01EF, 1443 | }; 1444 | 1445 | static const ucg_rune ucg_hangul_syllable_lv_singlets[] = { 1446 | 0xAC00, 1447 | 0xAC1C, 1448 | 0xAC38, 1449 | 0xAC54, 1450 | 0xAC70, 1451 | 0xAC8C, 1452 | 0xACA8, 1453 | 0xACC4, 1454 | 0xACE0, 1455 | 0xACFC, 1456 | 0xAD18, 1457 | 0xAD34, 1458 | 0xAD50, 1459 | 0xAD6C, 1460 | 0xAD88, 1461 | 0xADA4, 1462 | 0xADC0, 1463 | 0xADDC, 1464 | 0xADF8, 1465 | 0xAE14, 1466 | 0xAE30, 1467 | 0xAE4C, 
1468 | 0xAE68, 1469 | 0xAE84, 1470 | 0xAEA0, 1471 | 0xAEBC, 1472 | 0xAED8, 1473 | 0xAEF4, 1474 | 0xAF10, 1475 | 0xAF2C, 1476 | 0xAF48, 1477 | 0xAF64, 1478 | 0xAF80, 1479 | 0xAF9C, 1480 | 0xAFB8, 1481 | 0xAFD4, 1482 | 0xAFF0, 1483 | 0xB00C, 1484 | 0xB028, 1485 | 0xB044, 1486 | 0xB060, 1487 | 0xB07C, 1488 | 0xB098, 1489 | 0xB0B4, 1490 | 0xB0D0, 1491 | 0xB0EC, 1492 | 0xB108, 1493 | 0xB124, 1494 | 0xB140, 1495 | 0xB15C, 1496 | 0xB178, 1497 | 0xB194, 1498 | 0xB1B0, 1499 | 0xB1CC, 1500 | 0xB1E8, 1501 | 0xB204, 1502 | 0xB220, 1503 | 0xB23C, 1504 | 0xB258, 1505 | 0xB274, 1506 | 0xB290, 1507 | 0xB2AC, 1508 | 0xB2C8, 1509 | 0xB2E4, 1510 | 0xB300, 1511 | 0xB31C, 1512 | 0xB338, 1513 | 0xB354, 1514 | 0xB370, 1515 | 0xB38C, 1516 | 0xB3A8, 1517 | 0xB3C4, 1518 | 0xB3E0, 1519 | 0xB3FC, 1520 | 0xB418, 1521 | 0xB434, 1522 | 0xB450, 1523 | 0xB46C, 1524 | 0xB488, 1525 | 0xB4A4, 1526 | 0xB4C0, 1527 | 0xB4DC, 1528 | 0xB4F8, 1529 | 0xB514, 1530 | 0xB530, 1531 | 0xB54C, 1532 | 0xB568, 1533 | 0xB584, 1534 | 0xB5A0, 1535 | 0xB5BC, 1536 | 0xB5D8, 1537 | 0xB5F4, 1538 | 0xB610, 1539 | 0xB62C, 1540 | 0xB648, 1541 | 0xB664, 1542 | 0xB680, 1543 | 0xB69C, 1544 | 0xB6B8, 1545 | 0xB6D4, 1546 | 0xB6F0, 1547 | 0xB70C, 1548 | 0xB728, 1549 | 0xB744, 1550 | 0xB760, 1551 | 0xB77C, 1552 | 0xB798, 1553 | 0xB7B4, 1554 | 0xB7D0, 1555 | 0xB7EC, 1556 | 0xB808, 1557 | 0xB824, 1558 | 0xB840, 1559 | 0xB85C, 1560 | 0xB878, 1561 | 0xB894, 1562 | 0xB8B0, 1563 | 0xB8CC, 1564 | 0xB8E8, 1565 | 0xB904, 1566 | 0xB920, 1567 | 0xB93C, 1568 | 0xB958, 1569 | 0xB974, 1570 | 0xB990, 1571 | 0xB9AC, 1572 | 0xB9C8, 1573 | 0xB9E4, 1574 | 0xBA00, 1575 | 0xBA1C, 1576 | 0xBA38, 1577 | 0xBA54, 1578 | 0xBA70, 1579 | 0xBA8C, 1580 | 0xBAA8, 1581 | 0xBAC4, 1582 | 0xBAE0, 1583 | 0xBAFC, 1584 | 0xBB18, 1585 | 0xBB34, 1586 | 0xBB50, 1587 | 0xBB6C, 1588 | 0xBB88, 1589 | 0xBBA4, 1590 | 0xBBC0, 1591 | 0xBBDC, 1592 | 0xBBF8, 1593 | 0xBC14, 1594 | 0xBC30, 1595 | 0xBC4C, 1596 | 0xBC68, 1597 | 0xBC84, 1598 | 0xBCA0, 1599 | 0xBCBC, 1600 | 0xBCD8, 1601 
| 0xBCF4, 1602 | 0xBD10, 1603 | 0xBD2C, 1604 | 0xBD48, 1605 | 0xBD64, 1606 | 0xBD80, 1607 | 0xBD9C, 1608 | 0xBDB8, 1609 | 0xBDD4, 1610 | 0xBDF0, 1611 | 0xBE0C, 1612 | 0xBE28, 1613 | 0xBE44, 1614 | 0xBE60, 1615 | 0xBE7C, 1616 | 0xBE98, 1617 | 0xBEB4, 1618 | 0xBED0, 1619 | 0xBEEC, 1620 | 0xBF08, 1621 | 0xBF24, 1622 | 0xBF40, 1623 | 0xBF5C, 1624 | 0xBF78, 1625 | 0xBF94, 1626 | 0xBFB0, 1627 | 0xBFCC, 1628 | 0xBFE8, 1629 | 0xC004, 1630 | 0xC020, 1631 | 0xC03C, 1632 | 0xC058, 1633 | 0xC074, 1634 | 0xC090, 1635 | 0xC0AC, 1636 | 0xC0C8, 1637 | 0xC0E4, 1638 | 0xC100, 1639 | 0xC11C, 1640 | 0xC138, 1641 | 0xC154, 1642 | 0xC170, 1643 | 0xC18C, 1644 | 0xC1A8, 1645 | 0xC1C4, 1646 | 0xC1E0, 1647 | 0xC1FC, 1648 | 0xC218, 1649 | 0xC234, 1650 | 0xC250, 1651 | 0xC26C, 1652 | 0xC288, 1653 | 0xC2A4, 1654 | 0xC2C0, 1655 | 0xC2DC, 1656 | 0xC2F8, 1657 | 0xC314, 1658 | 0xC330, 1659 | 0xC34C, 1660 | 0xC368, 1661 | 0xC384, 1662 | 0xC3A0, 1663 | 0xC3BC, 1664 | 0xC3D8, 1665 | 0xC3F4, 1666 | 0xC410, 1667 | 0xC42C, 1668 | 0xC448, 1669 | 0xC464, 1670 | 0xC480, 1671 | 0xC49C, 1672 | 0xC4B8, 1673 | 0xC4D4, 1674 | 0xC4F0, 1675 | 0xC50C, 1676 | 0xC528, 1677 | 0xC544, 1678 | 0xC560, 1679 | 0xC57C, 1680 | 0xC598, 1681 | 0xC5B4, 1682 | 0xC5D0, 1683 | 0xC5EC, 1684 | 0xC608, 1685 | 0xC624, 1686 | 0xC640, 1687 | 0xC65C, 1688 | 0xC678, 1689 | 0xC694, 1690 | 0xC6B0, 1691 | 0xC6CC, 1692 | 0xC6E8, 1693 | 0xC704, 1694 | 0xC720, 1695 | 0xC73C, 1696 | 0xC758, 1697 | 0xC774, 1698 | 0xC790, 1699 | 0xC7AC, 1700 | 0xC7C8, 1701 | 0xC7E4, 1702 | 0xC800, 1703 | 0xC81C, 1704 | 0xC838, 1705 | 0xC854, 1706 | 0xC870, 1707 | 0xC88C, 1708 | 0xC8A8, 1709 | 0xC8C4, 1710 | 0xC8E0, 1711 | 0xC8FC, 1712 | 0xC918, 1713 | 0xC934, 1714 | 0xC950, 1715 | 0xC96C, 1716 | 0xC988, 1717 | 0xC9A4, 1718 | 0xC9C0, 1719 | 0xC9DC, 1720 | 0xC9F8, 1721 | 0xCA14, 1722 | 0xCA30, 1723 | 0xCA4C, 1724 | 0xCA68, 1725 | 0xCA84, 1726 | 0xCAA0, 1727 | 0xCABC, 1728 | 0xCAD8, 1729 | 0xCAF4, 1730 | 0xCB10, 1731 | 0xCB2C, 1732 | 0xCB48, 1733 | 0xCB64, 1734 | 
0xCB80, 1735 | 0xCB9C, 1736 | 0xCBB8, 1737 | 0xCBD4, 1738 | 0xCBF0, 1739 | 0xCC0C, 1740 | 0xCC28, 1741 | 0xCC44, 1742 | 0xCC60, 1743 | 0xCC7C, 1744 | 0xCC98, 1745 | 0xCCB4, 1746 | 0xCCD0, 1747 | 0xCCEC, 1748 | 0xCD08, 1749 | 0xCD24, 1750 | 0xCD40, 1751 | 0xCD5C, 1752 | 0xCD78, 1753 | 0xCD94, 1754 | 0xCDB0, 1755 | 0xCDCC, 1756 | 0xCDE8, 1757 | 0xCE04, 1758 | 0xCE20, 1759 | 0xCE3C, 1760 | 0xCE58, 1761 | 0xCE74, 1762 | 0xCE90, 1763 | 0xCEAC, 1764 | 0xCEC8, 1765 | 0xCEE4, 1766 | 0xCF00, 1767 | 0xCF1C, 1768 | 0xCF38, 1769 | 0xCF54, 1770 | 0xCF70, 1771 | 0xCF8C, 1772 | 0xCFA8, 1773 | 0xCFC4, 1774 | 0xCFE0, 1775 | 0xCFFC, 1776 | 0xD018, 1777 | 0xD034, 1778 | 0xD050, 1779 | 0xD06C, 1780 | 0xD088, 1781 | 0xD0A4, 1782 | 0xD0C0, 1783 | 0xD0DC, 1784 | 0xD0F8, 1785 | 0xD114, 1786 | 0xD130, 1787 | 0xD14C, 1788 | 0xD168, 1789 | 0xD184, 1790 | 0xD1A0, 1791 | 0xD1BC, 1792 | 0xD1D8, 1793 | 0xD1F4, 1794 | 0xD210, 1795 | 0xD22C, 1796 | 0xD248, 1797 | 0xD264, 1798 | 0xD280, 1799 | 0xD29C, 1800 | 0xD2B8, 1801 | 0xD2D4, 1802 | 0xD2F0, 1803 | 0xD30C, 1804 | 0xD328, 1805 | 0xD344, 1806 | 0xD360, 1807 | 0xD37C, 1808 | 0xD398, 1809 | 0xD3B4, 1810 | 0xD3D0, 1811 | 0xD3EC, 1812 | 0xD408, 1813 | 0xD424, 1814 | 0xD440, 1815 | 0xD45C, 1816 | 0xD478, 1817 | 0xD494, 1818 | 0xD4B0, 1819 | 0xD4CC, 1820 | 0xD4E8, 1821 | 0xD504, 1822 | 0xD520, 1823 | 0xD53C, 1824 | 0xD558, 1825 | 0xD574, 1826 | 0xD590, 1827 | 0xD5AC, 1828 | 0xD5C8, 1829 | 0xD5E4, 1830 | 0xD600, 1831 | 0xD61C, 1832 | 0xD638, 1833 | 0xD654, 1834 | 0xD670, 1835 | 0xD68C, 1836 | 0xD6A8, 1837 | 0xD6C4, 1838 | 0xD6E0, 1839 | 0xD6FC, 1840 | 0xD718, 1841 | 0xD734, 1842 | 0xD750, 1843 | 0xD76C, 1844 | 0xD788, 1845 | }; 1846 | 1847 | static const ucg_rune ucg_hangul_syllable_lvt_ranges[] = { 1848 | 0xAC01, 0xAC1B, 1849 | 0xAC1D, 0xAC37, 1850 | 0xAC39, 0xAC53, 1851 | 0xAC55, 0xAC6F, 1852 | 0xAC71, 0xAC8B, 1853 | 0xAC8D, 0xACA7, 1854 | 0xACA9, 0xACC3, 1855 | 0xACC5, 0xACDF, 1856 | 0xACE1, 0xACFB, 1857 | 0xACFD, 0xAD17, 1858 | 0xAD19, 0xAD33, 1859 
| 0xAD35, 0xAD4F, 1860 | 0xAD51, 0xAD6B, 1861 | 0xAD6D, 0xAD87, 1862 | 0xAD89, 0xADA3, 1863 | 0xADA5, 0xADBF, 1864 | 0xADC1, 0xADDB, 1865 | 0xADDD, 0xADF7, 1866 | 0xADF9, 0xAE13, 1867 | 0xAE15, 0xAE2F, 1868 | 0xAE31, 0xAE4B, 1869 | 0xAE4D, 0xAE67, 1870 | 0xAE69, 0xAE83, 1871 | 0xAE85, 0xAE9F, 1872 | 0xAEA1, 0xAEBB, 1873 | 0xAEBD, 0xAED7, 1874 | 0xAED9, 0xAEF3, 1875 | 0xAEF5, 0xAF0F, 1876 | 0xAF11, 0xAF2B, 1877 | 0xAF2D, 0xAF47, 1878 | 0xAF49, 0xAF63, 1879 | 0xAF65, 0xAF7F, 1880 | 0xAF81, 0xAF9B, 1881 | 0xAF9D, 0xAFB7, 1882 | 0xAFB9, 0xAFD3, 1883 | 0xAFD5, 0xAFEF, 1884 | 0xAFF1, 0xB00B, 1885 | 0xB00D, 0xB027, 1886 | 0xB029, 0xB043, 1887 | 0xB045, 0xB05F, 1888 | 0xB061, 0xB07B, 1889 | 0xB07D, 0xB097, 1890 | 0xB099, 0xB0B3, 1891 | 0xB0B5, 0xB0CF, 1892 | 0xB0D1, 0xB0EB, 1893 | 0xB0ED, 0xB107, 1894 | 0xB109, 0xB123, 1895 | 0xB125, 0xB13F, 1896 | 0xB141, 0xB15B, 1897 | 0xB15D, 0xB177, 1898 | 0xB179, 0xB193, 1899 | 0xB195, 0xB1AF, 1900 | 0xB1B1, 0xB1CB, 1901 | 0xB1CD, 0xB1E7, 1902 | 0xB1E9, 0xB203, 1903 | 0xB205, 0xB21F, 1904 | 0xB221, 0xB23B, 1905 | 0xB23D, 0xB257, 1906 | 0xB259, 0xB273, 1907 | 0xB275, 0xB28F, 1908 | 0xB291, 0xB2AB, 1909 | 0xB2AD, 0xB2C7, 1910 | 0xB2C9, 0xB2E3, 1911 | 0xB2E5, 0xB2FF, 1912 | 0xB301, 0xB31B, 1913 | 0xB31D, 0xB337, 1914 | 0xB339, 0xB353, 1915 | 0xB355, 0xB36F, 1916 | 0xB371, 0xB38B, 1917 | 0xB38D, 0xB3A7, 1918 | 0xB3A9, 0xB3C3, 1919 | 0xB3C5, 0xB3DF, 1920 | 0xB3E1, 0xB3FB, 1921 | 0xB3FD, 0xB417, 1922 | 0xB419, 0xB433, 1923 | 0xB435, 0xB44F, 1924 | 0xB451, 0xB46B, 1925 | 0xB46D, 0xB487, 1926 | 0xB489, 0xB4A3, 1927 | 0xB4A5, 0xB4BF, 1928 | 0xB4C1, 0xB4DB, 1929 | 0xB4DD, 0xB4F7, 1930 | 0xB4F9, 0xB513, 1931 | 0xB515, 0xB52F, 1932 | 0xB531, 0xB54B, 1933 | 0xB54D, 0xB567, 1934 | 0xB569, 0xB583, 1935 | 0xB585, 0xB59F, 1936 | 0xB5A1, 0xB5BB, 1937 | 0xB5BD, 0xB5D7, 1938 | 0xB5D9, 0xB5F3, 1939 | 0xB5F5, 0xB60F, 1940 | 0xB611, 0xB62B, 1941 | 0xB62D, 0xB647, 1942 | 0xB649, 0xB663, 1943 | 0xB665, 0xB67F, 1944 | 0xB681, 0xB69B, 1945 | 0xB69D, 0xB6B7, 
1946 | 0xB6B9, 0xB6D3, 1947 | 0xB6D5, 0xB6EF, 1948 | 0xB6F1, 0xB70B, 1949 | 0xB70D, 0xB727, 1950 | 0xB729, 0xB743, 1951 | 0xB745, 0xB75F, 1952 | 0xB761, 0xB77B, 1953 | 0xB77D, 0xB797, 1954 | 0xB799, 0xB7B3, 1955 | 0xB7B5, 0xB7CF, 1956 | 0xB7D1, 0xB7EB, 1957 | 0xB7ED, 0xB807, 1958 | 0xB809, 0xB823, 1959 | 0xB825, 0xB83F, 1960 | 0xB841, 0xB85B, 1961 | 0xB85D, 0xB877, 1962 | 0xB879, 0xB893, 1963 | 0xB895, 0xB8AF, 1964 | 0xB8B1, 0xB8CB, 1965 | 0xB8CD, 0xB8E7, 1966 | 0xB8E9, 0xB903, 1967 | 0xB905, 0xB91F, 1968 | 0xB921, 0xB93B, 1969 | 0xB93D, 0xB957, 1970 | 0xB959, 0xB973, 1971 | 0xB975, 0xB98F, 1972 | 0xB991, 0xB9AB, 1973 | 0xB9AD, 0xB9C7, 1974 | 0xB9C9, 0xB9E3, 1975 | 0xB9E5, 0xB9FF, 1976 | 0xBA01, 0xBA1B, 1977 | 0xBA1D, 0xBA37, 1978 | 0xBA39, 0xBA53, 1979 | 0xBA55, 0xBA6F, 1980 | 0xBA71, 0xBA8B, 1981 | 0xBA8D, 0xBAA7, 1982 | 0xBAA9, 0xBAC3, 1983 | 0xBAC5, 0xBADF, 1984 | 0xBAE1, 0xBAFB, 1985 | 0xBAFD, 0xBB17, 1986 | 0xBB19, 0xBB33, 1987 | 0xBB35, 0xBB4F, 1988 | 0xBB51, 0xBB6B, 1989 | 0xBB6D, 0xBB87, 1990 | 0xBB89, 0xBBA3, 1991 | 0xBBA5, 0xBBBF, 1992 | 0xBBC1, 0xBBDB, 1993 | 0xBBDD, 0xBBF7, 1994 | 0xBBF9, 0xBC13, 1995 | 0xBC15, 0xBC2F, 1996 | 0xBC31, 0xBC4B, 1997 | 0xBC4D, 0xBC67, 1998 | 0xBC69, 0xBC83, 1999 | 0xBC85, 0xBC9F, 2000 | 0xBCA1, 0xBCBB, 2001 | 0xBCBD, 0xBCD7, 2002 | 0xBCD9, 0xBCF3, 2003 | 0xBCF5, 0xBD0F, 2004 | 0xBD11, 0xBD2B, 2005 | 0xBD2D, 0xBD47, 2006 | 0xBD49, 0xBD63, 2007 | 0xBD65, 0xBD7F, 2008 | 0xBD81, 0xBD9B, 2009 | 0xBD9D, 0xBDB7, 2010 | 0xBDB9, 0xBDD3, 2011 | 0xBDD5, 0xBDEF, 2012 | 0xBDF1, 0xBE0B, 2013 | 0xBE0D, 0xBE27, 2014 | 0xBE29, 0xBE43, 2015 | 0xBE45, 0xBE5F, 2016 | 0xBE61, 0xBE7B, 2017 | 0xBE7D, 0xBE97, 2018 | 0xBE99, 0xBEB3, 2019 | 0xBEB5, 0xBECF, 2020 | 0xBED1, 0xBEEB, 2021 | 0xBEED, 0xBF07, 2022 | 0xBF09, 0xBF23, 2023 | 0xBF25, 0xBF3F, 2024 | 0xBF41, 0xBF5B, 2025 | 0xBF5D, 0xBF77, 2026 | 0xBF79, 0xBF93, 2027 | 0xBF95, 0xBFAF, 2028 | 0xBFB1, 0xBFCB, 2029 | 0xBFCD, 0xBFE7, 2030 | 0xBFE9, 0xC003, 2031 | 0xC005, 0xC01F, 2032 | 0xC021, 
0xC03B, 2033 | 0xC03D, 0xC057, 2034 | 0xC059, 0xC073, 2035 | 0xC075, 0xC08F, 2036 | 0xC091, 0xC0AB, 2037 | 0xC0AD, 0xC0C7, 2038 | 0xC0C9, 0xC0E3, 2039 | 0xC0E5, 0xC0FF, 2040 | 0xC101, 0xC11B, 2041 | 0xC11D, 0xC137, 2042 | 0xC139, 0xC153, 2043 | 0xC155, 0xC16F, 2044 | 0xC171, 0xC18B, 2045 | 0xC18D, 0xC1A7, 2046 | 0xC1A9, 0xC1C3, 2047 | 0xC1C5, 0xC1DF, 2048 | 0xC1E1, 0xC1FB, 2049 | 0xC1FD, 0xC217, 2050 | 0xC219, 0xC233, 2051 | 0xC235, 0xC24F, 2052 | 0xC251, 0xC26B, 2053 | 0xC26D, 0xC287, 2054 | 0xC289, 0xC2A3, 2055 | 0xC2A5, 0xC2BF, 2056 | 0xC2C1, 0xC2DB, 2057 | 0xC2DD, 0xC2F7, 2058 | 0xC2F9, 0xC313, 2059 | 0xC315, 0xC32F, 2060 | 0xC331, 0xC34B, 2061 | 0xC34D, 0xC367, 2062 | 0xC369, 0xC383, 2063 | 0xC385, 0xC39F, 2064 | 0xC3A1, 0xC3BB, 2065 | 0xC3BD, 0xC3D7, 2066 | 0xC3D9, 0xC3F3, 2067 | 0xC3F5, 0xC40F, 2068 | 0xC411, 0xC42B, 2069 | 0xC42D, 0xC447, 2070 | 0xC449, 0xC463, 2071 | 0xC465, 0xC47F, 2072 | 0xC481, 0xC49B, 2073 | 0xC49D, 0xC4B7, 2074 | 0xC4B9, 0xC4D3, 2075 | 0xC4D5, 0xC4EF, 2076 | 0xC4F1, 0xC50B, 2077 | 0xC50D, 0xC527, 2078 | 0xC529, 0xC543, 2079 | 0xC545, 0xC55F, 2080 | 0xC561, 0xC57B, 2081 | 0xC57D, 0xC597, 2082 | 0xC599, 0xC5B3, 2083 | 0xC5B5, 0xC5CF, 2084 | 0xC5D1, 0xC5EB, 2085 | 0xC5ED, 0xC607, 2086 | 0xC609, 0xC623, 2087 | 0xC625, 0xC63F, 2088 | 0xC641, 0xC65B, 2089 | 0xC65D, 0xC677, 2090 | 0xC679, 0xC693, 2091 | 0xC695, 0xC6AF, 2092 | 0xC6B1, 0xC6CB, 2093 | 0xC6CD, 0xC6E7, 2094 | 0xC6E9, 0xC703, 2095 | 0xC705, 0xC71F, 2096 | 0xC721, 0xC73B, 2097 | 0xC73D, 0xC757, 2098 | 0xC759, 0xC773, 2099 | 0xC775, 0xC78F, 2100 | 0xC791, 0xC7AB, 2101 | 0xC7AD, 0xC7C7, 2102 | 0xC7C9, 0xC7E3, 2103 | 0xC7E5, 0xC7FF, 2104 | 0xC801, 0xC81B, 2105 | 0xC81D, 0xC837, 2106 | 0xC839, 0xC853, 2107 | 0xC855, 0xC86F, 2108 | 0xC871, 0xC88B, 2109 | 0xC88D, 0xC8A7, 2110 | 0xC8A9, 0xC8C3, 2111 | 0xC8C5, 0xC8DF, 2112 | 0xC8E1, 0xC8FB, 2113 | 0xC8FD, 0xC917, 2114 | 0xC919, 0xC933, 2115 | 0xC935, 0xC94F, 2116 | 0xC951, 0xC96B, 2117 | 0xC96D, 0xC987, 2118 | 0xC989, 0xC9A3, 2119 | 
0xC9A5, 0xC9BF, 2120 | 0xC9C1, 0xC9DB, 2121 | 0xC9DD, 0xC9F7, 2122 | 0xC9F9, 0xCA13, 2123 | 0xCA15, 0xCA2F, 2124 | 0xCA31, 0xCA4B, 2125 | 0xCA4D, 0xCA67, 2126 | 0xCA69, 0xCA83, 2127 | 0xCA85, 0xCA9F, 2128 | 0xCAA1, 0xCABB, 2129 | 0xCABD, 0xCAD7, 2130 | 0xCAD9, 0xCAF3, 2131 | 0xCAF5, 0xCB0F, 2132 | 0xCB11, 0xCB2B, 2133 | 0xCB2D, 0xCB47, 2134 | 0xCB49, 0xCB63, 2135 | 0xCB65, 0xCB7F, 2136 | 0xCB81, 0xCB9B, 2137 | 0xCB9D, 0xCBB7, 2138 | 0xCBB9, 0xCBD3, 2139 | 0xCBD5, 0xCBEF, 2140 | 0xCBF1, 0xCC0B, 2141 | 0xCC0D, 0xCC27, 2142 | 0xCC29, 0xCC43, 2143 | 0xCC45, 0xCC5F, 2144 | 0xCC61, 0xCC7B, 2145 | 0xCC7D, 0xCC97, 2146 | 0xCC99, 0xCCB3, 2147 | 0xCCB5, 0xCCCF, 2148 | 0xCCD1, 0xCCEB, 2149 | 0xCCED, 0xCD07, 2150 | 0xCD09, 0xCD23, 2151 | 0xCD25, 0xCD3F, 2152 | 0xCD41, 0xCD5B, 2153 | 0xCD5D, 0xCD77, 2154 | 0xCD79, 0xCD93, 2155 | 0xCD95, 0xCDAF, 2156 | 0xCDB1, 0xCDCB, 2157 | 0xCDCD, 0xCDE7, 2158 | 0xCDE9, 0xCE03, 2159 | 0xCE05, 0xCE1F, 2160 | 0xCE21, 0xCE3B, 2161 | 0xCE3D, 0xCE57, 2162 | 0xCE59, 0xCE73, 2163 | 0xCE75, 0xCE8F, 2164 | 0xCE91, 0xCEAB, 2165 | 0xCEAD, 0xCEC7, 2166 | 0xCEC9, 0xCEE3, 2167 | 0xCEE5, 0xCEFF, 2168 | 0xCF01, 0xCF1B, 2169 | 0xCF1D, 0xCF37, 2170 | 0xCF39, 0xCF53, 2171 | 0xCF55, 0xCF6F, 2172 | 0xCF71, 0xCF8B, 2173 | 0xCF8D, 0xCFA7, 2174 | 0xCFA9, 0xCFC3, 2175 | 0xCFC5, 0xCFDF, 2176 | 0xCFE1, 0xCFFB, 2177 | 0xCFFD, 0xD017, 2178 | 0xD019, 0xD033, 2179 | 0xD035, 0xD04F, 2180 | 0xD051, 0xD06B, 2181 | 0xD06D, 0xD087, 2182 | 0xD089, 0xD0A3, 2183 | 0xD0A5, 0xD0BF, 2184 | 0xD0C1, 0xD0DB, 2185 | 0xD0DD, 0xD0F7, 2186 | 0xD0F9, 0xD113, 2187 | 0xD115, 0xD12F, 2188 | 0xD131, 0xD14B, 2189 | 0xD14D, 0xD167, 2190 | 0xD169, 0xD183, 2191 | 0xD185, 0xD19F, 2192 | 0xD1A1, 0xD1BB, 2193 | 0xD1BD, 0xD1D7, 2194 | 0xD1D9, 0xD1F3, 2195 | 0xD1F5, 0xD20F, 2196 | 0xD211, 0xD22B, 2197 | 0xD22D, 0xD247, 2198 | 0xD249, 0xD263, 2199 | 0xD265, 0xD27F, 2200 | 0xD281, 0xD29B, 2201 | 0xD29D, 0xD2B7, 2202 | 0xD2B9, 0xD2D3, 2203 | 0xD2D5, 0xD2EF, 2204 | 0xD2F1, 0xD30B, 2205 | 0xD30D, 0xD327, 2206 
| 0xD329, 0xD343, 2207 | 0xD345, 0xD35F, 2208 | 0xD361, 0xD37B, 2209 | 0xD37D, 0xD397, 2210 | 0xD399, 0xD3B3, 2211 | 0xD3B5, 0xD3CF, 2212 | 0xD3D1, 0xD3EB, 2213 | 0xD3ED, 0xD407, 2214 | 0xD409, 0xD423, 2215 | 0xD425, 0xD43F, 2216 | 0xD441, 0xD45B, 2217 | 0xD45D, 0xD477, 2218 | 0xD479, 0xD493, 2219 | 0xD495, 0xD4AF, 2220 | 0xD4B1, 0xD4CB, 2221 | 0xD4CD, 0xD4E7, 2222 | 0xD4E9, 0xD503, 2223 | 0xD505, 0xD51F, 2224 | 0xD521, 0xD53B, 2225 | 0xD53D, 0xD557, 2226 | 0xD559, 0xD573, 2227 | 0xD575, 0xD58F, 2228 | 0xD591, 0xD5AB, 2229 | 0xD5AD, 0xD5C7, 2230 | 0xD5C9, 0xD5E3, 2231 | 0xD5E5, 0xD5FF, 2232 | 0xD601, 0xD61B, 2233 | 0xD61D, 0xD637, 2234 | 0xD639, 0xD653, 2235 | 0xD655, 0xD66F, 2236 | 0xD671, 0xD68B, 2237 | 0xD68D, 0xD6A7, 2238 | 0xD6A9, 0xD6C3, 2239 | 0xD6C5, 0xD6DF, 2240 | 0xD6E1, 0xD6FB, 2241 | 0xD6FD, 0xD717, 2242 | 0xD719, 0xD733, 2243 | 0xD735, 0xD74F, 2244 | 0xD751, 0xD76B, 2245 | 0xD76D, 0xD787, 2246 | 0xD789, 0xD7A3, 2247 | }; 2248 | 2249 | static const ucg_rune ucg_indic_conjunct_break_consonant_ranges[] = { 2250 | 0x0915, 0x0939, 2251 | 0x0958, 0x095F, 2252 | 0x0978, 0x097F, 2253 | 0x0995, 0x09A8, 2254 | 0x09AA, 0x09B0, 2255 | 0x09B2, 0x09B2, 2256 | 0x09B6, 0x09B9, 2257 | 0x09DC, 0x09DD, 2258 | 0x09DF, 0x09DF, 2259 | 0x09F0, 0x09F1, 2260 | 0x0A95, 0x0AA8, 2261 | 0x0AAA, 0x0AB0, 2262 | 0x0AB2, 0x0AB3, 2263 | 0x0AB5, 0x0AB9, 2264 | 0x0AF9, 0x0AF9, 2265 | 0x0B15, 0x0B28, 2266 | 0x0B2A, 0x0B30, 2267 | 0x0B32, 0x0B33, 2268 | 0x0B35, 0x0B39, 2269 | 0x0B5C, 0x0B5D, 2270 | 0x0B5F, 0x0B5F, 2271 | 0x0B71, 0x0B71, 2272 | 0x0C15, 0x0C28, 2273 | 0x0C2A, 0x0C39, 2274 | 0x0C58, 0x0C5A, 2275 | 0x0D15, 0x0D3A, 2276 | }; 2277 | 2278 | static const ucg_rune ucg_indic_conjunct_break_extend_ranges[] = { 2279 | 0x0300, 0x034E, 2280 | 0x0350, 0x036F, 2281 | 0x0483, 0x0487, 2282 | 0x0591, 0x05BD, 2283 | 0x05BF, 0x05BF, 2284 | 0x05C1, 0x05C2, 2285 | 0x05C4, 0x05C5, 2286 | 0x05C7, 0x05C7, 2287 | 0x0610, 0x061A, 2288 | 0x064B, 0x065F, 2289 | 0x0670, 0x0670, 2290 | 0x06D6, 0x06DC, 
2291 | 0x06DF, 0x06E4, 2292 | 0x06E7, 0x06E8, 2293 | 0x06EA, 0x06ED, 2294 | 0x0711, 0x0711, 2295 | 0x0730, 0x074A, 2296 | 0x07EB, 0x07F3, 2297 | 0x07FD, 0x07FD, 2298 | 0x0816, 0x0819, 2299 | 0x081B, 0x0823, 2300 | 0x0825, 0x0827, 2301 | 0x0829, 0x082D, 2302 | 0x0859, 0x085B, 2303 | 0x0898, 0x089F, 2304 | 0x08CA, 0x08E1, 2305 | 0x08E3, 0x08FF, 2306 | 0x093C, 0x093C, 2307 | 0x0951, 0x0954, 2308 | 0x09BC, 0x09BC, 2309 | 0x09FE, 0x09FE, 2310 | 0x0A3C, 0x0A3C, 2311 | 0x0ABC, 0x0ABC, 2312 | 0x0B3C, 0x0B3C, 2313 | 0x0C3C, 0x0C3C, 2314 | 0x0C55, 0x0C56, 2315 | 0x0CBC, 0x0CBC, 2316 | 0x0D3B, 0x0D3C, 2317 | 0x0E38, 0x0E3A, 2318 | 0x0E48, 0x0E4B, 2319 | 0x0EB8, 0x0EBA, 2320 | 0x0EC8, 0x0ECB, 2321 | 0x0F18, 0x0F19, 2322 | 0x0F35, 0x0F35, 2323 | 0x0F37, 0x0F37, 2324 | 0x0F39, 0x0F39, 2325 | 0x0F71, 0x0F72, 2326 | 0x0F74, 0x0F74, 2327 | 0x0F7A, 0x0F7D, 2328 | 0x0F80, 0x0F80, 2329 | 0x0F82, 0x0F84, 2330 | 0x0F86, 0x0F87, 2331 | 0x0FC6, 0x0FC6, 2332 | 0x1037, 0x1037, 2333 | 0x1039, 0x103A, 2334 | 0x108D, 0x108D, 2335 | 0x135D, 0x135F, 2336 | 0x1714, 0x1714, 2337 | 0x17D2, 0x17D2, 2338 | 0x17DD, 0x17DD, 2339 | 0x18A9, 0x18A9, 2340 | 0x1939, 0x193B, 2341 | 0x1A17, 0x1A18, 2342 | 0x1A60, 0x1A60, 2343 | 0x1A75, 0x1A7C, 2344 | 0x1A7F, 0x1A7F, 2345 | 0x1AB0, 0x1ABD, 2346 | 0x1ABF, 0x1ACE, 2347 | 0x1B34, 0x1B34, 2348 | 0x1B6B, 0x1B73, 2349 | 0x1BAB, 0x1BAB, 2350 | 0x1BE6, 0x1BE6, 2351 | 0x1C37, 0x1C37, 2352 | 0x1CD0, 0x1CD2, 2353 | 0x1CD4, 0x1CE0, 2354 | 0x1CE2, 0x1CE8, 2355 | 0x1CED, 0x1CED, 2356 | 0x1CF4, 0x1CF4, 2357 | 0x1CF8, 0x1CF9, 2358 | 0x1DC0, 0x1DFF, 2359 | 0x200D, 0x200D, 2360 | 0x20D0, 0x20DC, 2361 | 0x20E1, 0x20E1, 2362 | 0x20E5, 0x20F0, 2363 | 0x2CEF, 0x2CF1, 2364 | 0x2D7F, 0x2D7F, 2365 | 0x2DE0, 0x2DFF, 2366 | 0x302A, 0x302D, 2367 | 0x302E, 0x302F, 2368 | 0x3099, 0x309A, 2369 | 0xA66F, 0xA66F, 2370 | 0xA674, 0xA67D, 2371 | 0xA69E, 0xA69F, 2372 | 0xA6F0, 0xA6F1, 2373 | 0xA82C, 0xA82C, 2374 | 0xA8E0, 0xA8F1, 2375 | 0xA92B, 0xA92D, 2376 | 0xA9B3, 0xA9B3, 2377 | 0xAAB0, 
0xAAB0, 2378 | 0xAAB2, 0xAAB4, 2379 | 0xAAB7, 0xAAB8, 2380 | 0xAABE, 0xAABF, 2381 | 0xAAC1, 0xAAC1, 2382 | 0xAAF6, 0xAAF6, 2383 | 0xABED, 0xABED, 2384 | 0xFB1E, 0xFB1E, 2385 | 0xFE20, 0xFE2F, 2386 | 0x101FD, 0x101FD, 2387 | 0x102E0, 0x102E0, 2388 | 0x10376, 0x1037A, 2389 | 0x10A0D, 0x10A0D, 2390 | 0x10A0F, 0x10A0F, 2391 | 0x10A38, 0x10A3A, 2392 | 0x10A3F, 0x10A3F, 2393 | 0x10AE5, 0x10AE6, 2394 | 0x10D24, 0x10D27, 2395 | 0x10EAB, 0x10EAC, 2396 | 0x10EFD, 0x10EFF, 2397 | 0x10F46, 0x10F50, 2398 | 0x10F82, 0x10F85, 2399 | 0x11070, 0x11070, 2400 | 0x1107F, 0x1107F, 2401 | 0x110BA, 0x110BA, 2402 | 0x11100, 0x11102, 2403 | 0x11133, 0x11134, 2404 | 0x11173, 0x11173, 2405 | 0x111CA, 0x111CA, 2406 | 0x11236, 0x11236, 2407 | 0x112E9, 0x112EA, 2408 | 0x1133B, 0x1133C, 2409 | 0x11366, 0x1136C, 2410 | 0x11370, 0x11374, 2411 | 0x11446, 0x11446, 2412 | 0x1145E, 0x1145E, 2413 | 0x114C3, 0x114C3, 2414 | 0x115C0, 0x115C0, 2415 | 0x116B7, 0x116B7, 2416 | 0x1172B, 0x1172B, 2417 | 0x1183A, 0x1183A, 2418 | 0x1193E, 0x1193E, 2419 | 0x11943, 0x11943, 2420 | 0x11A34, 0x11A34, 2421 | 0x11A47, 0x11A47, 2422 | 0x11A99, 0x11A99, 2423 | 0x11D42, 0x11D42, 2424 | 0x11D44, 0x11D45, 2425 | 0x11D97, 0x11D97, 2426 | 0x11F42, 0x11F42, 2427 | 0x16AF0, 0x16AF4, 2428 | 0x16B30, 0x16B36, 2429 | 0x1BC9E, 0x1BC9E, 2430 | 0x1D165, 0x1D165, 2431 | 0x1D167, 0x1D169, 2432 | 0x1D16E, 0x1D172, 2433 | 0x1D17B, 0x1D182, 2434 | 0x1D185, 0x1D18B, 2435 | 0x1D1AA, 0x1D1AD, 2436 | 0x1D242, 0x1D244, 2437 | 0x1E000, 0x1E006, 2438 | 0x1E008, 0x1E018, 2439 | 0x1E01B, 0x1E021, 2440 | 0x1E023, 0x1E024, 2441 | 0x1E026, 0x1E02A, 2442 | 0x1E08F, 0x1E08F, 2443 | 0x1E130, 0x1E136, 2444 | 0x1E2AE, 0x1E2AE, 2445 | 0x1E2EC, 0x1E2EF, 2446 | 0x1E4EC, 0x1E4EF, 2447 | 0x1E8D0, 0x1E8D6, 2448 | 0x1E944, 0x1E94A, 2449 | }; 2450 | 2451 | // Fullwidth (F) and Wide (W) are counted as 2. 2452 | // Everything else is 1. 
2453 | // 2454 | // Derived from: https://unicode.org/Public/15.1.0/ucd/EastAsianWidth.txt 2455 | static const ucg_rune ucg_normalized_east_asian_width_ranges[] = { 2456 | 0x0000, 0x10FF, 1, 2457 | 0x1100, 0x115F, 2, 2458 | 0x1160, 0x2319, 1, 2459 | 0x231A, 0x231B, 2, 2460 | 0x231C, 0x2328, 1, 2461 | 0x2329, 0x232A, 2, 2462 | 0x232B, 0x23E8, 1, 2463 | 0x23E9, 0x23EC, 2, 2464 | 0x23ED, 0x23EF, 1, 2465 | 0x23F0, 0x23F0, 2, 2466 | 0x23F1, 0x23F2, 1, 2467 | 0x23F3, 0x23F3, 2, 2468 | 0x23F4, 0x25FC, 1, 2469 | 0x25FD, 0x25FE, 2, 2470 | 0x25FF, 0x2613, 1, 2471 | 0x2614, 0x2615, 2, 2472 | 0x2616, 0x2647, 1, 2473 | 0x2648, 0x2653, 2, 2474 | 0x2654, 0x267E, 1, 2475 | 0x267F, 0x267F, 2, 2476 | 0x2680, 0x2692, 1, 2477 | 0x2693, 0x2693, 2, 2478 | 0x2694, 0x26A0, 1, 2479 | 0x26A1, 0x26A1, 2, 2480 | 0x26A2, 0x26A9, 1, 2481 | 0x26AA, 0x26AB, 2, 2482 | 0x26AC, 0x26BC, 1, 2483 | 0x26BD, 0x26BE, 2, 2484 | 0x26BF, 0x26C3, 1, 2485 | 0x26C4, 0x26C5, 2, 2486 | 0x26C6, 0x26CD, 1, 2487 | 0x26CE, 0x26CE, 2, 2488 | 0x26CF, 0x26D3, 1, 2489 | 0x26D4, 0x26D4, 2, 2490 | 0x26D5, 0x26E9, 1, 2491 | 0x26EA, 0x26EA, 2, 2492 | 0x26EB, 0x26F1, 1, 2493 | 0x26F2, 0x26F3, 2, 2494 | 0x26F4, 0x26F4, 1, 2495 | 0x26F5, 0x26F5, 2, 2496 | 0x26F6, 0x26F9, 1, 2497 | 0x26FA, 0x26FA, 2, 2498 | 0x26FB, 0x26FC, 1, 2499 | 0x26FD, 0x26FD, 2, 2500 | 0x26FE, 0x2704, 1, 2501 | 0x2705, 0x2705, 2, 2502 | 0x2706, 0x2709, 1, 2503 | 0x270A, 0x270B, 2, 2504 | 0x270C, 0x2727, 1, 2505 | 0x2728, 0x2728, 2, 2506 | 0x2729, 0x274B, 1, 2507 | 0x274C, 0x274C, 2, 2508 | 0x274D, 0x274D, 1, 2509 | 0x274E, 0x274E, 2, 2510 | 0x274F, 0x2752, 1, 2511 | 0x2753, 0x2755, 2, 2512 | 0x2756, 0x2756, 1, 2513 | 0x2757, 0x2757, 2, 2514 | 0x2758, 0x2794, 1, 2515 | 0x2795, 0x2797, 2, 2516 | 0x2798, 0x27AF, 1, 2517 | 0x27B0, 0x27B0, 2, 2518 | 0x27B1, 0x27BE, 1, 2519 | 0x27BF, 0x27BF, 2, 2520 | 0x27C0, 0x2B1A, 1, 2521 | 0x2B1B, 0x2B1C, 2, 2522 | 0x2B1D, 0x2B4F, 1, 2523 | 0x2B50, 0x2B50, 2, 2524 | 0x2B51, 0x2B54, 1, 2525 | 0x2B55, 0x2B55, 2, 2526 | 0x2B56, 
0x2E5D, 1, 2527 | 0x2E80, 0x303E, 2, 2528 | 0x303F, 0x303F, 1, 2529 | 0x3041, 0x3247, 2, 2530 | 0x3248, 0x324F, 1, 2531 | 0x3250, 0x4DBF, 2, 2532 | 0x4DC0, 0x4DFF, 1, 2533 | 0x4E00, 0xA4C6, 2, 2534 | 0xA4D0, 0xA95F, 1, 2535 | 0xA960, 0xA97C, 2, 2536 | 0xA980, 0xABF9, 1, 2537 | 0xAC00, 0xD7A3, 2, 2538 | 0xD7B0, 0xF8FF, 1, 2539 | 0xF900, 0xFAFF, 2, 2540 | 0xFB00, 0xFE0F, 1, 2541 | 0xFE10, 0xFE19, 2, 2542 | 0xFE20, 0xFE2F, 1, 2543 | 0xFE30, 0xFE6B, 2, 2544 | 0xFE70, 0xFEFF, 1, 2545 | 0xFF01, 0xFF60, 2, 2546 | 0xFF61, 0xFFDC, 1, 2547 | 0xFFE0, 0xFFE6, 2, 2548 | 0xFFE8, 0x16F9F, 1, 2549 | 0x16FE0, 0x1B2FB, 2, 2550 | 0x1BC00, 0x1F003, 1, 2551 | 0x1F004, 0x1F004, 2, 2552 | 0x1F005, 0x1F0CE, 1, 2553 | 0x1F0CF, 0x1F0CF, 2, 2554 | 0x1F0D1, 0x1F18D, 1, 2555 | 0x1F18E, 0x1F18E, 2, 2556 | 0x1F18F, 0x1F190, 1, 2557 | 0x1F191, 0x1F19A, 2, 2558 | 0x1F19B, 0x1F1FF, 1, 2559 | 0x1F200, 0x1F320, 2, 2560 | 0x1F321, 0x1F32C, 1, 2561 | 0x1F32D, 0x1F335, 2, 2562 | 0x1F336, 0x1F336, 1, 2563 | 0x1F337, 0x1F37C, 2, 2564 | 0x1F37D, 0x1F37D, 1, 2565 | 0x1F37E, 0x1F393, 2, 2566 | 0x1F394, 0x1F39F, 1, 2567 | 0x1F3A0, 0x1F3CA, 2, 2568 | 0x1F3CB, 0x1F3CE, 1, 2569 | 0x1F3CF, 0x1F3D3, 2, 2570 | 0x1F3D4, 0x1F3DF, 1, 2571 | 0x1F3E0, 0x1F3F0, 2, 2572 | 0x1F3F1, 0x1F3F3, 1, 2573 | 0x1F3F4, 0x1F3F4, 2, 2574 | 0x1F3F5, 0x1F3F7, 1, 2575 | 0x1F3F8, 0x1F43E, 2, 2576 | 0x1F43F, 0x1F43F, 1, 2577 | 0x1F440, 0x1F440, 2, 2578 | 0x1F441, 0x1F441, 1, 2579 | 0x1F442, 0x1F4FC, 2, 2580 | 0x1F4FD, 0x1F4FE, 1, 2581 | 0x1F4FF, 0x1F53D, 2, 2582 | 0x1F53E, 0x1F54A, 1, 2583 | 0x1F54B, 0x1F54E, 2, 2584 | 0x1F54F, 0x1F54F, 1, 2585 | 0x1F550, 0x1F567, 2, 2586 | 0x1F568, 0x1F579, 1, 2587 | 0x1F57A, 0x1F57A, 2, 2588 | 0x1F57B, 0x1F594, 1, 2589 | 0x1F595, 0x1F596, 2, 2590 | 0x1F597, 0x1F5A3, 1, 2591 | 0x1F5A4, 0x1F5A4, 2, 2592 | 0x1F5A5, 0x1F5FA, 1, 2593 | 0x1F5FB, 0x1F64F, 2, 2594 | 0x1F650, 0x1F67F, 1, 2595 | 0x1F680, 0x1F6C5, 2, 2596 | 0x1F6C6, 0x1F6CB, 1, 2597 | 0x1F6CC, 0x1F6CC, 2, 2598 | 0x1F6CD, 0x1F6CF, 1, 2599 | 0x1F6D0, 
0x1F6D2, 2, 2600 | 0x1F6D3, 0x1F6D4, 1, 2601 | 0x1F6D5, 0x1F6DF, 2, 2602 | 0x1F6E0, 0x1F6EA, 1, 2603 | 0x1F6EB, 0x1F6EC, 2, 2604 | 0x1F6F0, 0x1F6F3, 1, 2605 | 0x1F6F4, 0x1F6FC, 2, 2606 | 0x1F700, 0x1F7D9, 1, 2607 | 0x1F7E0, 0x1F7F0, 2, 2608 | 0x1F800, 0x1F90B, 1, 2609 | 0x1F90C, 0x1F93A, 2, 2610 | 0x1F93B, 0x1F93B, 1, 2611 | 0x1F93C, 0x1F945, 2, 2612 | 0x1F946, 0x1F946, 1, 2613 | 0x1F947, 0x1F9FF, 2, 2614 | 0x1FA00, 0x1FA6D, 1, 2615 | 0x1FA70, 0x1FAF8, 2, 2616 | 0x1FB00, 0x1FBF9, 1, 2617 | 0x20000, 0x3FFFD, 2, 2618 | 0xE0001, 0x10FFFD, 1, 2619 | }; 2620 | 2621 | // 2622 | // End of Unicode 15.1.0 block. 2623 | // 2624 | 2625 | #ifdef __cplusplus 2626 | } 2627 | #endif 2628 | 2629 | #endif /* _UCG_TABLES_INCLUDED */ 2630 | -------------------------------------------------------------------------------- /tests/build_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # Build and run the UCG tests. 3 | set -xe 4 | cc test_runner.c -o test_runner -L../lib -lucg -std=c99 -pedantic-errors -Wall -Werror 5 | ./test_runner 6 | set +xe 7 | -------------------------------------------------------------------------------- /tests/test_runner.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../src/ucg.h" 7 | #include "test_data.c" 8 | 9 | int main(int argc, const char** argv) { 10 | (void)argc; 11 | (void)argv; 12 | 13 | ucg_int failed = 0; 14 | ucg_int completed = 0; 15 | 16 | printf("Running official grapheme break tests ...\n"); 17 | for (ucg_int i = 0; i < (ucg_int)(sizeof(official_grapheme_break_test_cases) / sizeof(test_case)); i += 1) { 18 | test_case t = official_grapheme_break_test_cases[i]; 19 | 20 | ucg_int grapheme_count; 21 | ucg_int result = ucg_grapheme_count((uint8_t*)t.str, (ucg_int)strlen(t.str), NULL, &grapheme_count, NULL); 22 | if (result != 0) { 23 | fprintf(stderr, "(#% 4li) failed due to 
UTF-8 parsing error: %li\n", i, result); 24 | } 25 | if (grapheme_count != t.expected_clusters) { 26 | fprintf(stderr, "(#% 4li) graphemes: %li != %li, %s\n", i, grapheme_count, t.expected_clusters, t.str); 27 | failed += 1; 28 | } 29 | completed += 1; 30 | } 31 | 32 | printf("Running official emoji tests ...\n"); 33 | for (ucg_int i = 0; i < (ucg_int)(sizeof(official_emoji_test_cases) / sizeof(test_case)); i += 1) { 34 | test_case t = official_emoji_test_cases[i]; 35 | ucg_int grapheme_count; 36 | ucg_int result = ucg_grapheme_count((uint8_t*)t.str, (ucg_int)strlen(t.str), NULL, &grapheme_count, NULL); 37 | if (result != 0) { 38 | fprintf(stderr, "(#% 4li) failed due to UTF-8 parsing error: %li\n", i, result); 39 | } 40 | if (grapheme_count != t.expected_clusters) { 41 | fprintf(stderr, "(#% 4li) graphemes: %li != %li, %s\n", i, grapheme_count, t.expected_clusters, t.str); 42 | failed += 1; 43 | } 44 | completed += 1; 45 | } 46 | 47 | fprintf(stderr, "Tests failed: % 4li/% 4li\n", failed, completed); 48 | 49 | return failed == 0 ? EXIT_SUCCESS : EXIT_FAILURE; 50 | } 51 | --------------------------------------------------------------------------------