├── tests ├── data │ ├── UTF-7-snippet │ ├── UTF-7-snippet.UTF-8 │ ├── UCS-4BE-snippet.UTF-8 │ ├── UCS-4LE-snippet.UTF-8 │ ├── UTF-16-snippet.UTF-8 │ ├── UTF-16BE-snippet.UTF-8 │ ├── UTF-16LE-snippet.UTF-8 │ ├── UCS-4BE-snippet │ ├── UCS-4LE-snippet │ ├── Translit1.ASCII │ ├── DEC-HANYU.IRREVERSIBLE.TXT │ ├── CP1163.IRREVERSIBLE.TXT │ ├── EUC-JP.IRREVERSIBLE.TXT │ ├── Quotes.ASCII │ ├── Quotes.UTF-8 │ ├── HZ-snippet.UTF-8 │ ├── ISO-2022-JP-snippet.UTF-8 │ ├── ISO-2022-KR-snippet.UTF-8 │ ├── ISO-2022-CN-snippet.UTF-8 │ ├── CP1161.IRREVERSIBLE.TXT │ ├── ISO-2022-CN-EXT-snippet.UTF-8 │ ├── IBM-1160.IRREVERSIBLE.TXT │ ├── HZ-snippet │ ├── TCVN-snippet │ ├── BIG5-2003.IRREVERSIBLE.TXT │ ├── CP1255-snippet │ ├── CP1258-snippet │ ├── ISO-2022-JP-1-snippet.UTF-8 │ ├── ISO-2022-JP-snippet │ ├── UCS-2BE-snippet │ ├── UCS-2LE-snippet │ ├── UTF-16-snippet │ ├── UTF-32-snippet │ ├── ARMSCII-8.IRREVERSIBLE.TXT │ ├── IBM-838.IRREVERSIBLE.TXT │ ├── ISO-2022-JP-1-snippet │ ├── ISO-2022-KR-snippet │ ├── Quotes.ISO-8859-1 │ ├── UTF-16BE-snippet │ ├── UTF-16LE-snippet │ ├── UTF-32BE-snippet │ ├── UTF-32LE-snippet │ ├── Translit1.ISO-8859-1 │ ├── BIG5-HKSCS-1999-snippet │ ├── BIG5-HKSCS-2001-snippet │ ├── BIG5-HKSCS-2004-snippet │ ├── BIG5-HKSCS-2008-snippet │ ├── ISO-2022-CN-snippet │ ├── ISO-2022-CN-EXT-snippet │ ├── BIG5-HKSCS-1999.IRREVERSIBLE.TXT │ ├── BIG5-HKSCS-2001.IRREVERSIBLE.TXT │ ├── BIG5-HKSCS-2004.IRREVERSIBLE.TXT │ ├── BIG5-HKSCS-2008.IRREVERSIBLE.TXT │ ├── CP950.IRREVERSIBLE.TXT │ ├── ISO-2022-JP-2-snippet.UTF-8 │ ├── UTF-32-snippet.UTF-8 │ ├── UTF-32BE-snippet.UTF-8 │ ├── UTF-32LE-snippet.UTF-8 │ ├── ISO-2022-JP-2-snippet │ ├── CP1255.IRREVERSIBLE.TXT │ ├── ISO-IR-165.IRREVERSIBLE.TXT │ ├── GB18030-2005.IRREVERSIBLE.TXT │ ├── TCVN.IRREVERSIBLE.TXT │ ├── UCS-2BE-snippet.UTF-8 │ ├── UCS-2LE-snippet.UTF-8 │ ├── ASCII.TXT │ ├── TDS565.TXT │ ├── ISO646-CN.TXT │ ├── ISO646-JP.TXT │ ├── JIS_X0201.TXT │ ├── CP1258.IRREVERSIBLE.TXT │ ├── ISO-8859-6.TXT │ ├── MacHebrew.TXT │ ├── 
CP856.TXT │ ├── TIS-620.TXT │ └── IBM-424.TXT ├── sort.cpp ├── reiconv-test.hpp ├── sort.hpp ├── check-encoding.cpp ├── check-stateful.cpp └── data-generator.cpp ├── benchmark ├── benchmark.png ├── compile.sh ├── reiconv_iconv.cpp ├── benchmark.md ├── benchmark.hpp ├── glib_iconv.cpp ├── libiconv_iconv.cpp ├── benchmark.cpp └── run.py ├── TODO ├── .markdownlint.json ├── .rubisco ├── test.yml ├── build.yml └── dist.yml ├── repo.json ├── docs ├── zh_CN │ └── README.md └── README.md ├── lib ├── codepage_to_ei.h ├── loops.h ├── localecharset │ ├── lc_types.h │ └── lc_utils.h ├── locale_charset.cpp ├── encoding.h ├── converters │ ├── cp943.h │ ├── flushwc.h │ ├── iso8859_1.h │ ├── ascii.h │ ├── ucs4internal.h │ ├── ucs4be.h │ ├── ucs4le.h │ ├── tis620.h │ ├── iso8859_11.h │ ├── cns11643.h │ ├── ucs2be.h │ ├── ucs2le.h │ ├── iso646_cn.h │ ├── ucs2internal.h │ ├── iso646_jp.h │ ├── utf32le.h │ ├── cp858.h │ ├── ucs4swapped.h │ ├── utf32be.h │ ├── jisx0201.h │ ├── cp1163.h │ ├── ces_gbk.h │ ├── cp1162.h │ ├── ucs2swapped.h │ ├── cns11643_4.h │ ├── ces_big5.h │ ├── dec_kanji.h │ ├── euc_cn.h │ ├── gb12345.h │ ├── euc_kr.h │ ├── ucs2.h │ ├── ucs4.h │ ├── utf16be.h │ ├── utf16le.h │ ├── iso8859_15.h │ ├── cp1161.h │ ├── utf8.h │ └── iso8859_9.h ├── encoding_indexes.h ├── all_encodings.h ├── loop_funcs.h └── reiconv.cpp ├── .github └── workflows │ └── test.yml ├── include ├── cppp │ └── encodings │ │ ├── reiconv.h.in │ │ └── reiconv.hpp.in └── iconv.h.in ├── .gitignore ├── cpack.cmake ├── Makefile.devel ├── windows └── cppp-reiconv.rc.in └── tools ├── check-encodings.cpp └── genindexes.cpp /tests/data/UTF-7-snippet: -------------------------------------------------------------------------------- 1 | A+ImIDkQ- -------------------------------------------------------------------------------- /tests/data/UTF-7-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | A≢Α 
-------------------------------------------------------------------------------- /tests/data/UCS-4BE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | 𒍅=Ra -------------------------------------------------------------------------------- /tests/data/UCS-4LE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | 𒍅=Ra -------------------------------------------------------------------------------- /tests/data/UTF-16-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | 𒍅=Ra -------------------------------------------------------------------------------- /tests/data/UTF-16BE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | 𒍅=Ra -------------------------------------------------------------------------------- /tests/data/UTF-16LE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | 𒍅=Ra -------------------------------------------------------------------------------- /tests/data/UCS-4BE-snippet: -------------------------------------------------------------------------------- 1 | #E=Ra -------------------------------------------------------------------------------- /tests/data/UCS-4LE-snippet: -------------------------------------------------------------------------------- 1 | E#=Ra -------------------------------------------------------------------------------- /tests/data/Translit1.ASCII: -------------------------------------------------------------------------------- 1 | 'Ecrit par %s. 
2 | -------------------------------------------------------------------------------- /tests/data/DEC-HANYU.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xC2CBA1B8 0x5344 2 | -------------------------------------------------------------------------------- /tests/data/CP1163.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xAF 0x203E 2 | 0xD0 0x00D0 3 | -------------------------------------------------------------------------------- /tests/data/EUC-JP.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x5C 0x00A5 2 | 0x7E 0x203E 3 | -------------------------------------------------------------------------------- /tests/data/Quotes.ASCII: -------------------------------------------------------------------------------- 1 | "Hello" 2 | 'Hello' 3 | "Hello" 4 | 'Hello' 5 | -------------------------------------------------------------------------------- /tests/data/Quotes.UTF-8: -------------------------------------------------------------------------------- 1 | “Hello” 2 | ‘Hello’ 3 | „Hello” 4 | ‚Hello’ 5 | -------------------------------------------------------------------------------- /tests/data/HZ-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Chinese (中文,普通话,汉语) 你好 2 | GB -- 元气 开发 3 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Japanese (日本語) こんにちは 2 | JIS -- 元気 開発 3 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-KR-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Korean (한글) 안녕하세요, 안녕하십니까 2 | KSC -- 元氣 開發 3 | 
-------------------------------------------------------------------------------- /tests/data/ISO-2022-CN-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Chinese (中文,普通话,汉语) 你好 2 | GB -- 元气 开发 3 | 喲洈 4 | -------------------------------------------------------------------------------- /tests/data/CP1161.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xA0 0x0E48 2 | 0xDB 0x0E49 3 | 0xDC 0x0E4A 4 | 0xDD 0x0E4B 5 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-CN-EXT-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Chinese (中文,普通话,汉语) 你好 2 | GB -- 元气 开发 3 | 喲痓洈 4 | -------------------------------------------------------------------------------- /tests/data/IBM-1160.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x51 0x0E48 2 | 0xCA 0x0E49 3 | 0xE1 0x0E4A 4 | 0xFD 0x0E4B 5 | -------------------------------------------------------------------------------- /benchmark/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/benchmark/benchmark.png -------------------------------------------------------------------------------- /tests/data/HZ-snippet: -------------------------------------------------------------------------------- 1 | Chinese (~{VPND~},~{FUM(;0~},~{::So~}) ~{Dc:C~} 2 | GB -- ~{T*Fx~} ~{?*7"~} 3 | -------------------------------------------------------------------------------- /tests/data/TCVN-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/TCVN-snippet -------------------------------------------------------------------------------- /tests/data/BIG5-2003.IRREVERSIBLE.TXT: 
-------------------------------------------------------------------------------- 1 | 0xF9FA 0x2554 2 | 0xF9FB 0x2557 3 | 0xF9FC 0x255A 4 | 0xF9FD 0x255D 5 | -------------------------------------------------------------------------------- /tests/data/CP1255-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/CP1255-snippet -------------------------------------------------------------------------------- /tests/data/CP1258-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/CP1258-snippet -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-1-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Just for a test of JISX0212: 騏驎 (the second character is of JISX0212) 2 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-snippet: -------------------------------------------------------------------------------- 1 | Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B 2 | JIS -- $B855$(B $B3+H/(B 3 | -------------------------------------------------------------------------------- /tests/data/UCS-2BE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UCS-2BE-snippet -------------------------------------------------------------------------------- /tests/data/UCS-2LE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UCS-2LE-snippet -------------------------------------------------------------------------------- /tests/data/UTF-16-snippet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-16-snippet -------------------------------------------------------------------------------- /tests/data/UTF-32-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-32-snippet -------------------------------------------------------------------------------- /tests/data/ARMSCII-8.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x28 0x0028 2 | 0x29 0x0029 3 | 0x2C 0x002C 4 | 0x2D 0x002D 5 | 0x2E 0x002E 6 | -------------------------------------------------------------------------------- /tests/data/IBM-838.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x51 0x0E48 2 | 0xCA 0x0E49 3 | 0xE1 0x0E4A 4 | 0xFD 0x0E4B 5 | 0xFE 0x0E4C 6 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-1-snippet: -------------------------------------------------------------------------------- 1 | Just for a test of JISX0212: $BqV$(DiQ(B (the second character is of JISX0212) 2 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-KR-snippet: -------------------------------------------------------------------------------- 1 | Korean ($)CGQ1[) >H3gGO<H3gGO=J4O1n 2 | KSC -- $)Cj*Q( KR[! 
3 | -------------------------------------------------------------------------------- /tests/data/Quotes.ISO-8859-1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/Quotes.ISO-8859-1 -------------------------------------------------------------------------------- /tests/data/UTF-16BE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-16BE-snippet -------------------------------------------------------------------------------- /tests/data/UTF-16LE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-16LE-snippet -------------------------------------------------------------------------------- /tests/data/UTF-32BE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-32BE-snippet -------------------------------------------------------------------------------- /tests/data/UTF-32LE-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/UTF-32LE-snippet -------------------------------------------------------------------------------- /tests/data/Translit1.ISO-8859-1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/Translit1.ISO-8859-1 -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-1999-snippet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/BIG5-HKSCS-1999-snippet -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2001-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/BIG5-HKSCS-2001-snippet -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2004-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/BIG5-HKSCS-2004-snippet -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2008-snippet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cppp-project/cppp-reiconv/HEAD/tests/data/BIG5-HKSCS-2008-snippet -------------------------------------------------------------------------------- /tests/data/ISO-2022-CN-snippet: -------------------------------------------------------------------------------- 1 | Chinese ($)AVPND,FUM(;0,::So) Dc:C 2 | GB -- $)AT*Fx ?*7" 3 | $)G^O$*HN+j 4 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-CN-EXT-snippet: -------------------------------------------------------------------------------- 1 | Chinese ($)AVPND,FUM(;0,::So) Dc:C 2 | GB -- $)AT*Fx ?*7" 3 | $)G^O$+IO7J$*HN+j 4 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | 1. Implement the //NON_IDENTICAL_DISCARD suffix from POSIX:2024. 2 | 2. Add translit support. 3 | 3. Use xmake buildsystem. 4 | 4. Fix testing errors on some Windows machine. 
5 | -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-1999.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xF9E9 0x255E 2 | 0xF9EA 0x256A 3 | 0xF9EB 0x2561 4 | 0xF9F9 0x2550 5 | 0xF9FA 0x256D 6 | 0xF9FB 0x256E 7 | 0xF9FC 0x2570 8 | 0xF9FD 0x256F 9 | -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2001.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xF9E9 0x255E 2 | 0xF9EA 0x256A 3 | 0xF9EB 0x2561 4 | 0xF9F9 0x2550 5 | 0xF9FA 0x256D 6 | 0xF9FB 0x256E 7 | 0xF9FC 0x2570 8 | 0xF9FD 0x256F 9 | -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2004.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xF9E9 0x255E 2 | 0xF9EA 0x256A 3 | 0xF9EB 0x2561 4 | 0xF9F9 0x2550 5 | 0xF9FA 0x256D 6 | 0xF9FB 0x256E 7 | 0xF9FC 0x2570 8 | 0xF9FD 0x256F 9 | -------------------------------------------------------------------------------- /tests/data/BIG5-HKSCS-2008.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xF9E9 0x255E 2 | 0xF9EA 0x256A 3 | 0xF9EB 0x2561 4 | 0xF9F9 0x2550 5 | 0xF9FA 0x256D 6 | 0xF9FB 0x256E 7 | 0xF9FC 0x2570 8 | 0xF9FD 0x256F 9 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "MD013": { 3 | "code_blocks": false 4 | }, 5 | "MD024": { 6 | "siblings_only": true 7 | }, 8 | "MD033": { 9 | "allowed_elements": ["img"] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /benchmark/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | g++ *.cpp 
/tmp/inst/lib/libcppp-reiconv.static.a /tmp/inst/lib/libcharset.a /tmp/inst/lib/libiconv.a -I/tmp/inst/include /workspaces/benchmark/build/src/libbenchmark.a -licuuc -O3 -fPIC -o benchmark 3 | -------------------------------------------------------------------------------- /tests/data/CP950.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xA244 0x00A5 2 | 0xA2CC 0x5341 3 | 0xA2CE 0x5345 4 | 0xF9E9 0x255E 5 | 0xF9EA 0x256A 6 | 0xF9EB 0x2561 7 | 0xF9F9 0x2550 8 | 0xF9FA 0x256D 9 | 0xF9FB 0x256E 10 | 0xF9FC 0x2570 11 | 0xF9FD 0x256F 12 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-2-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | Japanese (日本語) こんにちは, コンニチハ 2 | JIS -- 元気 開発 3 | Just for a test of JISX0212: 騏驎 (the second character is of JISX0212) 4 | Chinese (中文,普通话,汉语) 你好 5 | GB -- 元气 开发 6 | Korean (한글) 안녕하세요, 안녕하십니까 7 | KSC -- 元氣 開發 8 | -------------------------------------------------------------------------------- /benchmark/reiconv_iconv.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark.hpp" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | void* reiconv_iconv_open() 10 | { 11 | return reiconv_open_from_index(ENCODING_UTF8, ENCODING_GB18030, REICONV_NO_FLAGS); 12 | } 13 | -------------------------------------------------------------------------------- /tests/data/UTF-32-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ <- Greek 2 | 𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝 <- Etruscan 3 | ABCDEFGHIJKLMNOPQRSTUVWXYZ <- Latin 4 | АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ <- Cyrillic 5 | 𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈 <- Gothic 6 | אבגדהוזחטיךכלםמןנסעףפץצקרש <- Hebrew 7 | -------------------------------------------------------------------------------- 
/tests/data/UTF-32BE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ <- Greek 2 | 𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝 <- Etruscan 3 | ABCDEFGHIJKLMNOPQRSTUVWXYZ <- Latin 4 | АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ <- Cyrillic 5 | 𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈 <- Gothic 6 | אבגדהוזחטיךכלםמןנסעףפץצקרש <- Hebrew 7 | -------------------------------------------------------------------------------- /tests/data/UTF-32LE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ <- Greek 2 | 𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝 <- Etruscan 3 | ABCDEFGHIJKLMNOPQRSTUVWXYZ <- Latin 4 | АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ <- Cyrillic 5 | 𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈 <- Gothic 6 | אבגדהוזחטיךכלםמןנסעףפץצקרש <- Hebrew 7 | -------------------------------------------------------------------------------- /.rubisco/test.yml: -------------------------------------------------------------------------------- 1 | name: 🧪 Run tests for ${{ project.name }} 2 | 3 | steps: 4 | - name: 🛠️ Configure and build 5 | run: | 6 | cmake -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON 7 | cmake --build build --config Release -v -j ${{ nproc }} 8 | 9 | - name: 🧪 Run tests 10 | run: | 11 | cd build 12 | ctest --output-on-failure -C Release 13 | cd .. 
14 | -------------------------------------------------------------------------------- /tests/data/ISO-2022-JP-2-snippet: -------------------------------------------------------------------------------- 1 | Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B 2 | JIS -- $B855$(B $B3+H/(B 3 | Just for a test of JISX0212: $BqV$(DiQ(B (the second character is of JISX0212) 4 | Chinese ($BCfJ8(B,$BIaDL$A;0(B,$A::So(B) $(D0_$B9%(B 5 | GB -- $B85]c(B $A?*7"(B 6 | Korean ($(CGQ1[(B) $(C>H3gGO<H3gGO=J4O1n(B 7 | KSC -- $B85]f(B $B3+b$(B 8 | -------------------------------------------------------------------------------- /repo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cppp-reiconv", 3 | "maintainer": "ChenPi11 ", 4 | "version": "3.0.0", 5 | "description": "C+++ character set conversion library.", 6 | "license": "LGPL-3.0", 7 | "hooks": { 8 | "build": { 9 | "run": ".rubisco/build.yml" 10 | }, 11 | "dist": { 12 | "run": ".rubisco/dist.yml" 13 | }, 14 | "test": { 15 | "run": ".rubisco/test.yml" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /benchmark/benchmark.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | ## Compile command 4 | 5 | ```bash 6 | cd benchmark 7 | g++ *.cpp -I/tmp/inst/include /tmp/inst/lib/libicudata.a /tmp/inst/lib/libicuuc.a /tmp/inst/lib/libiconv.a /tmp/inst/lib/libcharset.a /tmp/inst/lib/libcppp-reiconv.static.a /usr/local/lib/libbenchmark.a /tmp/inst/lib/*.a -O2 -fPIC -Wall -o benchmark 8 | ``` 9 | 10 | ## GCC version 11 | 12 | ```text 13 | gcc (Debian 14.2.0-8) 14.2.0 14 | ``` 15 | 16 | ## Benchmark result 17 | 18 | ![benchmark result](./benchmark.png) 19 | -------------------------------------------------------------------------------- /benchmark/benchmark.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 
| #include 6 | 7 | extern void* glibc_iconv_open(); 8 | extern int glibc_static_size_convert(void* cd, const char *input_data, size_t input_length, char *output_data, 9 | size_t output_length); 10 | 11 | extern void* libiconv_iconv_open(); 12 | extern int libiconv_static_size_convert(void* cd, const char *input_data, size_t input_length, char *output_data, 13 | size_t output_length); 14 | 15 | extern void* reiconv_iconv_open(); 16 | 17 | -------------------------------------------------------------------------------- /tests/data/CP1255.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0xD6C7 0xFB1F 2 | 0xE0C7 0xFB2E 3 | 0xE0C8 0xFB2F 4 | 0xE0CC 0xFB30 5 | 0xE1CC 0xFB31 6 | 0xE1CF 0xFB4C 7 | 0xE2CC 0xFB32 8 | 0xE3CC 0xFB33 9 | 0xE4CC 0xFB34 10 | 0xE5C9 0xFB4B 11 | 0xE5CC 0xFB35 12 | 0xE6CC 0xFB36 13 | 0xE8CC 0xFB38 14 | 0xE9C4 0xFB1D 15 | 0xE9CC 0xFB39 16 | 0xEACC 0xFB3A 17 | 0xEBCC 0xFB3B 18 | 0xEBCF 0xFB4D 19 | 0xECCC 0xFB3C 20 | 0xEECC 0xFB3E 21 | 0xF0CC 0xFB40 22 | 0xF1CC 0xFB41 23 | 0xF3CC 0xFB43 24 | 0xF4CC 0xFB44 25 | 0xF4CF 0xFB4E 26 | 0xF6CC 0xFB46 27 | 0xF7CC 0xFB47 28 | 0xF8CC 0xFB48 29 | 0xF9CC 0xFB49 30 | 0xF9CCD1 0xFB2C 31 | 0xF9CCD2 0xFB2D 32 | 0xF9D1 0xFB2A 33 | 0xF9D2 0xFB2B 34 | 0xFACC 0xFB4A 35 | -------------------------------------------------------------------------------- /tests/data/ISO-IR-165.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x2821 0x1FB1 2 | 0x2822 0x03AC 3 | 0x2823 0x1FB0 4 | 0x2824 0x1F70 5 | 0x2825 0x0113 6 | 0x2826 0x00E9 7 | 0x2827 0x011B 8 | 0x2828 0x00E8 9 | 0x2829 0x012B 10 | 0x282A 0x00ED 11 | 0x282B 0x01D0 12 | 0x282C 0x00EC 13 | 0x282D 0x014D 14 | 0x282E 0x00F3 15 | 0x282F 0x01D2 16 | 0x2830 0x00F2 17 | 0x2831 0x016B 18 | 0x2832 0x00FA 19 | 0x2833 0x01D4 20 | 0x2834 0x00F9 21 | 0x2835 0x01D6 22 | 0x2836 0x01D8 23 | 0x2837 0x01DA 24 | 0x2838 0x01DC 25 | 0x2839 0x00FC 26 | 0x283A 0x00EA 27 | 0x283B 0x03B1 28 | 
0x283C 0x1E3F 29 | 0x283D 0x0144 30 | 0x283E 0x0148 31 | 0x283F 0x01F9 32 | 0x2840 0xFF47 33 | 0x2B3B 0x03B1 34 | 0x2B40 0xFF47 35 | -------------------------------------------------------------------------------- /docs/zh_CN/README.md: -------------------------------------------------------------------------------- 1 | # cppp-reiconv 文档 2 | 3 | ## 介绍 4 | 5 | 这是 `cppp-reiconv 3.0.0` 文档。不稳定版本。 6 | 7 | cppp-reiconv 是一个可移植的 C/C++ 库,用于字符编码之间的转换和字符集检测。 8 | 9 | ## 依赖 10 | 11 | ### 构建 12 | 13 | - 一个支持 C++20 的 C++ 编译器。 14 | - CMake 3.12 或更高版本. 15 | 16 | ### 运行时 17 | 18 | - C 运行时。 19 | - 支持 C++20 的 C++ 运行时。 20 | 21 | ### 生成数据 22 | 23 | 我们需要为编码生成索引,它们存储在 `lib/generated` 中。 24 | 25 | **如果你不感兴趣,你可以跳过这步骤。** 26 | **我们已经在 Git 仓库和源码包提供了相关数据。** 27 | 28 | - GNU Make. 29 | - GNU Gperf. 30 | - 一个 POSIX 操作系统. 31 | 32 | 安装以上工具以后,你可以用下面的命令生成数据: 33 | 34 | ```shell 35 | make -f Makefile.devel -B 36 | ``` 37 | 38 | ## API 参考 39 | 40 | - [C API 和 iconv 兼容 API 参考](c-api.md) 41 | - [C++ API 参考](cpp-api.md) 42 | -------------------------------------------------------------------------------- /lib/codepage_to_ei.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file codepage_to_ei.h 3 | * @brief A mapping of codepages to encoding indexes. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 
5 | */ 6 | 7 | #ifndef _CODEPAGE_TO_EI_H_ 8 | #define _CODEPAGE_TO_EI_H_ 9 | 10 | #include "encoding_indexes.h" // IWYU pragma: keep 11 | 12 | #define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) 13 | #define DEFCODEPAGE(codepage, xxx) [codepage] = ei_##xxx + 1, 14 | #define DEFINDEX(alias, name) 15 | 16 | static const int codepage_to_eindex[] = { 17 | #include "encodings.h.snippet" 18 | }; 19 | 20 | #undef DEFINDEX 21 | #undef DEFENCODING 22 | #undef DEFCODEPAGE 23 | 24 | #endif /* _CODEPAGE_TO_EI_H_ */ 25 | -------------------------------------------------------------------------------- /.rubisco/build.yml: -------------------------------------------------------------------------------- 1 | name: 🛠️ Build ${{ project.name }} 2 | 3 | steps: 4 | - name: 📦 Make binary distribution directory 5 | mkdir: ${{ project.name }}-${{ project.version }}-bin 6 | 7 | - name: 🛠️ Configure and build 8 | run: | 9 | cmake -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX="${{ project.name }}-${{ project.version }}-bin" 10 | cmake --build build --config Release -v -j ${{ nproc }} 11 | 12 | - name: 📦 Install to temp install path 13 | run: | 14 | cmake --install build --config Release --prefix "${{ project.name }}-${{ project.version }}-bin" 15 | 16 | - name: 📦 Packing the binary distribution 17 | compress: ${{ project.name }}-${{ project.version }}-bin 18 | to: dist/${{ project.name }}-${{ project.version }} 19 | format: [zip, tar.xz] 20 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: ✔️ Test cppp-reiconv 2 | 3 | on: 4 | release: 5 | types: [published] 6 | push: 7 | branches: [ "main" ] 8 | pull_request: 9 | branches: [ "main" ] 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | 15 | strategy: 16 | fail-fast: false 17 | 18 | matrix: 19 | include: 
20 | - { os: macos-latest, host: aarch64-apple-darwin } 21 | - { os: ubuntu-latest, host: x86_64-linux-gnu } 22 | 23 | steps: 24 | - name: 📁 Checkout 25 | uses: actions/checkout@v4 26 | 27 | - name: 🧰 Setup Rubisco 28 | uses: cppp-project/rubisco@main 29 | with: 30 | host: ${{ matrix.host }} 31 | 32 | - name: 📁 Setup subpackages 33 | run: | 34 | git clone https://github.com/cppp-project/build-aux --depth 1 35 | git clone https://github.com/cppp-project/cppp-platform --depth 1 36 | 37 | - name: 🧪 Test 38 | run: rubisco test 39 | -------------------------------------------------------------------------------- /tests/data/GB18030-2005.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x82359037 0x9FB4 2 | 0x82359038 0x9FB5 3 | 0x82359039 0x9FB6 4 | 0x82359130 0x9FB7 5 | 0x82359131 0x9FB8 6 | 0x82359132 0x9FB9 7 | 0x82359133 0x9FBA 8 | 0x82359134 0x9FBB 9 | 0x84318236 0xFE10 10 | 0x84318237 0xFE11 11 | 0x84318238 0xFE12 12 | 0x84318239 0xFE13 13 | 0x84318330 0xFE14 14 | 0x84318331 0xFE15 15 | 0x84318332 0xFE16 16 | 0x84318333 0xFE17 17 | 0x84318334 0xFE18 18 | 0x84318335 0xFE19 19 | 0x95329031 0xE816 20 | 0x95329033 0xE817 21 | 0x95329730 0xE818 22 | 0x9536B937 0xE831 23 | 0x9630BA35 0xE83B 24 | 0x9635B630 0xE855 25 | 0xA6D9 0xE78D 26 | 0xA6DA 0xE78E 27 | 0xA6DB 0xE78F 28 | 0xA6DC 0xE790 29 | 0xA6DD 0xE791 30 | 0xA6DE 0xE792 31 | 0xA6DF 0xE793 32 | 0xA6EC 0xE794 33 | 0xA6ED 0xE795 34 | 0xA6F3 0xE796 35 | 0xFE51 0x20087 36 | 0xFE52 0x20089 37 | 0xFE53 0x200CC 38 | 0xFE59 0xE81E 39 | 0xFE61 0xE826 40 | 0xFE66 0xE82B 41 | 0xFE67 0xE82C 42 | 0xFE6C 0x215D7 43 | 0xFE6D 0xE832 44 | 0xFE76 0x2298F 45 | 0xFE7E 0xE843 46 | 0xFE90 0xE854 47 | 0xFE91 0x241FE 48 | 0xFEA0 0xE864 49 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # cppp-reiconv documentation 2 | 3 | [[Simplified 
Chinese]](zh_CN/README.md) 4 | 5 | ## Introduction 6 | 7 | These docs are for `cppp-reiconv 3.0.0`. 8 | 9 | It's an unstable version. 10 | 11 | cppp-reiconv is a portable C/C++ library for converting between character 12 | encodings and locale charset detection. 13 | 14 | ## Requirements 15 | 16 | ### Build 17 | 18 | - A C++ compiler with C++20 support. 19 | - CMake 3.12 or later. 20 | 21 | ### Runtime 22 | 23 | - C runtime. 24 | - C++ runtime with C++20 support. 25 | 26 | ### Build with data generation 27 | 28 | We need to generate indexes for the encodings; they are stored in `lib/generated`. 29 | 30 | **You can skip this section if you are not interested in it.** 31 | **We already provide generated data in this Git repository and source tarball.** 32 | 33 | - GNU Make. 34 | - GNU Gperf. 35 | - A POSIX system. 36 | 37 | After you have installed these tools, you can use the following command to 38 | generate data: 39 | 40 | ```shell 41 | make -f Makefile.devel -B 42 | ``` 43 | 44 | ## API Reference 45 | 46 | - [C API reference and iconv compatibility](c-api.md) 47 | - [C++ API reference](cpp-api.md) 48 | -------------------------------------------------------------------------------- /lib/loops.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file loops.h 3 | * @brief All the loops. 4 | * @copyright Copyright (C) 2000 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _LOOPS_H_ 26 | #define _LOOPS_H_ 27 | 28 | #include "loop_unicode.h" // IWYU pragma: export 29 | 30 | #endif /* _LOOPS_H_ */ 31 | -------------------------------------------------------------------------------- /tests/data/TCVN.IRREVERSIBLE.TXT: -------------------------------------------------------------------------------- 1 | 0x01B2 0x1E78 2 | 0x20B2 0x02DC 3 | 0x20B3 0x00B4 4 | 0x42B4 0x1E04 5 | 0x43B3 0x0106 6 | 0x44B4 0x1E0C 7 | 0x47B3 0x01F4 8 | 0x48B4 0x1E24 9 | 0x4BB3 0x1E30 10 | 0x4BB4 0x1E32 11 | 0x4CB3 0x0139 12 | 0x4CB4 0x1E36 13 | 0x4DB3 0x1E3E 14 | 0x4DB4 0x1E42 15 | 0x4EB0 0x01F8 16 | 0x4EB2 0x00D1 17 | 0x4EB3 0x0143 18 | 0x4EB4 0x1E46 19 | 0x50B3 0x1E54 20 | 0x52B3 0x0154 21 | 0x52B4 0x1E5A 22 | 0x53B3 0x015A 23 | 0x53B4 0x1E62 24 | 0x54B4 0x1E6C 25 | 0x56B2 0x1E7C 26 | 0x56B4 0x1E7E 27 | 0x57B0 0x1E80 28 | 0x57B3 0x1E82 29 | 0x57B4 0x1E88 30 | 0x5AB3 0x0179 31 | 0x5AB4 0x1E92 32 | 0x62B4 0x1E05 33 | 0x63B3 0x0107 34 | 0x64B4 0x1E0D 35 | 0x67B3 0x01F5 36 | 0x68B4 0x1E25 37 | 0x6BB3 0x1E31 38 | 0x6BB4 0x1E33 39 | 0x6CB3 0x013A 40 | 0x6CB4 0x1E37 41 | 0x6DB3 0x1E3F 42 | 0x6DB4 0x1E43 43 | 0x6EB0 0x01F9 44 | 0x6EB2 0x00F1 45 | 0x6EB3 0x0144 46 | 0x6EB4 0x1E47 47 | 0x70B3 0x1E55 48 | 0x72B3 0x0155 49 | 0x72B4 0x1E5B 50 | 0x73B3 0x015B 51 | 0x73B4 0x1E63 52 | 0x74B4 0x1E6D 53 | 0x76B2 0x1E7D 54 | 0x76B4 0x1E7F 55 | 0x77B0 0x1E81 56 | 0x77B3 0x1E83 57 | 0x77B4 0x1E89 58 | 0x7AB3 0x017A 59 | 0x7AB4 0x1E93 60 | 0x95B2 0x1E4C 61 | 0xB0 0x0340 62 | 0xB3 0x0341 63 | 0xE3B2 0x1E4D 
/**
 * @brief One entry pairing a locale string with a canonical charset string.
 *
 * Both members are fixed-size const character arrays stored inside the entry
 * itself (no pointers), so an array of entries is plain constant data.
 */
struct locale_table_entry
{
    const char locale[17 + 1];    // Locale string; sized 17 + 1, presumably 17 characters plus the terminating NUL.
    const char canonical[11 + 1]; // Canonical charset string; sized 11 + 1, presumably 11 characters plus the terminating NUL.
};
8 | * 9 | * The cppp-reiconv library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * Lesser General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU Lesser General Public 20 | * License along with the cppp-reiconv library; see the file LICENSE. 21 | * If not, see . 22 | */ 23 | 24 | #include 25 | 26 | #include 27 | 28 | #include 29 | 30 | namespace reiconv 31 | { 32 | _CPPP_API std::string_view locale_charset() 33 | { 34 | // The return value of C function is statically allocated. 35 | return ::locale_charset(); 36 | } 37 | } // namespace reiconv 38 | -------------------------------------------------------------------------------- /include/cppp/encodings/reiconv.h.in: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cppp/encodings/reiconv.h 3 | * @author ChenPi11 4 | * @brief cppp-reiconv encodings list. 5 | * @version 3.0.0 6 | * @copyright Copyright (C) 2024 The C++ Plus Project. 7 | */ 8 | /* 9 | * This file is part of the cppp-reiconv Library. 10 | * 11 | * The cppp-reiconv Library is free software; you can redistribute it 12 | * and/or modify it under the terms of the GNU Lesser General Public 13 | * License as published by the Free Software Foundation; either version 3 14 | * of the License, or (at your option) any later version. 
15 | * 16 | * The cppp-reiconv Library is distributed in the hope that it will be 17 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 | * Lesser General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU Lesser General Public 22 | * License along with the cppp-reiconv Library; see the file LICENSE. 23 | * If not, see . 24 | */ 25 | 26 | #pragma once 27 | #ifndef _CPPP_ENCODINGS_REICONV_H_ 28 | #define _CPPP_ENCODINGS_REICONV_H_ 29 | 30 | #if _MSC_VER >= 1600 31 | #pragma execution_character_set("utf-8") 32 | #endif /* _MSC_VER >= 1600 */ 33 | 34 | @INDEXES_CODE@ 35 | #endif /* _CPPP_ENCODINGS_REICONV_H_ */ 36 | -------------------------------------------------------------------------------- /include/cppp/encodings/reiconv.hpp.in: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cppp/encodings/reiconv.hpp 3 | * @author ChenPi11 4 | * @brief cppp-reiconv encodings list. 5 | * @version 3.0.0 6 | * @copyright Copyright (C) 2024 The C++ Plus Project. 7 | */ 8 | /* 9 | * This file is part of the cppp-reiconv Library. 10 | * 11 | * The cppp-reiconv Library is free software; you can redistribute it 12 | * and/or modify it under the terms of the GNU Lesser General Public 13 | * License as published by the Free Software Foundation; either version 3 14 | * of the License, or (at your option) any later version. 15 | * 16 | * The cppp-reiconv Library is distributed in the hope that it will be 17 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 | * Lesser General Public License for more details. 20 | * 21 | * You should have received a copy of the GNU Lesser General Public 22 | * License along with the cppp-reiconv Library; see the file LICENSE. 23 | * If not, see . 
24 | */ 25 | 26 | #pragma once 27 | #ifndef _CPPP_ENCODINGS_REICONV_HPP_ 28 | #define _CPPP_ENCODINGS_REICONV_HPP_ 29 | 30 | #if _MSC_VER >= 1600 31 | #pragma execution_character_set("utf-8") 32 | #endif // _MSC_VER >= 1600 33 | 34 | @INDEXES_CODE@ 35 | #endif // _CPPP_ENCODINGS_REICONV_HPP_ 36 | -------------------------------------------------------------------------------- /tests/sort.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file sort.cpp 3 | * @brief Sort file lines. 4 | * @author ChenPi11 5 | * @copyright Copyright (C) 2024 The C++ Plus Project 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #include 26 | 27 | #include "sort.hpp" 28 | #include "output.hpp" 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | if (argc != 3) 33 | { 34 | print_stderr("Usage: ./sort INPUT-FILE OUTPUT-FILE\n"); 35 | return EXIT_FAILURE; 36 | } 37 | 38 | sort_file(argv[1], argv[2]); 39 | return EXIT_SUCCESS; 40 | } 41 | -------------------------------------------------------------------------------- /lib/encoding.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file encoding.h 3 | * @brief Encoding structure definition. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | */ 6 | /* 7 | * This file is part of the cppp-reiconv library. 8 | * 9 | * The cppp-reiconv library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * Lesser General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU Lesser General Public 20 | * License along with the cppp-reiconv library; see the file LICENSE. 21 | * If not, see . 22 | */ 23 | 24 | #ifndef _ENCODING_H_ 25 | #define _ENCODING_H_ 26 | 27 | #include "reiconv_defines.h" 28 | 29 | /** 30 | * @brief Table of all supported encodings. 
// Hand-written declarations of the system iconv entry points, so that no
// <iconv.h> has to be included by this translation unit.
extern "C"
{
    typedef void* iconv_t;
    void* iconv_open(const char* tocode, const char* fromcode);
    int iconv_close(void* cd);
    size_t iconv(void* cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft);
}

/**
 * @brief Open the conversion descriptor used by the benchmark (UTF-8 -> GB18030).
 * @return Whatever `iconv_open()` returns.
 */
void* glibc_iconv_open()
{
    void* descriptor = iconv_open("GB18030", "UTF-8");
    return descriptor;
}

/**
 * @brief Convert a whole input buffer into a fixed-size output buffer.
 *
 * @param cd            Conversion descriptor passed straight to `iconv()`.
 * @param input_data    Bytes to convert.
 * @param input_length  Number of input bytes.
 * @param output_data   Destination buffer.
 * @param output_length Capacity of the destination buffer in bytes.
 * @return 0 on success, -1 when `iconv()` fails with anything other than EINVAL
 *         or when the final flush call fails.
 */
int glibc_static_size_convert(void* cd, const char *input_data, size_t input_length, char *output_data,
                              size_t output_length)
{
    const size_t iconv_failed = (size_t)(-1);

    // iconv() wants a non-const char**, hence the cast.
    char *src = (char *)input_data;
    size_t src_left = input_length;
    char *dst = output_data;
    size_t dst_left = output_length;

    while (src_left > 0)
    {
        if (iconv(cd, &src, &src_left, &dst, &dst_left) != iconv_failed)
        {
            continue;
        }
        if (errno != EINVAL)
        {
            return -1;
        }
        // EINVAL: stop converting, but still flush below.
        break;
    }

    // A NULL input asks iconv() to flush any pending state into the output.
    if (iconv(cd, NULL, NULL, &dst, &dst_left) == iconv_failed)
    {
        return -1;
    }

    // Fill the rest of the buffer with '\0'.
    memset(dst, 0, dst_left);
    return 0;
}
3 | 4 | --------------------------------------------------------- 5 | Amharic (አማርኛ) ሠላም 6 | Arabic ﺍﻟﺴﻼﻡ ﻋﻠﻴﻜﻢ 7 | Czech (česky) Dobrý den 8 | Danish (Dansk) Hej, Goddag 9 | English Hello 10 | Esperanto Saluton 11 | Estonian Tere, Tervist 12 | FORTRAN PROGRAM 13 | Finnish (Suomi) Hei 14 | French (Français) Bonjour, Salut 15 | German (Deutsch Nord) Guten Tag 16 | German (Deutsch Süd) Grüß Gott 17 | Greek (Ελληνικά) Γειά σας 18 | Hebrew שלום 19 | Italiano Ciao, Buon giorno 20 | Lao(ພາສາລາວ) ສະບາຍດີ, ຂໍໃຫ້ໂຊກດີ 21 | Maltese Ciao 22 | Nederlands, Vlaams Hallo, Dag 23 | Norwegian (Norsk) Hei, God dag 24 | Polish Dzień dobry, Hej 25 | Russian (Русский) Здравствуйте! 26 | Slovak Dobrý deň 27 | Spanish (Español) ¡Hola! 28 | Swedish (Svenska) Hej, Goddag 29 | Thai (ภาษาไทย) สวัสดีครับ, สวัสดีค่ะ 30 | 31 | Tigrigna (ትግርኛ) ሰላማት 32 | Turkish (Türkçe) Merhaba 33 | Vietnamese (Tiếng Việt) Chào bạn 34 | 35 | Japanese (日本語) こんにちは, コンニチハ 36 | Chinese (中文,普通话,汉语) 你好 37 | Cantonese (粵語,廣東話) 早晨, 你好 38 | Korean (한글) 안녕하세요, 안녕하십니까 39 | 40 | Difference among chinese characters in GB, JIS, KSC, BIG5: 41 | GB -- 元气 开发 42 | JIS -- 元気 開発 43 | KSC -- 元氣 開發 44 | BIG5 -- 元氣 開發 45 | 46 | Just for a test of JISX0212: 騏驎 (the second character is of JISX0212) 47 | -------------------------------------------------------------------------------- /tests/data/UCS-2LE-snippet.UTF-8: -------------------------------------------------------------------------------- 1 | This is a list of ways to say hello in various languages. 2 | Its purpose is to illustrate a number of scripts. 
3 | 4 | --------------------------------------------------------- 5 | Amharic (አማርኛ) ሠላም 6 | Arabic ﺍﻟﺴﻼﻡ ﻋﻠﻴﻜﻢ 7 | Czech (česky) Dobrý den 8 | Danish (Dansk) Hej, Goddag 9 | English Hello 10 | Esperanto Saluton 11 | Estonian Tere, Tervist 12 | FORTRAN PROGRAM 13 | Finnish (Suomi) Hei 14 | French (Français) Bonjour, Salut 15 | German (Deutsch Nord) Guten Tag 16 | German (Deutsch Süd) Grüß Gott 17 | Greek (Ελληνικά) Γειά σας 18 | Hebrew שלום 19 | Italiano Ciao, Buon giorno 20 | Lao(ພາສາລາວ) ສະບາຍດີ, ຂໍໃຫ້ໂຊກດີ 21 | Maltese Ciao 22 | Nederlands, Vlaams Hallo, Dag 23 | Norwegian (Norsk) Hei, God dag 24 | Polish Dzień dobry, Hej 25 | Russian (Русский) Здравствуйте! 26 | Slovak Dobrý deň 27 | Spanish (Español) ¡Hola! 28 | Swedish (Svenska) Hej, Goddag 29 | Thai (ภาษาไทย) สวัสดีครับ, สวัสดีค่ะ 30 | 31 | Tigrigna (ትግርኛ) ሰላማት 32 | Turkish (Türkçe) Merhaba 33 | Vietnamese (Tiếng Việt) Chào bạn 34 | 35 | Japanese (日本語) こんにちは, コンニチハ 36 | Chinese (中文,普通话,汉语) 你好 37 | Cantonese (粵語,廣東話) 早晨, 你好 38 | Korean (한글) 안녕하세요, 안녕하십니까 39 | 40 | Difference among chinese characters in GB, JIS, KSC, BIG5: 41 | GB -- 元气 开发 42 | JIS -- 元気 開発 43 | KSC -- 元氣 開發 44 | BIG5 -- 元氣 開發 45 | 46 | Just for a test of JISX0212: 騏驎 (the second character is of JISX0212) 47 | -------------------------------------------------------------------------------- /lib/converters/cp943.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cp943.h 3 | * @brief IBM CP943 4 | * @copyright Copyright (C) 1999-2001 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CP943_H_ 26 | #define _CP943_H_ 27 | 28 | #include "reiconv_defines.h" // IWYU pragma: keep 29 | 30 | /* This is essentially CP932, with many mappings missing in the AIX conversion 31 | table. We just pretend it is the same as CP932. */ 32 | 33 | #define cp943_mbtowc cp932_mbtowc 34 | #define cp943_wctomb cp932_wctomb 35 | 36 | #endif /* _CP943_H_ */ 37 | -------------------------------------------------------------------------------- /lib/converters/flushwc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file flushwc.h 3 | * @brief Output the buffered character. 4 | * @copyright Copyright (C) 2001 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 
19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _FLUSHWC_H_ 26 | #define _FLUSHWC_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int 31 | normal_flushwc (conv_t conv, ucs4_t *pwc) 32 | { 33 | ucs4_t last_wc = conv->istate; 34 | if (last_wc) { 35 | /* Output the buffered character. */ 36 | conv->istate = 0; 37 | *pwc = (ucs4_t) last_wc; 38 | return 1; 39 | } else 40 | return 0; 41 | } 42 | 43 | #endif /* _FLUSHWC_H_ */ 44 | -------------------------------------------------------------------------------- /tests/reiconv-test.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file reiconv-test.hpp 3 | * @brief Conversion test. 4 | * @author ChenPi11 5 | * @copyright Copyright (C) 2024 The C++ Plus Project 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see . 
/**
 * @brief Read a file and run its contents through `reiconv::convert`.
 *
 * @param from Encoding passed as the first argument of `convert` (the source encoding, judging by the name).
 * @param to Encoding passed as the second argument of `convert` (the target encoding, judging by the name).
 * @param input_file_path Path handed to `Buffer::read_from_file`.
 * @return A `Buffer` constructed from the converted string and the literal "converted".
 */
inline Buffer reiconv_test(reiconv::Encoding from, reiconv::Encoding to, const std::filesystem::path &input_file_path)
{
    using namespace reiconv;
    // Load the whole input file.
    Buffer input = Buffer::read_from_file(input_file_path);
    // The input is passed as a {pointer, size} pair.
    // NOTE(review): DISCARD_ILSEQ presumably makes the conversion drop invalid input instead of failing — confirm against the convert() documentation.
    std::string res = convert(from, to, {input.data(), input.size}, ConvertFlag::DISCARD_ILSEQ);
    return Buffer(res, "converted");
}
51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /lib/encoding_indexes.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file encoding_indexes.h 3 | * @brief All encoding's internal indexes. 4 | * @copyright Copyright (C) 1999-2002, 2004-2011, 2016, 2022-2023 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
/*
 * X-macro expansion of "encodings.h.snippet" into an anonymous enum.
 *
 * While the snippet is included, DEFENCODING expands to a single enumerator
 * `ei_<xxx>,` (only its second argument is used), and DEFCODEPAGE and
 * DEFINDEX expand to nothing. The three macros are undefined again afterwards
 * so the snippet can be re-included elsewhere with different definitions.
 */
#define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) ei_##xxx,
#define DEFCODEPAGE(codepage, xxx)
#define DEFINDEX(alias, index)

enum
{
#include "encodings.h.snippet"
    /* Follows every generated enumerator; assuming the snippet contains only
       the three macros above, its value is the number of DEFENCODING entries. */
    ei_end
};

#undef DEFINDEX
#undef DEFCODEPAGE
#undef DEFENCODING
23 | */ 24 | 25 | #ifndef _ISO8859_1_H_ 26 | #define _ISO8859_1_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int iso8859_1_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | *pwc = (ucs4_t)c; 34 | return 1; 35 | } 36 | 37 | static int iso8859_1_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 38 | { 39 | if (wc < 0x0100) 40 | { 41 | *r = wc; 42 | return 1; 43 | } 44 | return RET_ILUNI; 45 | } 46 | 47 | #endif /* _ISO8859_1_H_ */ 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # CMake build files 2 | build 3 | install 4 | CMakeFiles 5 | cmake_install.cmake 6 | CMakeCache.txt 7 | CTestTestfile.cmake 8 | DartConfiguration.tcl 9 | Makefile 10 | cmake_uninstall.cmake 11 | reiconv.hpp 12 | CTestCostData.txt 13 | Testing 14 | install_manifest.txt 15 | CPackConfig.cmake 16 | CPackSourceConfig.cmake 17 | _CPack_Packages 18 | pdb 19 | tests/check-encoding 20 | tests/check-stateful 21 | tests/check-stateless 22 | tests/data-generator 23 | tests/sort 24 | tests/test-bom-state 25 | tests/test-discard 26 | windows/cppp-reiconv.rc 27 | *.inst 28 | *.log 29 | *.dSYM 30 | *.so 31 | *.so.*.*.* 32 | *.a 33 | *.dylib 34 | *.stackdump 35 | *.lib 36 | *.pdb 37 | *.dll 38 | *.exe 39 | *.sln 40 | *.out 41 | *.vcxproj 42 | *.vcxproj.filters 43 | *.vcxproj.user 44 | *.vcxproj.user.* 45 | build.ninja 46 | .ninja_* 47 | Release/ 48 | Debug/ 49 | RelWithDebInfo/ 50 | MinSizeRel/ 51 | x64/ 52 | x86/ 53 | ARM/ 54 | ARM64/ 55 | *.suo 56 | *.user 57 | *.user.* 58 | *.dir 59 | __pycache__ 60 | 61 | # Dist files 62 | dist/ 63 | cppp-reiconv-* 64 | *.tar.xz 65 | *.deb 66 | 67 | # Makefile.devel temp files 68 | lib/generated/genaliases 69 | lib/generated/genindexes 70 | lib/generated/check-encodings 71 | *.gperf 72 | 73 | # Patterns for all subdirectories: all kinds of automatic backup 
# CPack configuration for cppp-reiconv: describes the Debian (.deb) package.
# All CPACK_* variables are set first; the CPack module is included last.

# CMake module that adds install rules for compiler-provided runtime libraries.
include(InstallRequiredSystemLibraries)

# Set the package generator.
set(CPACK_GENERATOR "DEB")

# Set package information (taken from the top-level project() call).
set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME})
set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION})
set(CPACK_PACKAGE_DESCRIPTION ${CMAKE_PROJECT_DESCRIPTION})
set(CPACK_PACKAGE_CONTACT "ChenPi11 ")

# Configure package-specific settings.
# The maintainer reuses the contact string set above.
set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_CONTACT})
set(CPACK_DEBIAN_PACKAGE_SECTION "devel")
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${PROJECT_HOMEPAGE_URL})
# Runtime dependencies with minimum versions.
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.31), libgcc-s1 (>= 10.2.0), libstdc++6 (>= 10.2.0)")

# Generate the package. Must come after the CPACK_* variables above.
include(CPack)
23 | */ 24 | 25 | #ifndef _ASCII_H_ 26 | #define _ASCII_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ascii_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | if (c < 0x80) 34 | { 35 | *pwc = (ucs4_t)c; 36 | return 1; 37 | } 38 | return RET_ILSEQ; 39 | } 40 | 41 | static int ascii_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 42 | { 43 | if (wc < 0x0080) 44 | { 45 | *r = wc; 46 | return 1; 47 | } 48 | return RET_ILUNI; 49 | } 50 | 51 | #endif /* _ASCII_H_ */ 52 | -------------------------------------------------------------------------------- /lib/all_encodings.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file all_encodings.h 3 | * @brief Definition of `all_encodings[]`. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | */ 6 | /* 7 | * This file is part of the cppp-reiconv library. 8 | * 9 | * The cppp-reiconv library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * Lesser General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU Lesser General Public 20 | * License along with the cppp-reiconv library; see the file LICENSE. 21 | * If not, see . 22 | */ 23 | 24 | #ifndef _ALL_ENCODINGS_H_ 25 | #define _ALL_ENCODINGS_H_ 26 | 27 | #include "converters.h" // IWYU pragma: keep. Include all the converters for `encodings.h.snippet`. 
/*
 * X-macro expansion of "encodings.h.snippet" into the `all_encodings[]` table.
 *
 * While the snippet is included, DEFENCODING expands to one brace-enclosed
 * initializer built from its last four arguments (the two input function
 * arguments, then the two output function arguments); DEFCODEPAGE and
 * DEFINDEX expand to nothing. Each initializer fills one `struct encoding`.
 * NOTE(review): presumably the four values are split two-and-two between the
 * nested ifuncs/ofuncs members through brace elision — confirm against the
 * definitions of `struct mbtowc_funcs` and `struct wctomb_funcs`.
 */
#define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) \
    {xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2},
#define DEFCODEPAGE(codepage, xxx)
#define DEFINDEX(alias, name)

/* One element per DEFENCODING entry, in snippet order. */
static struct encoding const all_encodings[] = {
#include "encodings.h.snippet"
};

/* Clean up so the snippet can be expanded again with other definitions. */
#undef DEFINDEX
#undef DEFENCODING
#undef DEFCODEPAGE
23 | */ 24 | 25 | #ifndef _UCS4INTERNAL_H_ 26 | #define _UCS4INTERNAL_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs4internal_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 4) 33 | { 34 | *pwc = *(const unsigned int *)s; 35 | return 4; 36 | } 37 | return RET_TOOFEW(0); 38 | } 39 | 40 | static int ucs4internal_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 41 | { 42 | if (n >= 4) 43 | { 44 | *(unsigned int *)r = wc; 45 | return 4; 46 | } 47 | else 48 | return RET_TOOSMALL; 49 | } 50 | 51 | #endif /* _UCS4INTERNAL_H_ */ 52 | -------------------------------------------------------------------------------- /lib/converters/ucs4be.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs4be.h 3 | * @brief UCS-4BE = UCS-4 big endian 4 | * @copyright Copyright (C) 1999-2000, 2016, 2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UCS4BE_H_ 26 | #define _UCS4BE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs4be_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 4) 33 | { 34 | *pwc = ((ucs4_t)s[0] << 24) + ((ucs4_t)s[1] << 16) + ((ucs4_t)s[2] << 8) + (ucs4_t)s[3]; 35 | return 4; 36 | } 37 | return RET_TOOFEW(0); 38 | } 39 | 40 | static int ucs4be_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 41 | { 42 | if (n >= 4) 43 | { 44 | r[0] = (unsigned char)(wc >> 24); 45 | r[1] = (unsigned char)(wc >> 16); 46 | r[2] = (unsigned char)(wc >> 8); 47 | r[3] = (unsigned char)wc; 48 | return 4; 49 | } 50 | else 51 | return RET_TOOSMALL; 52 | } 53 | 54 | #endif /* _UCS4BE_H_ */ 55 | -------------------------------------------------------------------------------- /lib/converters/ucs4le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs4le.h 3 | * @brief UCS-4LE = UCS-4 little endian 4 | * @copyright Copyright (C) 1999-2000, 2016, 2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UCS4LE_H_ 26 | #define _UCS4LE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs4le_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 4) 33 | { 34 | *pwc = (ucs4_t)s[0] + ((ucs4_t)s[1] << 8) + ((ucs4_t)s[2] << 16) + ((ucs4_t)s[3] << 24); 35 | return 4; 36 | } 37 | return RET_TOOFEW(0); 38 | } 39 | 40 | static int ucs4le_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 41 | { 42 | if (n >= 4) 43 | { 44 | r[0] = (unsigned char)wc; 45 | r[1] = (unsigned char)(wc >> 8); 46 | r[2] = (unsigned char)(wc >> 16); 47 | r[3] = (unsigned char)(wc >> 24); 48 | return 4; 49 | } 50 | else 51 | return RET_TOOSMALL; 52 | } 53 | 54 | #endif /* _UCS4LE_H_ */ 55 | -------------------------------------------------------------------------------- /lib/converters/tis620.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file tis620.h 3 | * @brief TIS620.2533-1 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _TIS620_H_ 26 | #define _TIS620_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int tis620_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | if (c < 0x80) 34 | { 35 | *pwc = (ucs4_t)c; 36 | return 1; 37 | } 38 | else if (c >= 0xa1 && c <= 0xfb && !(c >= 0xdb && c <= 0xde)) 39 | { 40 | *pwc = (ucs4_t)(c + 0x0d60); 41 | return 1; 42 | } 43 | return RET_ILSEQ; 44 | } 45 | 46 | static int tis620_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 47 | { 48 | if (wc < 0x0080) 49 | { 50 | *r = wc; 51 | return 1; 52 | } 53 | else if (wc >= 0x0e01 && wc <= 0x0e5b && !(wc >= 0x0e3b && wc <= 0x0e3e)) 54 | { 55 | *r = wc - 0x0d60; 56 | return 1; 57 | } 58 | return RET_ILUNI; 59 | } 60 | 61 | #endif /* _TIS620_H_ */ 62 | -------------------------------------------------------------------------------- /lib/converters/iso8859_11.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file iso8859_11.h 3 | * @brief ISO-8859-11 4 | * @copyright Copyright (C) 1999-2004, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 
19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _ISO8859_11_H_ 26 | #define _ISO8859_11_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int iso8859_11_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | if (c < 0xa1) 34 | { 35 | *pwc = (ucs4_t)c; 36 | return 1; 37 | } 38 | else if (c <= 0xfb && !(c >= 0xdb && c <= 0xde)) 39 | { 40 | *pwc = (ucs4_t)(c + 0x0d60); 41 | return 1; 42 | } 43 | return RET_ILSEQ; 44 | } 45 | 46 | static int iso8859_11_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 47 | { 48 | if (wc < 0x00a1) 49 | { 50 | *r = wc; 51 | return 1; 52 | } 53 | else if (wc >= 0x0e01 && wc <= 0x0e5b && !(wc >= 0x0e3b && wc <= 0x0e3e)) 54 | { 55 | *r = wc - 0x0d60; 56 | return 1; 57 | } 58 | return RET_ILUNI; 59 | } 60 | 61 | #endif /* _ISO8859_11_H_ */ 62 | -------------------------------------------------------------------------------- /Makefile.devel: -------------------------------------------------------------------------------- 1 | # -*- makefile -*- 2 | # This is the developer's makefile. 3 | # It obeys the environment variables CXX, CFLAGS and CXXFLAGS if you have set them. 


GPERF = gperf
CMAKE = cmake
CTEST = ctest
MKDIR = mkdir
RM = rm

CXX ?= g++

CFLAGS := $(CFLAGS)
CXXFLAGS := $(CXXFLAGS) -Ilib

# Remove a target whose recipe failed, so a half-written "> $@" output or a
# check-encodings binary whose self-check failed is not treated as up to date.
.DELETE_ON_ERROR :

# These targets name actions, not files.
.PHONY : all test check clean

all : lib/generated \
      lib/generated/check-encodings \
      lib/generated/aliases.h \
      lib/generated/indexes.hpp.shippet \
      lib/generated/indexes.h.shippet

lib/generated :
	$(MKDIR) -p lib/generated

# The "| lib/generated" order-only prerequisite makes sure the output directory
# exists before a tool is linked into it, even with -j or when a single target
# is requested, without rebuilding whenever the directory timestamp changes.
# The generated files below inherit that ordering through the tool they run.

# Build the encoding checker and run it immediately.
lib/generated/check-encodings : tools/check-encodings.cpp | lib/generated
	$(CXX) $(CFLAGS) $(CXXFLAGS) $(LDFLAGS) $< -o $@
	./lib/generated/check-encodings

lib/generated/genaliases : tools/genaliases.cpp | lib/generated
	$(CXX) $(CFLAGS) $(CXXFLAGS) $(LDFLAGS) $< -o $@

lib/generated/aliases.gperf : lib/generated/genaliases
	./lib/generated/genaliases $@

lib/generated/aliases.h : lib/generated/aliases.gperf
	$(GPERF) -L ANSI-C -m 10 $< > $@

lib/generated/genindexes : tools/genindexes.cpp | lib/generated
	$(CXX) $(CFLAGS) $(CXXFLAGS) $(LDFLAGS) $< -o $@

lib/generated/indexes.hpp.shippet : lib/generated/genindexes
	./lib/generated/genindexes C++ > $@

lib/generated/indexes.h.shippet : lib/generated/genindexes
	./lib/generated/genindexes C > $@

test : all
	$(RM) -r -f build tests/data/GB18030-2005.TXT tests/data/GB18030-2022.TXT tests/data/UTF-8.TXT
	$(CMAKE) -B build -S . -DCMAKE_BUILD_TYPE=Debug
	$(CMAKE) --build build --config Debug --target all -j $(shell nproc)
	$(CTEST) --test-dir build --output-on-failure

# Alias to test
check : test

# Removes build trees, intermediate generator inputs and the helper binaries.
# The generated headers/snippets themselves are left in place.
clean :
	$(RM) -r -f build
	$(RM) -r -f cppp-reiconv-*
	$(RM) -r -f .cache
	$(RM) -f lib/generated/aliases.gperf
	$(RM) -f lib/generated/genaliases
	$(RM) -f lib/generated/genindexes
	$(RM) -f lib/generated/check-encodings
	$(RM) -f tests/data/GB18030-2005.TXT
	$(RM) -f tests/data/GB18030-2022.TXT
	$(RM) -f tests/data/UTF-8.TXT
--------------------------------------------------------------------------------
/windows/cppp-reiconv.rc.in:
--------------------------------------------------------------------------------
/* Resources for cppp-reiconv.dll */

/* Provides VS_VERSION_INFO and the VERSIONINFO constants used below. */
#include <winver.h>

#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
#define PROJECT_VERSION "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@"

VS_VERSION_INFO VERSIONINFO
 FILEVERSION PROJECT_VERSION_MAJOR,PROJECT_VERSION_MINOR,PROJECT_VERSION_PATCH,0
 PRODUCTVERSION PROJECT_VERSION_MAJOR,PROJECT_VERSION_MINOR,PROJECT_VERSION_PATCH,0
 FILEFLAGSMASK 0x3fL /* VS_FFI_FILEFLAGSMASK */
#ifdef _DEBUG
 FILEFLAGS 0x1L /* VS_FF_DEBUG */
#else
 FILEFLAGS 0x0L
#endif
 FILEOS 0x10004L /* VOS_DOS_WINDOWS32 */
 FILETYPE 0x2L /* VFT_DLL */
 FILESUBTYPE 0x0L /* VFT2_UNKNOWN */
BEGIN
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "04090000" /* Lang = US English, Charset = ASCII */
        BEGIN
            VALUE "Comments", "This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License. You should have received a copy of the GNU Lesser General Public License along with this library; if not, see <https://www.gnu.org/licenses/>.\0"
            VALUE "CompanyName", "The C++ Plus Project\0"
            VALUE "FileDescription", "LGPLed cppp-reiconv for Windows\0"
            VALUE "FileVersion", PROJECT_VERSION "\0"
            VALUE "InternalName", "cppp-reiconv.dll\0"
            VALUE "LegalCopyright", "Copyright (C) 2023\0"
            VALUE "LegalTrademarks", "\0"
            VALUE "OriginalFilename", "cppp-reiconv.dll\0"
            VALUE "ProductName", "cppp-reiconv: Character set conversion library.\0"
            VALUE "ProductVersion", PROJECT_VERSION "\0"
        END
    END
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x0409, 0 /* US English, ASCII */
    END
END
--------------------------------------------------------------------------------
/lib/converters/cns11643.h:
--------------------------------------------------------------------------------
/**
 * @file cns11643.h
 * @brief CNS 11643-1992
 * @copyright Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * @copyright Copyright (C) 2024 The C++ Plus Project.
 */
/*
 * This file is part of the cppp-reiconv library.
 *
 * The cppp-reiconv library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * The cppp-reiconv library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the cppp-reiconv library; see the file LICENSE.
 * If not, see <https://www.gnu.org/licenses/>.
23 | */ 24 | 25 | /* ISO-2022-CN and EUC-TW use CNS 11643-1992 planes 1 to 7. We also 26 | * have a table for the older plane 15. We use a trick to keep the 27 | * Unicode -> CNS 11643 table as small as possible (see cns11643_inv.h). 28 | */ 29 | 30 | #ifndef _CNS11643_H_ 31 | #define _CNS11643_H_ 32 | 33 | #include "reiconv_defines.h" // IWYU pragma: keep 34 | 35 | #include "cns11643_1.h" // IWYU pragma: keep 36 | #include "cns11643_2.h" // IWYU pragma: keep 37 | #include "cns11643_3.h" // IWYU pragma: keep 38 | #include "cns11643_4.h" // IWYU pragma: keep 39 | #include "cns11643_5.h" // IWYU pragma: keep 40 | #include "cns11643_6.h" // IWYU pragma: keep 41 | #include "cns11643_7.h" // IWYU pragma: keep 42 | #include "cns11643_15.h" // IWYU pragma: keep 43 | #include "cns11643_inv.h" // IWYU pragma: keep 44 | 45 | /* Returns the plane number (1,...,7,15) in r[0], the two bytes in r[1],r[2]. */ 46 | #define cns11643_wctomb cns11643_inv_wctomb 47 | 48 | #endif /* _CNS11643_H_ */ 49 | -------------------------------------------------------------------------------- /lib/converters/ucs2be.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs2be.h 3 | * @brief UCS-2BE = UCS-2 big endian 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS2BE_H_ 26 | #define _UCS2BE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs2be_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 2) 33 | { 34 | if (s[0] >= 0xd8 && s[0] < 0xe0) 35 | { 36 | return RET_ILSEQ; 37 | } 38 | else 39 | { 40 | *pwc = (s[0] << 8) + s[1]; 41 | return 2; 42 | } 43 | } 44 | return RET_TOOFEW(0); 45 | } 46 | 47 | static int ucs2be_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 48 | { 49 | if (wc < 0x10000 && !(wc >= 0xd800 && wc < 0xe000)) 50 | { 51 | if (n >= 2) 52 | { 53 | r[0] = (unsigned char)(wc >> 8); 54 | r[1] = (unsigned char)wc; 55 | return 2; 56 | } 57 | else 58 | return RET_TOOSMALL; 59 | } 60 | return RET_ILUNI; 61 | } 62 | 63 | #endif /* _UCS2BE_H_ */ 64 | -------------------------------------------------------------------------------- /lib/converters/ucs2le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs2le.h 3 | * @brief UCS-2LE = UCS-2 little endian 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS2LE_H_ 26 | #define _UCS2LE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs2le_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 2) 33 | { 34 | if (s[1] >= 0xd8 && s[1] < 0xe0) 35 | { 36 | return RET_ILSEQ; 37 | } 38 | else 39 | { 40 | *pwc = s[0] + (s[1] << 8); 41 | return 2; 42 | } 43 | } 44 | return RET_TOOFEW(0); 45 | } 46 | 47 | static int ucs2le_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 48 | { 49 | if (wc < 0x10000 && !(wc >= 0xd800 && wc < 0xe000)) 50 | { 51 | if (n >= 2) 52 | { 53 | r[0] = (unsigned char)wc; 54 | r[1] = (unsigned char)(wc >> 8); 55 | return 2; 56 | } 57 | else 58 | return RET_TOOSMALL; 59 | } 60 | return RET_ILUNI; 61 | } 62 | 63 | #endif /* _UCS2LE_H_ */ 64 | -------------------------------------------------------------------------------- /tests/sort.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file sort.hpp 3 | * @brief Sort file lines. 4 | * @author ChenPi11 5 | * @copyright Copyright (C) 2024 The C++ Plus Project 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #pragma once 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "output.hpp" 34 | #include "utils.hpp" 35 | 36 | inline void sort_file(const std::filesystem::path &file_name, const std::filesystem::path &output_file_name) 37 | { 38 | std::vector lines; 39 | 40 | std::ifstream input_file {file_name}; 41 | if (!input_file.good()) 42 | { 43 | error("sort", "Failed to open the input file."); 44 | } 45 | 46 | std::ofstream output_file {output_file_name, std::ios::trunc}; 47 | if (!output_file.good()) 48 | { 49 | error("sort", "Failed to open the output file."); 50 | } 51 | 52 | std::string line; 53 | while (std::getline(input_file, line)) 54 | { 55 | lines.push_back(line); 56 | } 57 | input_file.close(); 58 | 59 | std::sort(lines.begin(), lines.end()); 60 | 61 | for (const std::string &line : lines) 62 | { 63 | print_string(""); 64 | write_stream(output_file, line + "\n"); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /lib/converters/iso646_cn.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file iso646_cn.h 3 | * @brief ISO646-CN 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | /* 26 | * also known as GB_1988-80 27 | */ 28 | 29 | #ifndef _ISO646_CN_H_ 30 | #define _ISO646_CN_H_ 31 | 32 | #include "reiconv_defines.h" 33 | 34 | static int iso646_cn_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 35 | { 36 | unsigned char c = *s; 37 | if (c < 0x80) 38 | { 39 | if (c == 0x24) 40 | *pwc = (ucs4_t)0x00a5; 41 | else if (c == 0x7e) 42 | *pwc = (ucs4_t)0x203e; 43 | else 44 | *pwc = (ucs4_t)c; 45 | return 1; 46 | } 47 | return RET_ILSEQ; 48 | } 49 | 50 | static int iso646_cn_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 51 | { 52 | if (wc < 0x0080 && !(wc == 0x0024 || wc == 0x007e)) 53 | { 54 | *r = wc; 55 | return 1; 56 | } 57 | if (wc == 0x00a5) 58 | { 59 | *r = 0x24; 60 | return 1; 61 | } 62 | if (wc == 0x203e) 63 | { 64 | *r = 0x7e; 65 | return 1; 66 | } 67 | return RET_ILUNI; 68 | } 69 | 70 | #endif /* _ISO646_CN_H_ */ 71 | -------------------------------------------------------------------------------- /lib/converters/ucs2internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs2internal.h 3 | * @brief UCS-2-INTERNAL = UCS-2 with machine dependent endianness and alignment 4 | * @copyright 
Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS2INTERNAL_H_ 26 | #define _UCS2INTERNAL_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int ucs2internal_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | if (n >= 2) 33 | { 34 | unsigned short x = *(const unsigned short *)s; 35 | if (x >= 0xd800 && x < 0xe000) 36 | { 37 | return RET_ILSEQ; 38 | } 39 | else 40 | { 41 | *pwc = x; 42 | return 2; 43 | } 44 | } 45 | return RET_TOOFEW(0); 46 | } 47 | 48 | static int ucs2internal_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 49 | { 50 | if (wc < 0x10000 && !(wc >= 0xd800 && wc < 0xe000)) 51 | { 52 | if (n >= 2) 53 | { 54 | *(unsigned short *)r = wc; 55 | return 2; 56 | } 57 | else 58 | return RET_TOOSMALL; 59 | } 60 | else 61 | return RET_ILUNI; 62 | } 63 | 64 | #endif /* _UCS2INTERNAL_H_ */ 65 | -------------------------------------------------------------------------------- /lib/converters/iso646_jp.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file iso646_jp.h 3 
| * @brief ISO646-JP 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | /* 26 | * also known as JIS_C6220-1969-RO 27 | */ 28 | 29 | #ifndef _ISO646_JP_H_ 30 | #define _ISO646_JP_H_ 31 | 32 | #include "reiconv_defines.h" 33 | 34 | /* This is the lower half of JIS_X0201. 
*/ 35 | 36 | static int iso646_jp_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 37 | { 38 | unsigned char c = *s; 39 | if (c < 0x80) 40 | { 41 | if (c == 0x5c) 42 | *pwc = (ucs4_t)0x00a5; 43 | else if (c == 0x7e) 44 | *pwc = (ucs4_t)0x203e; 45 | else 46 | *pwc = (ucs4_t)c; 47 | return 1; 48 | } 49 | return RET_ILSEQ; 50 | } 51 | 52 | static int iso646_jp_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 53 | { 54 | if (wc < 0x0080 && !(wc == 0x005c || wc == 0x007e)) 55 | { 56 | *r = wc; 57 | return 1; 58 | } 59 | if (wc == 0x00a5) 60 | { 61 | *r = 0x5c; 62 | return 1; 63 | } 64 | if (wc == 0x203e) 65 | { 66 | *r = 0x7e; 67 | return 1; 68 | } 69 | return RET_ILUNI; 70 | } 71 | 72 | #endif /* _ISO646_JP_H_ */ 73 | -------------------------------------------------------------------------------- /lib/converters/utf32le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file utf32le.h 3 | * @brief UTF-32LE 4 | * @copyright Copyright (C) 1999-2001, 2016, 2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UTF32LE_H_ 26 | #define _UTF32LE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | /* Specification: Unicode 3.1 Standard Annex #19 */ 31 | 32 | static int 33 | utf32le_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 34 | { 35 | if (n >= 4) { 36 | ucs4_t wc = (ucs4_t) s[0] 37 | + ((ucs4_t) s[1] << 8) 38 | + ((ucs4_t) s[2] << 16) 39 | + ((ucs4_t) s[3] << 24); 40 | if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 41 | *pwc = wc; 42 | return 4; 43 | } else 44 | return RET_ILSEQ; 45 | } 46 | return RET_TOOFEW(0); 47 | } 48 | 49 | static int 50 | utf32le_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 51 | { 52 | if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 53 | if (n >= 4) { 54 | r[0] = (unsigned char) wc; 55 | r[1] = (unsigned char) (wc >> 8); 56 | r[2] = (unsigned char) (wc >> 16); 57 | r[3] = 0; 58 | return 4; 59 | } else 60 | return RET_TOOSMALL; 61 | } 62 | return RET_ILUNI; 63 | } 64 | 65 | #endif /* _UTF32LE_H_ */ 66 | -------------------------------------------------------------------------------- /lib/loop_funcs.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file loop_funcs.h 3 | * @brief Data type for general conversion loop. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | */ 6 | /* 7 | * This file is part of the cppp-reiconv library. 8 | * 9 | * The cppp-reiconv library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the cppp-reiconv library; see the file LICENSE.
 * If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef _LOOP_FUNCS_H_
#define _LOOP_FUNCS_H_

/*
 * NOTE(review): the header names of the two includes below are missing in
 * this copy. The struct needs `size_t` and `reiconv_t`, so presumably one is
 * a standard header and the other the library's public header -- restore
 * them from the repository.
 */
#include

#include

/**
 * @brief Data type for general conversion loop.
 *
 * A pair of function pointers: one that performs a conversion step and one
 * that resets the loop. Both receive the conversion descriptor and report
 * their result as a `size_t`.
 */
struct loop_funcs
{
    /**
     * @brief Conversion loop.
     * @param icd Conversion descriptor.
     * @param inbuf Pointer to the input buffer.
     * @param inbytesleft Number of bytes available at `inbuf`.
     * @param outbuf Pointer to the output buffer.
     * @param outbytesleft Number of bytes available at `outbuf`.
     * @return Number of bytes written to `outbuf`.
     * @note NOTE(review): the signature mirrors POSIX iconv(), whose return
     *       value is the count of non-reversible conversions (or
     *       `(size_t)-1` on error), not a byte count -- confirm which
     *       meaning the loop implementations actually use.
     */
    size_t (*loop_convert)(reiconv_t icd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);

    /**
     * @brief Reset conversion loop.
     * @param icd Conversion descriptor.
     * @param outbuf Pointer to the output buffer.
     * @param outbytesleft Number of bytes available at `outbuf`.
     * @return Number of bytes written to `outbuf`.
     * @note NOTE(review): same caveat about the meaning of the return value
     *       as for `loop_convert` -- confirm against the implementations.
     */
    size_t (*loop_reset)(reiconv_t icd, char **outbuf, size_t *outbytesleft);
};

#endif /* _LOOP_FUNCS_H_ */
--------------------------------------------------------------------------------
/lib/converters/cp858.h:
--------------------------------------------------------------------------------
/**
 * @file cp858.h
 * @brief CP858
 * @copyright Copyright (C) 1999-2002, 2016 Free Software Foundation, Inc.
 * @copyright Copyright (C) 2024 The C++ Plus Project.
 */
/*
 * This file is part of the cppp-reiconv library.
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CP858_H_ 26 | #define _CP858_H_ 27 | 28 | #include "converters/cp850.h" 29 | #include "reiconv_defines.h" 30 | 31 | static int cp858_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 32 | { 33 | unsigned char c = *s; 34 | if (c < 0x80) 35 | *pwc = (ucs4_t)c; 36 | else if (c == 0xd5) 37 | *pwc = 0x20ac; 38 | else 39 | *pwc = (ucs4_t)cp850_2uni[c - 0x80]; 40 | return 1; 41 | } 42 | 43 | static int cp858_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 44 | { 45 | unsigned char c = 0; 46 | if (wc < 0x0080) 47 | { 48 | *r = wc; 49 | return 1; 50 | } 51 | else if (wc >= 0x00a0 && wc < 0x0100) 52 | c = cp850_page00[wc - 0x00a0]; 53 | else if (wc == 0x0192) 54 | c = 0x9f; 55 | else if (wc == 0x2017) 56 | c = 0xf2; 57 | else if (wc == 0x20ac) 58 | c = 0xd5; 59 | else if (wc >= 0x2500 && wc < 0x25a8) 60 | c = cp850_page25[wc - 0x2500]; 61 | if (c != 0) 62 | { 63 | *r = c; 64 | return 1; 65 | } 66 | return RET_ILUNI; 67 | } 68 | 69 | #endif /* _CP858_H_ */ 70 | -------------------------------------------------------------------------------- /tests/data/ASCII.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 
0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 
0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x007E 128 | 0x7F 0x007F 129 | -------------------------------------------------------------------------------- /tests/data/TDS565.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x00C7 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x00C4 72 | 0x47 0x0046 73 | 0x48 0x0047 74 | 0x49 0x0048 75 | 0x4A 0x0049 76 | 0x4B 0x004A 77 | 0x4C 0x017D 78 | 0x4D 0x004B 79 | 0x4E 0x004C 80 | 0x4F 0x004D 81 | 0x50 0x004E 82 | 0x51 0x0147 83 | 0x52 0x004F 84 | 0x53 0x00D6 85 | 0x54 0x0050 86 | 0x55 0x0052 87 | 0x56 0x0053 88 | 0x57 0x015E 89 | 0x58 0x0054 90 | 0x59 0x0055 91 | 0x5A 0x00DC 92 | 0x5B 0x0057 93 | 0x5C 0x0059 94 | 0x5D 0x00DD 95 | 0x5E 0x005A 96 | 0x5F 0x005F 97 | 0x60 0x2116 98 | 0x61 
0x0061 99 | 0x62 0x0062 100 | 0x63 0x00E7 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x00E4 104 | 0x67 0x0066 105 | 0x68 0x0067 106 | 0x69 0x0068 107 | 0x6A 0x0069 108 | 0x6B 0x006A 109 | 0x6C 0x017E 110 | 0x6D 0x006B 111 | 0x6E 0x006C 112 | 0x6F 0x006D 113 | 0x70 0x006E 114 | 0x71 0x0148 115 | 0x72 0x006F 116 | 0x73 0x00F6 117 | 0x74 0x0070 118 | 0x75 0x0072 119 | 0x76 0x0073 120 | 0x77 0x015F 121 | 0x78 0x0074 122 | 0x79 0x0075 123 | 0x7A 0x00FC 124 | 0x7B 0x0077 125 | 0x7C 0x0079 126 | 0x7D 0x00FD 127 | 0x7E 0x007A 128 | 0x7F 0x007F 129 | -------------------------------------------------------------------------------- /tests/data/ISO646-CN.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x00A5 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 
0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x203E 128 | 0x7F 0x007F 129 | -------------------------------------------------------------------------------- /tests/data/ISO646-JP.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 
0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x00A5 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x203E 128 | 0x7F 0x007F 129 | -------------------------------------------------------------------------------- /lib/converters/ucs4swapped.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs4swapped.h 3 | * @brief UCS-4-SWAPPED = UCS-4-INTERNAL with inverted endianness 4 | * @copyright Copyright (C) 1999-2000, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS4SWAPPED_H_ 26 | #define _UCS4SWAPPED_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | #include 31 | 32 | static int ucs4swapped_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 33 | { 34 | /* This function assumes that 'unsigned int' has exactly 32 bits. */ 35 | if (sizeof(unsigned int) != 4) 36 | abort(); 37 | 38 | if (n >= 4) 39 | { 40 | unsigned int x = *(const unsigned int *)s; 41 | x = (x >> 24) | ((x >> 8) & 0xff00) | ((x & 0xff00) << 8) | (x << 24); 42 | *pwc = x; 43 | return 4; 44 | } 45 | return RET_TOOFEW(0); 46 | } 47 | 48 | static int ucs4swapped_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 49 | { 50 | /* This function assumes that 'unsigned int' has exactly 32 bits. 
*/ 51 | if (sizeof(unsigned int) != 4) 52 | abort(); 53 | 54 | if (n >= 4) 55 | { 56 | unsigned int x = wc; 57 | x = (x >> 24) | ((x >> 8) & 0xff00) | ((x & 0xff00) << 8) | (x << 24); 58 | *(unsigned int *)r = x; 59 | return 4; 60 | } 61 | else 62 | return RET_TOOSMALL; 63 | } 64 | 65 | #endif /* _UCS4SWAPPED_H_ */ 66 | -------------------------------------------------------------------------------- /lib/converters/utf32be.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file utf32be.h 3 | * @brief UTF-32BE 4 | * @copyright Copyright (C) 1999-2001, 2016, 2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UTF32BE_H_ 26 | #define _UTF32BE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | // Specification: Unicode 3.1 Standard Annex #19 31 | 32 | static int utf32be_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 33 | { 34 | if (n >= 4) 35 | { 36 | ucs4_t wc = ((ucs4_t)s[0] << 24) + ((ucs4_t)s[1] << 16) + ((ucs4_t)s[2] << 8) + (ucs4_t)s[3]; 37 | if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) 38 | { 39 | *pwc = wc; 40 | return 4; 41 | } 42 | else 43 | { 44 | return RET_ILSEQ; 45 | } 46 | } 47 | return RET_TOOFEW(0); 48 | } 49 | 50 | static int utf32be_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 51 | { 52 | if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) 53 | { 54 | if (n >= 4) 55 | { 56 | r[0] = 0; 57 | r[1] = (unsigned char)(wc >> 16); 58 | r[2] = (unsigned char)(wc >> 8); 59 | r[3] = (unsigned char)wc; 60 | return 4; 61 | } 62 | else 63 | { 64 | return RET_TOOSMALL; 65 | } 66 | } 67 | return RET_ILUNI; 68 | } 69 | 70 | #endif /* _UTF32BE_H_ */ 71 | -------------------------------------------------------------------------------- /lib/converters/jisx0201.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file jisx0201.h 3 | * @brief JISX0201.1976-0 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _JISX0201_H_ 26 | #define _JISX0201_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int jisx0201_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | if (c < 0x80) 34 | { 35 | if (c == 0x5c) 36 | *pwc = (ucs4_t)0x00a5; 37 | else if (c == 0x7e) 38 | *pwc = (ucs4_t)0x203e; 39 | else 40 | *pwc = (ucs4_t)c; 41 | return 1; 42 | } 43 | else 44 | { 45 | if (c >= 0xa1 && c < 0xe0) 46 | { 47 | *pwc = (ucs4_t)c + 0xfec0; 48 | return 1; 49 | } 50 | } 51 | return RET_ILSEQ; 52 | } 53 | 54 | static int jisx0201_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 55 | { 56 | if (wc < 0x0080 && !(wc == 0x005c || wc == 0x007e)) 57 | { 58 | *r = wc; 59 | return 1; 60 | } 61 | if (wc == 0x00a5) 62 | { 63 | *r = 0x5c; 64 | return 1; 65 | } 66 | if (wc == 0x203e) 67 | { 68 | *r = 0x7e; 69 | return 1; 70 | } 71 | if (wc >= 0xff61 && wc < 0xffa0) 72 | { 73 | *r = wc - 0xfec0; 74 | return 1; 75 | } 76 | return RET_ILUNI; 77 | } 78 | 79 | #endif /* _JISX0201_H_ */ 80 | -------------------------------------------------------------------------------- /lib/converters/cp1163.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cp1163.h 3 | * @brief CP1163 4 | * @copyright Copyright (C) 1999-2002, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CP1163_H_ 26 | #define _CP1163_H_ 27 | 28 | #include "converters/cp1129.h" 29 | #include "reiconv_defines.h" 30 | 31 | static int cp1163_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 32 | { 33 | unsigned char c = *s; 34 | if (c < 0xa0) 35 | *pwc = (ucs4_t)c; 36 | else if (c == 0xa4) 37 | *pwc = 0x20ac; 38 | else 39 | *pwc = (ucs4_t)cp1129_2uni[c - 0xa0]; 40 | return 1; 41 | } 42 | 43 | static const unsigned char cp1163_page20[8] = { 44 | 0x00, 0x00, 0x00, 0xfe, 0xa4, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 45 | }; 46 | 47 | static int cp1163_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 48 | { 49 | unsigned char c = 0; 50 | if (wc < 0x00a0 || (wc < 0x00a8 && wc != 0x00a4) || wc == 0x00d0) 51 | { 52 | *r = wc; 53 | return 1; 54 | } 55 | else if (wc >= 0x00a8 && wc < 0x01b8) 56 | c = cp1129_page00[wc - 0x00a8]; 57 | else if (wc >= 0x0300 && wc < 0x0328) 58 | c = cp1129_page03[wc - 0x0300]; 59 | else if (wc == 0x203e) 60 | c = 0xaf; 61 | else if (wc >= 0x20a8 && wc < 0x20b0) 62 | c = cp1163_page20[wc - 0x20a8]; 63 | if (c != 0) 64 | { 65 | *r = c; 66 | return 1; 67 | } 68 | return RET_ILUNI; 69 | } 70 | 71 | #endif /* _CP1163_H_ */ 72 | 
-------------------------------------------------------------------------------- /lib/converters/ces_gbk.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ces_gbk.h 3 | * @brief GBK 4 | * @copyright Copyright (C) 1999-2001, 2005, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see <https://www.gnu.org/licenses/>. 
23 | */ 24 | 25 | #ifndef _CES_GBK_H_ 26 | #define _CES_GBK_H_ 27 | 28 | #include "converters/ascii.h" 29 | #include "converters/gbk.h" 30 | #include "reiconv_defines.h" 31 | 32 | #include 33 | 34 | static int ces_gbk_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 35 | { 36 | unsigned char c = *s; 37 | 38 | /* Code set 0 (ASCII or GB 1988-89) */ 39 | if (c < 0x80) 40 | return ascii_mbtowc(conv, pwc, s, n); 41 | /* Code set 1 (GBK) */ 42 | if (c >= 0x81 && c < 0xff) 43 | { 44 | if (n < 2) 45 | return RET_TOOFEW(0); 46 | return gbk_mbtowc(conv, pwc, s, 2); 47 | } 48 | return RET_ILSEQ; 49 | } 50 | 51 | static int ces_gbk_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 52 | { 53 | unsigned char buf[2]; 54 | int ret; 55 | 56 | /* Code set 0 (ASCII or GB 1988-89) */ 57 | ret = ascii_wctomb(conv, r, wc, n); 58 | if (ret != RET_ILUNI) 59 | return ret; 60 | 61 | /* Code set 1 (GBK) */ 62 | ret = gbk_wctomb(conv, buf, wc, 2); 63 | if (ret != RET_ILUNI) 64 | { 65 | if (ret != 2) 66 | abort(); 67 | if (n < 2) 68 | return RET_TOOSMALL; 69 | r[0] = buf[0]; 70 | r[1] = buf[1]; 71 | return 2; 72 | } 73 | 74 | return RET_ILUNI; 75 | } 76 | 77 | #endif /* _CES_GBK_H_ */ 78 | -------------------------------------------------------------------------------- /lib/converters/cp1162.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cp1162.h 3 | * @brief CP1162 4 | * @copyright Copyright (C) 1999-2002, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CP1162_H_ 26 | #define _CP1162_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static int cp1162_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 31 | { 32 | unsigned char c = *s; 33 | if (c < 0x80) 34 | { 35 | *pwc = (ucs4_t)c; 36 | return 1; 37 | } 38 | else 39 | { 40 | unsigned short wc = cp874_2uni[c - 0x80]; 41 | if (wc != 0xfffd) 42 | { 43 | *pwc = (ucs4_t)wc; 44 | return 1; 45 | } 46 | if (c < 0xa0) 47 | { 48 | *pwc = (ucs4_t)c; 49 | return 1; 50 | } 51 | } 52 | return RET_ILSEQ; 53 | } 54 | 55 | static int cp1162_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 56 | { 57 | unsigned char c = 0; 58 | if (wc < 0x0080) 59 | { 60 | *r = wc; 61 | return 1; 62 | } 63 | else if (wc >= 0x0080 && wc < 0x00a0 && cp874_2uni[wc - 0x0080] == 0xfffd) 64 | c = wc; 65 | else if (wc == 0x00a0) 66 | c = 0xa0; 67 | else if (wc >= 0x0e00 && wc < 0x0e60) 68 | c = cp874_page0e[wc - 0x0e00]; 69 | else if (wc >= 0x2010 && wc < 0x2028) 70 | c = cp874_page20[wc - 0x2010]; 71 | else if (wc == 0x20ac) 72 | c = 0x80; 73 | if (c != 0) 74 | { 75 | *r = c; 76 | return 1; 77 | } 78 | return RET_ILUNI; 79 | } 80 | 81 | #endif /* _CP1162_H_ */ 82 | -------------------------------------------------------------------------------- /lib/converters/ucs2swapped.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs2swapped.h 3 | * @brief UCS-2-SWAPPED = UCS-2-INTERNAL with inverted endianness 4 | * @copyright Copyright (C) 1999-2001, 
2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS2SWAPPED_H_ 26 | #define _UCS2SWAPPED_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | #include 31 | 32 | static int ucs2swapped_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 33 | { 34 | /* This function assumes that 'unsigned short' has exactly 16 bits. */ 35 | if (sizeof(unsigned short) != 2) 36 | abort(); 37 | 38 | if (n >= 2) 39 | { 40 | unsigned short x = *(const unsigned short *)s; 41 | x = (x >> 8) | (x << 8); 42 | if (x >= 0xd800 && x < 0xe000) 43 | { 44 | return RET_ILSEQ; 45 | } 46 | else 47 | { 48 | *pwc = x; 49 | return 2; 50 | } 51 | } 52 | return RET_TOOFEW(0); 53 | } 54 | 55 | static int ucs2swapped_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 56 | { 57 | /* This function assumes that 'unsigned short' has exactly 16 bits. 
*/ 58 | if (sizeof(unsigned short) != 2) 59 | abort(); 60 | 61 | if (wc < 0x10000 && !(wc >= 0xd800 && wc < 0xe000)) 62 | { 63 | if (n >= 2) 64 | { 65 | unsigned short x = wc; 66 | x = (x >> 8) | (x << 8); 67 | *(unsigned short *)r = x; 68 | return 2; 69 | } 70 | else 71 | return RET_TOOSMALL; 72 | } 73 | else 74 | return RET_ILUNI; 75 | } 76 | 77 | #endif /* _UCS2SWAPPED_H_ */ 78 | -------------------------------------------------------------------------------- /lib/converters/cns11643_4.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cns11643_4.h 3 | * @brief CNS 11643-1992 plane 4 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | /* 26 | * The table has been split into two parts. Each part's entries fit it 16 bits. 27 | * But the combined table would need 17 bits per entry. 
28 | */ 29 | 30 | #ifndef _CNS11643_4_H_ 31 | #define _CNS11643_4_H_ 32 | 33 | #include "reiconv_defines.h" 34 | 35 | #include "cns11643_4a.h" 36 | #include "cns11643_4b.h" 37 | 38 | static int cns11643_4_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 39 | { 40 | unsigned char c1 = s[0]; 41 | if ((c1 >= 0x21 && c1 <= 0x6e)) 42 | { 43 | if (n >= 2) 44 | { 45 | unsigned char c2 = s[1]; 46 | if (c2 >= 0x21 && c2 < 0x7f) 47 | { 48 | unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 49 | ucs4_t wc = 0xfffd; 50 | unsigned short swc; 51 | { 52 | if (i < 2914) 53 | swc = cns11643_4a_2uni_page21[i], wc = cns11643_4a_2uni_upages[swc >> 8] | (swc & 0xff); 54 | else if (i < 7298) 55 | swc = cns11643_4b_2uni_page40[i - 2914], wc = cns11643_4b_2uni_upages[swc >> 8] | (swc & 0xff); 56 | } 57 | if (wc != 0xfffd) 58 | { 59 | *pwc = wc; 60 | return 2; 61 | } 62 | } 63 | return RET_ILSEQ; 64 | } 65 | return RET_TOOFEW(0); 66 | } 67 | return RET_ILSEQ; 68 | } 69 | 70 | #endif /* _CNS11643_4_H_ */ 71 | -------------------------------------------------------------------------------- /lib/converters/ces_big5.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ces_big5.h 3 | * @brief BIG-5 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CES_BIG5_H_ 26 | #define _CES_BIG5_H_ 27 | 28 | #include "converters/ascii.h" 29 | #include "converters/big5.h" 30 | #include "reiconv_defines.h" 31 | 32 | #include 33 | 34 | static int ces_big5_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 35 | { 36 | unsigned char c = *s; 37 | /* Code set 0 (ASCII) */ 38 | if (c < 0x80) 39 | return ascii_mbtowc(conv, pwc, s, n); 40 | /* Code set 1 (BIG5) */ 41 | if (c >= 0xa1 && c < 0xff) 42 | { 43 | if (n < 2) 44 | return RET_TOOFEW(0); 45 | { 46 | unsigned char c2 = s[1]; 47 | if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0xa1 && c2 < 0xff)) 48 | return big5_mbtowc(conv, pwc, s, 2); 49 | else 50 | return RET_ILSEQ; 51 | } 52 | } 53 | return RET_ILSEQ; 54 | } 55 | 56 | static int ces_big5_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 57 | { 58 | unsigned char buf[2]; 59 | int ret; 60 | 61 | /* Code set 0 (ASCII) */ 62 | ret = ascii_wctomb(conv, r, wc, n); 63 | if (ret != RET_ILUNI) 64 | return ret; 65 | 66 | /* Code set 1 (BIG5) */ 67 | ret = big5_wctomb(conv, buf, wc, 2); 68 | if (ret != RET_ILUNI) 69 | { 70 | if (ret != 2) 71 | abort(); 72 | if (n < 2) 73 | return RET_TOOSMALL; 74 | r[0] = buf[0]; 75 | r[1] = buf[1]; 76 | return 2; 77 | } 78 | 79 | return RET_ILUNI; 80 | } 81 | 82 | #endif /* _CES_BIG5_H_ */ 83 | -------------------------------------------------------------------------------- /tests/check-encoding.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file check-encoding.hpp 3 | * @brief Simple check of a Encoding class. 4 | * @author ChenPi11 5 | * @copyright Copyright (C) 2024 The C++ Plus Project 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 
9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #include 26 | 27 | #include 28 | #include 29 | 30 | #include "output.hpp" 31 | 32 | #define COMPARE(name, codepage, index) Encoding(name) == Encoding(codepage) && Encoding(name) == Encoding(index) 33 | #define ASSERT(name, codepage, index) \ 34 | if (!(COMPARE(name, codepage, index))) \ 35 | { \ 36 | error("check-encoding", print_string("name={}, codepage={}, index={} are not equal.", name, \ 37 | std::to_string(codepage), std::to_string((int)index))); \ 38 | } 39 | 40 | int main() 41 | { 42 | using namespace reiconv; 43 | 44 | ASSERT("ASCII", 367, Encodings::ASCII); 45 | ASSERT("UTF-8", 65001, Encodings::UTF8); 46 | ASSERT("CP936", 936, Encodings::CP936); 47 | ASSERT("ISO-8859-1", 28591, Encodings::ISO8859_1); 48 | ASSERT("MSEE", 1250, Encodings::MSEE); 49 | ASSERT("IBM-850", 850, Encodings::IBM850); 50 | ASSERT("MacRoman", 10000, Encodings::MACINTOSH); 51 | ASSERT("MacRoman", 10000, Encodings::MAC_ROMAN); 52 | ASSERT("PT154", 154, Encodings::PT154); 53 | ASSERT("IBM-856", 856, Encodings::IBM856); 54 | ASSERT("csPC8codepage437", 437, Encodings::CP437); 55 | ASSERT("CP0037", 37, Encodings::EBCDIC_037); 56 | 57 | return EXIT_SUCCESS; 58 | } 59 | 
-------------------------------------------------------------------------------- /tests/check-stateful.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file check-stateful.cpp 3 | * @brief Simple check of a stateful encoding. 4 | * @author ChenPi11 5 | * @copyright Copyright (C) 2024 The C++ Plus Project 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see <https://www.gnu.org/licenses/>. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "buffer.hpp" 30 | #include "cppp/reiconv.hpp" 31 | #include "reiconv-test.hpp" 32 | #include "utils.hpp" 33 | 34 | std::filesystem::path datadir; 35 | std::string charset; 36 | 37 | int main(int argc, char *argv[]) 38 | { 39 | if (argc != 3) 40 | { 41 | print_stderr("Usage: check-stateful DATADIR CHARSET\n"); 42 | return EXIT_FAILURE; 43 | } 44 | datadir = argv[1]; 45 | charset = argv[2]; 46 | 47 | std::string charsetf = replace(charset, ":", "-"); 48 | 49 | std::filesystem::path snippet_alt_file { datadir / (charsetf + "-snippet.alt") }; 50 | std::filesystem::path utf8_snippet_file { datadir / (charsetf + "-snippet.UTF-8") }; 51 | 52 | Buffer res, snippet_data; 53 | if (std::filesystem::exists(snippet_alt_file)) 54 | { 55 | res = reiconv_test(charset, reiconv::Encodings::UTF8, snippet_alt_file); 56 | snippet_data = Buffer::read_from_file(utf8_snippet_file); 57 | res.compare_assert(snippet_data); 58 | } 59 | 60 | res = reiconv_test(charset, reiconv::Encodings::UTF8, datadir / (charsetf + "-snippet")); 61 | snippet_data = Buffer::read_from_file(utf8_snippet_file); 62 | res.compare_assert(snippet_data); 63 | 64 | res = reiconv_test(reiconv::Encodings::UTF8, charset, utf8_snippet_file); 65 | snippet_data = Buffer::read_from_file(datadir / (charsetf + "-snippet")); 66 | res.compare_assert(snippet_data); 67 | 68 | success("check-stateful", charset + " OK."); 69 | 70 | return EXIT_SUCCESS; 71 | } 72 | -------------------------------------------------------------------------------- /lib/reiconv.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file reiconv.cpp 3 | * @brief reiconv C++ bindings implementation. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | */ 6 | /* 7 | * This file is part of the cppp-reiconv library. 
8 | * 9 | * The cppp-reiconv library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | * Lesser General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU Lesser General Public 20 | * License along with the cppp-reiconv library; see the file LICENSE. 21 | * If not, see . 22 | */ 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | _CPPP_API reiconv::Encoding::Encoding(const char *const name) 35 | { 36 | _index = ::reiconv_lookup_from_name(name); 37 | if (_index == -1) 38 | { 39 | throw std::invalid_argument("Invalid encoding name."); 40 | } 41 | } 42 | 43 | _CPPP_API reiconv::Encoding::Encoding(const int codepage) 44 | { 45 | _index = ::reiconv_lookup_from_codepage(codepage); 46 | if (_index == -1) 47 | { 48 | throw std::invalid_argument("Invalid codepage."); 49 | } 50 | } 51 | 52 | _CPPP_API reiconv::VersionInfo reiconv::version{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; 53 | 54 | _CPPP_API std::string reiconv::convert(reiconv::Encoding from, reiconv::Encoding to, const std::string_view input, 55 | enum ConvertFlag flag) 56 | { 57 | ::reiconv_t cd = ::reiconv_open_from_index(from, to, (enum ::ConvertFlag)flag); 58 | if (cd == (::reiconv_t)(-1)) 59 | { 60 | throw std::system_error(errno, std::system_category(), "reiconv_open_from_index"); 61 | } 62 | std::size_t outlen = ::reiconv_result_size(cd, input.data(), input.size()); 63 | 64 | std::string res(outlen, '\0'); 65 | char *result = const_cast(res.data()); 66 | if 
(::reiconv_convert_static_size(cd, input.data(), input.size(), result, outlen) != 0) 67 | { 68 | ::reiconv_handle_close(cd); 69 | throw std::system_error(errno, std::system_category(), "reiconv_convert"); 70 | } 71 | 72 | ::reiconv_handle_close(cd); 73 | return std::move(res); 74 | } 75 | -------------------------------------------------------------------------------- /lib/converters/dec_kanji.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file dec_kanji.h 3 | * @brief DEC-KANJI 4 | * @copyright Copyright (C) 2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _DEC_KANJI_H_ 26 | #define _DEC_KANJI_H_ 27 | 28 | #include "converters/ascii.h" 29 | #include "converters/jisx0208.h" 30 | #include "reiconv_defines.h" 31 | 32 | #include 33 | 34 | static int dec_kanji_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 35 | { 36 | unsigned char c = *s; 37 | /* Code set 0 (ASCII or JIS X 0201-1976 Roman) */ 38 | if (c < 0x80) 39 | return ascii_mbtowc(conv, pwc, s, n); 40 | /* Code set 1 (JIS X 0208) */ 41 | if (c >= 0xa1 && c < 0xf5) 42 | { 43 | if (n < 2) 44 | return RET_TOOFEW(0); 45 | { 46 | unsigned char c2 = s[1]; 47 | if (c2 >= 0xa1 && c2 < 0xff) 48 | { 49 | unsigned char buf[2]; 50 | buf[0] = c - 0x80; 51 | buf[1] = c2 - 0x80; 52 | return jisx0208_mbtowc(conv, pwc, buf, 2); 53 | } 54 | } 55 | } 56 | return RET_ILSEQ; 57 | } 58 | 59 | static int dec_kanji_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 60 | { 61 | unsigned char buf[2]; 62 | int ret; 63 | 64 | /* Code set 0 (ASCII or JIS X 0201-1976 Roman) */ 65 | ret = ascii_wctomb(conv, r, wc, n); 66 | if (ret != RET_ILUNI) 67 | return ret; 68 | 69 | /* Code set 1 (JIS X 0208) */ 70 | ret = jisx0208_wctomb(conv, buf, wc, 2); 71 | if (ret != RET_ILUNI) 72 | { 73 | if (ret != 2) 74 | abort(); 75 | if (n < 2) 76 | return RET_TOOSMALL; 77 | r[0] = buf[0] + 0x80; 78 | r[1] = buf[1] + 0x80; 79 | return 2; 80 | } 81 | 82 | return RET_ILUNI; 83 | } 84 | 85 | #endif /* _DEC_KANJI_H_ */ 86 | -------------------------------------------------------------------------------- /lib/converters/euc_cn.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file euc_cn.h 3 | * @brief EUC-CN 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _EUC_CN_H_ 26 | #define _EUC_CN_H_ 27 | 28 | #include "converters/ascii.h" 29 | #include "converters/gb2312.h" 30 | #include "reiconv_defines.h" 31 | 32 | #include 33 | 34 | static int euc_cn_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 35 | { 36 | unsigned char c = *s; 37 | /* Code set 0 (ASCII or GB 1988-89) */ 38 | if (c < 0x80) 39 | return ascii_mbtowc(conv, pwc, s, n); 40 | /* Code set 1 (GB 2312-1980) */ 41 | if (c >= 0xa1 && c < 0xff) 42 | { 43 | if (n < 2) 44 | return RET_TOOFEW(0); 45 | { 46 | unsigned char c2 = s[1]; 47 | if (c2 >= 0xa1 && c2 < 0xff) 48 | { 49 | unsigned char buf[2]; 50 | buf[0] = c - 0x80; 51 | buf[1] = c2 - 0x80; 52 | return gb2312_mbtowc(conv, pwc, buf, 2); 53 | } 54 | else 55 | return RET_ILSEQ; 56 | } 57 | } 58 | return RET_ILSEQ; 59 | } 60 | 61 | static int euc_cn_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 62 | { 63 | unsigned char buf[2]; 64 | int ret; 65 | 66 | /* Code set 0 (ASCII or GB 1988-89) */ 67 | ret = ascii_wctomb(conv, r, wc, n); 68 | if (ret != RET_ILUNI) 69 | return ret; 70 | 71 | /* Code set 1 (GB 2312-1980) */ 72 | ret = gb2312_wctomb(conv, buf, wc, 2); 73 | if (ret != RET_ILUNI) 74 | { 75 | if (ret != 2) 76 | 
abort(); 77 | if (n < 2) 78 | return RET_TOOSMALL; 79 | r[0] = buf[0] + 0x80; 80 | r[1] = buf[1] + 0x80; 81 | return 2; 82 | } 83 | 84 | return RET_ILUNI; 85 | } 86 | 87 | #endif /* _EUC_CN_H_ */ 88 | -------------------------------------------------------------------------------- /lib/converters/gb12345.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file gb12345.h 3 | * @brief GB/T 12345-1990 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | /* 26 | * GB/T 12345-1990 is a traditional chinese counterpart of GB 2312-1986. 27 | * According to the unicode.org tables: 28 | * 2146 characters have been changed to their traditional counterpart, 29 | * 103 characters have been added, no characters have been removed. 30 | * Therefore we use an auxiliary table, which contains only the changes. 
31 | */ 32 | 33 | #ifndef _GB12345_H_ 34 | #define _GB12345_H_ 35 | 36 | #include "converters/gb2312.h" 37 | #include "reiconv_defines.h" 38 | 39 | #include "gb12345ext.h" 40 | 41 | static int gb12345_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 42 | { 43 | int ret; 44 | 45 | /* The gb12345ext table overrides some entries in the gb2312 table. */ 46 | /* Try the GB12345 extensions -> Unicode table. */ 47 | ret = gb12345ext_mbtowc(conv, pwc, s, n); 48 | if (ret != RET_ILSEQ) 49 | return ret; 50 | /* Try the GB2312 -> Unicode table. */ 51 | ret = gb2312_mbtowc(conv, pwc, s, n); 52 | return ret; 53 | } 54 | 55 | static int gb12345_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 56 | { 57 | int ret; 58 | 59 | /* The gb12345ext table overrides some entries in the gb2312 table. */ 60 | /* Try the Unicode -> GB12345 extensions table. */ 61 | ret = gb12345ext_wctomb(conv, r, wc, n); 62 | if (ret != RET_ILUNI) 63 | return ret; 64 | /* Try the Unicode -> GB2312 table, and check that the resulting GB2312 65 | byte sequence is not overridden by the GB12345 extensions table. 
*/ 66 | ret = gb2312_wctomb(conv, r, wc, n); 67 | if (ret == 2 && gb12345ext_mbtowc(conv, &wc, r, 2) == 2) 68 | return RET_ILUNI; 69 | else 70 | return ret; 71 | } 72 | 73 | #endif /* _GB12345_H_ */ 74 | -------------------------------------------------------------------------------- /.rubisco/dist.yml: -------------------------------------------------------------------------------- 1 | name: 📁 Make ${{ project.name }} distribution package 2 | 3 | vars: 4 | - ignores: 5 | [ 6 | ".git", 7 | ".venv", 8 | ".github", 9 | "build", 10 | "install", 11 | "dist", 12 | "CMakeFiles", 13 | "cmake_install.cmake", 14 | "CMakeCache.txt", 15 | "CTestTestfile.cmake", 16 | "DartConfiguration.tcl", 17 | "Makefile", 18 | "cmake_uninstall.cmake", 19 | "reiconv.hpp", 20 | "CTestCostData.txt", 21 | "Testing", 22 | "install_manifest.txt", 23 | "CPackConfig.cmake", 24 | "CPackSourceConfig.cmake", 25 | "_CPack_Packages", 26 | "tests/check-ascii-converters", 27 | "tests/check-stateful", 28 | "tests/check-stateless", 29 | "tests/data-generator", 30 | "tests/sort", 31 | "windows/cppp-reiconv.rc", 32 | "pdb", 33 | "*.inst", 34 | "*.log", 35 | "*.dSYM", 36 | "*.so", 37 | "*.so.*.*.*", 38 | "*.a", 39 | "*.dylib", 40 | "*.stackdump", 41 | "*.lib", 42 | "*.pdb", 43 | "*.dll", 44 | "*.exe", 45 | "*.sln", 46 | "*.vcxproj", 47 | "*.vcxproj.filters", 48 | "*.vcxproj.user", 49 | "*.vcxproj.user.*", 50 | "build.ninja", 51 | ".ninja_*", 52 | "Release", 53 | "Debug", 54 | "RelWithDebInfo", 55 | "MinSizeRel", 56 | "x64", 57 | "x86", 58 | "ARM", 59 | "ARM64", 60 | "*.suo", 61 | "*.user", 62 | "*.user.*", 63 | "*.dir", 64 | "__pycache__", 65 | "dist", 66 | "cppp-reiconv-*", 67 | "*.tar.xz", 68 | "*.deb", 69 | "lib/generated/genaliases", 70 | "*.gperf", 71 | "*.orig", 72 | "*.rej", 73 | "*~", 74 | "*.kate-swp", 75 | "*.swp", 76 | ".vscode", 77 | ".cache", 78 | "build-aux/", 79 | "tests/data/UTF-8.TXT", 80 | "tests/data/GB18030-2005.TXT", 81 | "tests/data/GB18030-2022.TXT", 82 | "tests/tmp-*.TXT", 83 | "*.tmp", 
      ]

# Build steps, in order: create the staging and output directories, copy the
# tree into the staging directory (minus the ignore list), then archive it.
steps:
  - name: 📦 Make source distribution directory
    mkdir: ${{ project.name }}-${{ project.version }}
  - mkdir: dist

  # `excludes` receives the `ignores` variable declared under `vars`.
  - name: 📦 Copy files to source distribution directory
    copy: ${{ cwd }}
    to: ${{ project.name }}-${{ project.version }}
    excludes: ${{ ignores }}

  # Two archive formats are requested for the same staging directory.
  - name: 📦 Packing the source distribution
    compress: ${{ project.name }}-${{ project.version }}
    to: dist/${{ project.name }}-${{ project.version }}
    format: [zip, tar.xz]
--------------------------------------------------------------------------------
/include/iconv.h.in:
--------------------------------------------------------------------------------
/**
 * @file iconv.h
 * @author ChenPi11
 * @brief reiconv iconv API compatibility header.
 * @version 3.0.0
 * @copyright Copyright (C) 2024 The C++ Plus Project.
 */
/*
 * This file is part of the cppp-reiconv Library.
 *
 * The cppp-reiconv Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * The cppp-reiconv Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the cppp-reiconv Library; see the file LICENSE.
 * If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef _ICONV_H_
#define _ICONV_H_

#if _MSC_VER >= 1600
#pragma execution_character_set("utf-8")
#endif

/* NOTE(review): the header name on the next line was lost in extraction; it
   must be the header that declares reiconv_t, reiconv_open, reiconv_iconv and
   reiconv_handle_close -- restore it from the repository. */
#include

/* Drop any previous definitions so the aliases below take effect. */
#undef iconv_t
#undef iconv_open
#undef iconv
#undef iconv_close

/**
 * @brief Iconv conversion descriptor type.
 * @note This type is equivalent to `reiconv_t`.
 * @see reiconv_t
 */
#define iconv_t reiconv_t

/**
 * @brief Open a conversion descriptor. For iconv compatibility.
 * @param tocode The target encoding. Supports "//IGNORE".
 * @param fromcode The source encoding.
 * @return The conversion descriptor. (iconv_t)(-1) on error with errno set.
 * @note This function is equivalent to `reiconv_open(tocode, fromcode)`.
 * @see reiconv_open
 */
#define iconv_open reiconv_open

/**
 * @brief Convert at most `*inbytesleft` bytes from `*inbuf` according to the code conversion
 *        algorithm specified by `cd` and place up to `*outbytesleft` bytes in buffer at `*outbuf`.
 * @param cd The conversion descriptor.
 * @param inbuf The input buffer.
 * @param inbytesleft The number of bytes in the input buffer.
 * @param outbuf The output buffer.
 * @param outbytesleft The number of bytes in the output buffer.
 * @return The number of bytes converted, or (size_t)(-1) on error with errno set.
 * @note This function is equivalent to `reiconv_iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft)`.
 * @note NOTE(review): POSIX iconv() returns the number of non-reversible conversions
 *       performed, not a byte count -- confirm which of the two reiconv_iconv() returns.
 * @see reiconv_iconv
 */
#define iconv reiconv_iconv

/**
 * @brief Close the conversion descriptor.
 * @param cd The conversion descriptor.
 * @return 0 on success, -1 on error with errno set.
 * @note This function is equivalent to `reiconv_handle_close(cd)`.
 * @see reiconv_handle_close
 */
#define iconv_close reiconv_handle_close

#endif /* _ICONV_H_ */
--------------------------------------------------------------------------------
/lib/converters/euc_kr.h:
--------------------------------------------------------------------------------
/**
 * @file euc_kr.h
 * @brief EUC-KR
 * @copyright Copyright (C) 1999-2001, 2007, 2016 Free Software Foundation, Inc.
 * @copyright Copyright (C) 2024 The C++ Plus Project.
 */
/*
 * This file is part of the cppp-reiconv library.
 *
 * The cppp-reiconv library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * The cppp-reiconv library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the cppp-reiconv library; see the file LICENSE.
 * If not, see <https://www.gnu.org/licenses/>.
23 | */ 24 | 25 | #ifndef _EUC_KR_H_ 26 | #define _EUC_KR_H_ 27 | 28 | #include "converters/ascii.h" 29 | #include "converters/ksc5601.h" 30 | #include "reiconv_defines.h" 31 | #include 32 | 33 | /* Specification: RFC 1557 */ 34 | 35 | static int euc_kr_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 36 | { 37 | unsigned char c = *s; 38 | /* Code set 0 (ASCII or KS C 5636-1993) */ 39 | if (c < 0x80) 40 | return ascii_mbtowc(conv, pwc, s, n); 41 | /* Code set 1 (KS C 5601-1992, now KS X 1001:2002) */ 42 | if (c >= 0xa1 && c < 0xff) 43 | { 44 | if (n < 2) 45 | return RET_TOOFEW(0); 46 | { 47 | unsigned char c2 = s[1]; 48 | if (c2 >= 0xa1 && c2 < 0xff) 49 | { 50 | unsigned char buf[2]; 51 | buf[0] = c - 0x80; 52 | buf[1] = c2 - 0x80; 53 | return ksc5601_mbtowc(conv, pwc, buf, 2); 54 | } 55 | else 56 | return RET_ILSEQ; 57 | } 58 | } 59 | return RET_ILSEQ; 60 | } 61 | 62 | static int euc_kr_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 63 | { 64 | unsigned char buf[2]; 65 | int ret; 66 | 67 | /* Code set 0 (ASCII or KS C 5636-1993) */ 68 | ret = ascii_wctomb(conv, r, wc, n); 69 | if (ret != RET_ILUNI) 70 | return ret; 71 | 72 | /* Code set 1 (KS C 5601-1992, now KS X 1001:2002) */ 73 | ret = ksc5601_wctomb(conv, buf, wc, 2); 74 | if (ret != RET_ILUNI) 75 | { 76 | if (ret != 2) 77 | abort(); 78 | if (n < 2) 79 | return RET_TOOSMALL; 80 | r[0] = buf[0] + 0x80; 81 | r[1] = buf[1] + 0x80; 82 | return 2; 83 | } 84 | 85 | return RET_ILUNI; 86 | } 87 | 88 | #endif /* _EUC_KR_H_ */ 89 | -------------------------------------------------------------------------------- /tools/check-encodings.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file check-encodings.cpp 3 | * @brief Check if all names in `encodings.h.snippet` are in upper case. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | * @note `name_canonicalize()` will upper the encoding name. So lower-case cannot be recognized. 
6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv Library. 9 | * 10 | * The cppp-reiconv Library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv Library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv Library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | template void check_upper_case(const char *const (&names)[N], ...) 30 | { 31 | for (std::size_t i = 0; i < N; i++) 32 | { 33 | const char *name = names[i]; 34 | for (; *name; name++) 35 | { 36 | unsigned char c = *(unsigned char *)name; 37 | if (std::islower(c)) 38 | { 39 | std::fprintf(stderr, "Encoding name `%s` is not in upper case.\n", names[i]); 40 | std::exit(EXIT_FAILURE); 41 | } 42 | } 43 | } 44 | } 45 | 46 | int main(int argc, char *argv[]) 47 | { 48 | if (argc != 1) 49 | { 50 | std::fprintf(stderr, "Usage: %s\n", argv[0]); 51 | return EXIT_FAILURE; 52 | } 53 | 54 | #define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) \ 55 | { \ 56 | static const char *const names[] = BRACIFY xxx_names; \ 57 | check_upper_case(names); \ 58 | } 59 | #define DEFCODEPAGE(codepage, xxx) 60 | #define DEFINDEX(alias, index) 61 | #define BRACIFY(...) 
\ 62 | { \ 63 | __VA_ARGS__ \ 64 | } 65 | 66 | #include "encodings.h.snippet" 67 | 68 | return EXIT_SUCCESS; 69 | } 70 | -------------------------------------------------------------------------------- /lib/converters/ucs2.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs2.h 3 | * @brief UCS-2 4 | * @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS2_H_ 26 | #define _UCS2_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | #include 31 | 32 | /* Here we accept FFFE/FEFF marks as endianness indicators everywhere 33 | in the stream, not just at the beginning. The default is big-endian. */ 34 | /* The state is 0 if big-endian, 1 if little-endian. */ 35 | static int ucs2_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 36 | { 37 | state_t state = conv->ibyteorder; 38 | int count = 0; 39 | for (; n >= 2 && count <= RET_COUNT_MAX && count <= INT_MAX - 2;) 40 | { 41 | ucs4_t wc = (state ? 
s[0] + (s[1] << 8) : (s[0] << 8) + s[1]); 42 | if (wc == 0xfeff) 43 | { 44 | } 45 | else if (wc == 0xfffe) 46 | { 47 | state ^= 1; 48 | } 49 | else if (wc >= 0xd800 && wc < 0xe000) 50 | { 51 | conv->ibyteorder = state; 52 | return RET_SHIFT_ILSEQ(count); 53 | } 54 | else 55 | { 56 | *pwc = wc; 57 | conv->ibyteorder = state; 58 | return count + 2; 59 | } 60 | s += 2; 61 | n -= 2; 62 | count += 2; 63 | } 64 | conv->ibyteorder = state; 65 | return RET_TOOFEW(count); 66 | } 67 | 68 | /* But we output UCS-2 in big-endian order, without byte-order mark. */ 69 | /* RFC 2152 says: 70 | "ISO/IEC 10646-1:1993(E) specifies that when characters the UCS-2 form are 71 | serialized as octets, that the most significant octet appear first." */ 72 | static int ucs2_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 73 | { 74 | if (wc < 0x10000 && wc != 0xfffe && !(wc >= 0xd800 && wc < 0xe000)) 75 | { 76 | if (n >= 2) 77 | { 78 | r[0] = (unsigned char)(wc >> 8); 79 | r[1] = (unsigned char)wc; 80 | return 2; 81 | } 82 | else 83 | return RET_TOOSMALL; 84 | } 85 | else 86 | return RET_ILUNI; 87 | } 88 | 89 | #endif /* _UCS2_H_ */ 90 | -------------------------------------------------------------------------------- /lib/converters/ucs4.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ucs4.h 3 | * @brief UCS-4 4 | * @copyright Copyright (C) 1999-2024 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 
14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UCS4_H_ 26 | #define _UCS4_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | #include 31 | 32 | /* Here we accept FFFE0000/0000FEFF marks as endianness indicators everywhere 33 | in the stream, not just at the beginning. The default is big-endian. */ 34 | /* The state is 0 if big-endian, 1 if little-endian. */ 35 | static int ucs4_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 36 | { 37 | state_t state = conv->ibyteorder; 38 | int count = 0; 39 | for (; n >= 4 && count <= RET_COUNT_MAX && count <= INT_MAX - 4;) 40 | { 41 | ucs4_t wc = (state ? (ucs4_t)s[0] + ((ucs4_t)s[1] << 8) + ((ucs4_t)s[2] << 16) + ((ucs4_t)s[3] << 24) 42 | : ((ucs4_t)s[0] << 24) + ((ucs4_t)s[1] << 16) + ((ucs4_t)s[2] << 8) + (ucs4_t)s[3]); 43 | if (wc == 0x0000feff) 44 | { 45 | } 46 | else if (wc == 0xfffe0000u) 47 | { 48 | state ^= 1; 49 | } 50 | else if (wc <= 0x7fffffff) 51 | { 52 | *pwc = wc; 53 | conv->ibyteorder = state; 54 | return count + 4; 55 | } 56 | else 57 | { 58 | conv->ibyteorder = state; 59 | return RET_SHIFT_ILSEQ(count); 60 | } 61 | s += 4; 62 | n -= 4; 63 | count += 4; 64 | } 65 | conv->ibyteorder = state; 66 | return RET_TOOFEW(count); 67 | } 68 | 69 | /* But we output UCS-4 in big-endian order, without byte-order mark. 
*/ 70 | static int ucs4_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 71 | { 72 | if (wc <= 0x7fffffff) 73 | { 74 | if (n >= 4) 75 | { 76 | r[0] = (unsigned char)(wc >> 24); 77 | r[1] = (unsigned char)(wc >> 16); 78 | r[2] = (unsigned char)(wc >> 8); 79 | r[3] = (unsigned char)wc; 80 | return 4; 81 | } 82 | else 83 | return RET_TOOSMALL; 84 | } 85 | else 86 | return RET_ILUNI; 87 | } 88 | 89 | #endif /* _UCS4_H_ */ 90 | -------------------------------------------------------------------------------- /lib/converters/utf16be.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file utf16be.h 3 | * @brief UTF-16BE 4 | * @copyright Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UTF16BE_H_ 26 | #define _UTF16BE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | /* Specification: RFC 2781 */ 31 | 32 | static int utf16be_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 33 | { 34 | int count = 0; 35 | if (n >= 2) 36 | { 37 | ucs4_t wc = (s[0] << 8) + s[1]; 38 | if (wc >= 0xd800 && wc < 0xdc00) 39 | { 40 | if (n >= 4) 41 | { 42 | ucs4_t wc2 = (s[2] << 8) + s[3]; 43 | if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) 44 | goto ilseq; 45 | *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); 46 | return count + 4; 47 | } 48 | } 49 | else if (wc >= 0xdc00 && wc < 0xe000) 50 | { 51 | goto ilseq; 52 | } 53 | else 54 | { 55 | *pwc = wc; 56 | return count + 2; 57 | } 58 | } 59 | return RET_TOOFEW(count); 60 | 61 | ilseq: 62 | return RET_SHIFT_ILSEQ(count); 63 | } 64 | 65 | static int utf16be_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 66 | { 67 | if (!(wc >= 0xd800 && wc < 0xe000)) 68 | { 69 | if (wc < 0x10000) 70 | { 71 | if (n >= 2) 72 | { 73 | r[0] = (unsigned char)(wc >> 8); 74 | r[1] = (unsigned char)wc; 75 | return 2; 76 | } 77 | else 78 | return RET_TOOSMALL; 79 | } 80 | else if (wc < 0x110000) 81 | { 82 | if (n >= 4) 83 | { 84 | ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); 85 | ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); 86 | r[0] = (unsigned char)(wc1 >> 8); 87 | r[1] = (unsigned char)wc1; 88 | r[2] = (unsigned char)(wc2 >> 8); 89 | r[3] = (unsigned char)wc2; 90 | return 4; 91 | } 92 | else 93 | return RET_TOOSMALL; 94 | } 95 | } 96 | return RET_ILUNI; 97 | } 98 | 99 | #endif /* _UTF16BE_H_ */ 100 | -------------------------------------------------------------------------------- /lib/converters/utf16le.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file utf16le.h 3 | * @brief UTF-16LE 4 | * @copyright Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 
6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _UTF16LE_H_ 26 | #define _UTF16LE_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | /* Specification: RFC 2781 */ 31 | 32 | static int utf16le_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 33 | { 34 | int count = 0; 35 | if (n >= 2) 36 | { 37 | ucs4_t wc = s[0] + (s[1] << 8); 38 | if (wc >= 0xd800 && wc < 0xdc00) 39 | { 40 | if (n >= 4) 41 | { 42 | ucs4_t wc2 = s[2] + (s[3] << 8); 43 | if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) 44 | goto ilseq; 45 | *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); 46 | return count + 4; 47 | } 48 | } 49 | else if (wc >= 0xdc00 && wc < 0xe000) 50 | { 51 | goto ilseq; 52 | } 53 | else 54 | { 55 | *pwc = wc; 56 | return count + 2; 57 | } 58 | } 59 | return RET_TOOFEW(count); 60 | 61 | ilseq: 62 | return RET_SHIFT_ILSEQ(count); 63 | } 64 | 65 | static int utf16le_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 66 | { 67 | if (!(wc >= 0xd800 && wc < 0xe000)) 68 | { 69 | if (wc < 0x10000) 70 | { 71 | if (n >= 2) 72 | { 73 | r[0] = (unsigned char)wc; 74 | r[1] = (unsigned char)(wc >> 8); 75 | return 2; 76 | } 77 | else 78 | return RET_TOOSMALL; 79 | } 80 | else if (wc < 
0x110000) 81 | { 82 | if (n >= 4) 83 | { 84 | ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); 85 | ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); 86 | r[0] = (unsigned char)wc1; 87 | r[1] = (unsigned char)(wc1 >> 8); 88 | r[2] = (unsigned char)wc2; 89 | r[3] = (unsigned char)(wc2 >> 8); 90 | return 4; 91 | } 92 | else 93 | return RET_TOOSMALL; 94 | } 95 | } 96 | return RET_ILUNI; 97 | } 98 | 99 | #endif /* _UTF16LE_H_ */ 100 | -------------------------------------------------------------------------------- /lib/converters/iso8859_15.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file iso8859_15.h 3 | * @brief ISO-8859-15 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _ISO8859_15_H_ 26 | #define _ISO8859_15_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static const unsigned short iso8859_15_2uni[32] = { 31 | /* 0xa0 */ 32 | 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 33 | 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 34 | /* 0xb0 */ 35 | 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 36 | 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 37 | }; 38 | 39 | static int iso8859_15_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 40 | { 41 | unsigned char c = *s; 42 | if (c >= 0xa0 && c < 0xc0) 43 | *pwc = (ucs4_t)iso8859_15_2uni[c - 0xa0]; 44 | else 45 | *pwc = (ucs4_t)c; 46 | return 1; 47 | } 48 | 49 | static const unsigned char iso8859_15_page00[32] = { 50 | 0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0xa5, 0x00, 0xa7, /* 0xa0-0xa7 */ 51 | 0x00, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 52 | 0xb0, 0xb1, 0xb2, 0xb3, 0x00, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 53 | 0x00, 0xb9, 0xba, 0xbb, 0x00, 0x00, 0x00, 0xbf, /* 0xb8-0xbf */ 54 | }; 55 | static const unsigned char iso8859_15_page01[48] = { 56 | 0x00, 0x00, 0xbc, 0xbd, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 57 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 58 | 0xa6, 0xa8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 59 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 60 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 61 | 0xbe, 0x00, 0x00, 0x00, 0x00, 0xb4, 0xb8, 0x00, /* 0x78-0x7f */ 62 | }; 63 | 64 | static int iso8859_15_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 65 | { 66 | unsigned char c = 0; 67 | if (wc < 0x00a0) 68 | { 69 | *r = wc; 70 | return 1; 71 | } 72 | else if (wc >= 0x00a0 && wc < 0x00c0) 73 | c = iso8859_15_page00[wc - 0x00a0]; 74 | else if (wc >= 0x00c0 && wc < 0x0100) 75 | c = wc; 76 | else if (wc >= 0x0150 && wc < 0x0180) 77 | c = iso8859_15_page01[wc - 0x0150]; 78 | 
else if (wc == 0x20ac) 79 | c = 0xa4; 80 | if (c != 0) 81 | { 82 | *r = c; 83 | return 1; 84 | } 85 | return RET_ILUNI; 86 | } 87 | 88 | #endif /* _ISO8859_15_H_ */ 89 | -------------------------------------------------------------------------------- /tests/data/JIS_X0201.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x00A5 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 
98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x203E 128 | 0x7F 0x007F 129 | 0xA1 0xFF61 130 | 0xA2 0xFF62 131 | 0xA3 0xFF63 132 | 0xA4 0xFF64 133 | 0xA5 0xFF65 134 | 0xA6 0xFF66 135 | 0xA7 0xFF67 136 | 0xA8 0xFF68 137 | 0xA9 0xFF69 138 | 0xAA 0xFF6A 139 | 0xAB 0xFF6B 140 | 0xAC 0xFF6C 141 | 0xAD 0xFF6D 142 | 0xAE 0xFF6E 143 | 0xAF 0xFF6F 144 | 0xB0 0xFF70 145 | 0xB1 0xFF71 146 | 0xB2 0xFF72 147 | 0xB3 0xFF73 148 | 0xB4 0xFF74 149 | 0xB5 0xFF75 150 | 0xB6 0xFF76 151 | 0xB7 0xFF77 152 | 0xB8 0xFF78 153 | 0xB9 0xFF79 154 | 0xBA 0xFF7A 155 | 0xBB 0xFF7B 156 | 0xBC 0xFF7C 157 | 0xBD 0xFF7D 158 | 0xBE 0xFF7E 159 | 0xBF 0xFF7F 160 | 0xC0 0xFF80 161 | 0xC1 0xFF81 162 | 0xC2 0xFF82 163 | 0xC3 0xFF83 164 | 0xC4 0xFF84 165 | 0xC5 0xFF85 166 | 0xC6 0xFF86 167 | 0xC7 0xFF87 168 | 0xC8 0xFF88 169 | 0xC9 0xFF89 170 | 0xCA 0xFF8A 171 | 0xCB 0xFF8B 172 | 0xCC 0xFF8C 173 | 0xCD 0xFF8D 174 | 0xCE 0xFF8E 175 | 0xCF 0xFF8F 176 | 0xD0 0xFF90 177 | 0xD1 0xFF91 178 | 0xD2 0xFF92 179 | 0xD3 0xFF93 180 | 0xD4 0xFF94 181 | 0xD5 0xFF95 182 | 0xD6 0xFF96 183 | 0xD7 0xFF97 184 | 0xD8 0xFF98 185 | 0xD9 0xFF99 186 | 0xDA 0xFF9A 187 | 0xDB 0xFF9B 188 | 0xDC 0xFF9C 189 | 0xDD 0xFF9D 190 | 0xDE 0xFF9E 191 | 0xDF 0xFF9F 192 | -------------------------------------------------------------------------------- /lib/localecharset/lc_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file lc_utils.h 3 | * @brief Utilities for locale charset. 
#ifndef _LC_UTILS_H_
#define _LC_UTILS_H_

#include <locale.h> /* setlocale, LC_CTYPE (used by GET_LOCALE) */
#include <stdlib.h> /* getenv, NULL (used by GET_LOCALE / IS_INVALID_LOCALE) */

#if defined(__APPLE__) && defined(__MACH__)
#define IS_MACOSX 1
#else
#define IS_MACOSX 0
#endif // defined(__APPLE__) && defined(__MACH__)

// __has_macos__, __has_windows__, __has_cygwin__ and HAVE_LANGINFO_CODESET are not
// defined in this header; they must be visible wherever these two macros are expanded.
#define IS_DARWIN7 (__has_macos__ && IS_MACOSX && HAVE_LANGINFO_CODESET)

#define IS_WINDOWS_NATIVE (__has_windows__ && !__has_cygwin__)

// True when `locale` is a null pointer or points at an empty string.
// The argument is parenthesised so expressions such as `p + 1` or `a ? b : c`
// expand correctly; note that it is evaluated twice.
#define IS_INVALID_LOCALE(locale) ((locale) == NULL || (locale)[0] == '\0')

// Assigns to `locale` the first usable value among the LC_ALL, LC_CTYPE and LANG
// environment variables, falling back to setlocale(LC_CTYPE, NULL) when all three
// are unset or empty. `locale` must be an assignable pointer expression.
#define GET_LOCALE(locale)                                                                                             \
    do                                                                                                                 \
    {                                                                                                                  \
        (locale) = getenv("LC_ALL");                                                                                   \
        if (IS_INVALID_LOCALE(locale))                                                                                 \
        {                                                                                                              \
            (locale) = getenv("LC_CTYPE");                                                                             \
            if (IS_INVALID_LOCALE(locale))                                                                             \
            {                                                                                                          \
                (locale) = getenv("LANG");                                                                             \
                if (IS_INVALID_LOCALE(locale))                                                                         \
                {                                                                                                      \
                    (locale) = setlocale(LC_CTYPE, NULL);                                                              \
                }                                                                                                      \
            }                                                                                                          \
        }                                                                                                              \
    } while (0)

#endif // _LC_UTILS_H_
0x41D2 0x1EA2 2 | 0x41DE 0x00C3 3 | 0x41F2 0x1EA0 4 | 0x42F2 0x1E04 5 | 0x43EC 0x0106 6 | 0x44F2 0x1E0C 7 | 0x45D2 0x1EBA 8 | 0x45DE 0x1EBC 9 | 0x45F2 0x1EB8 10 | 0x47EC 0x01F4 11 | 0x48F2 0x1E24 12 | 0x49CC 0x00CC 13 | 0x49D2 0x1EC8 14 | 0x49DE 0x0128 15 | 0x49F2 0x1ECA 16 | 0x4BEC 0x1E30 17 | 0x4BF2 0x1E32 18 | 0x4CEC 0x0139 19 | 0x4CF2 0x1E36 20 | 0x4DEC 0x1E3E 21 | 0x4DF2 0x1E42 22 | 0x4ECC 0x01F8 23 | 0x4EEC 0x0143 24 | 0x4EF2 0x1E46 25 | 0x4FCC 0x00D2 26 | 0x4FD2 0x1ECE 27 | 0x4FDE 0x00D5 28 | 0x4FF2 0x1ECC 29 | 0x50EC 0x1E54 30 | 0x52EC 0x0154 31 | 0x52F2 0x1E5A 32 | 0x53EC 0x015A 33 | 0x53F2 0x1E62 34 | 0x54F2 0x1E6C 35 | 0x55D2 0x1EE6 36 | 0x55DE 0x0168 37 | 0x55F2 0x1EE4 38 | 0x56DE 0x1E7C 39 | 0x56F2 0x1E7E 40 | 0x57CC 0x1E80 41 | 0x57EC 0x1E82 42 | 0x57F2 0x1E88 43 | 0x59CC 0x1EF2 44 | 0x59D2 0x1EF6 45 | 0x59DE 0x1EF8 46 | 0x59EC 0x00DD 47 | 0x59F2 0x1EF4 48 | 0x5AEC 0x0179 49 | 0x5AF2 0x1E92 50 | 0x61D2 0x1EA3 51 | 0x61DE 0x00E3 52 | 0x61F2 0x1EA1 53 | 0x62F2 0x1E05 54 | 0x63EC 0x0107 55 | 0x64F2 0x1E0D 56 | 0x65D2 0x1EBB 57 | 0x65DE 0x1EBD 58 | 0x65F2 0x1EB9 59 | 0x67EC 0x01F5 60 | 0x68F2 0x1E25 61 | 0x69CC 0x00EC 62 | 0x69D2 0x1EC9 63 | 0x69DE 0x0129 64 | 0x69F2 0x1ECB 65 | 0x6BEC 0x1E31 66 | 0x6BF2 0x1E33 67 | 0x6CEC 0x013A 68 | 0x6CF2 0x1E37 69 | 0x6DEC 0x1E3F 70 | 0x6DF2 0x1E43 71 | 0x6ECC 0x01F9 72 | 0x6EEC 0x0144 73 | 0x6EF2 0x1E47 74 | 0x6FCC 0x00F2 75 | 0x6FD2 0x1ECF 76 | 0x6FDE 0x00F5 77 | 0x6FF2 0x1ECD 78 | 0x70EC 0x1E55 79 | 0x72EC 0x0155 80 | 0x72F2 0x1E5B 81 | 0x73EC 0x015B 82 | 0x73F2 0x1E63 83 | 0x74F2 0x1E6D 84 | 0x75D2 0x1EE7 85 | 0x75DE 0x0169 86 | 0x75F2 0x1EE5 87 | 0x76DE 0x1E7D 88 | 0x76F2 0x1E7F 89 | 0x77CC 0x1E81 90 | 0x77EC 0x1E83 91 | 0x77F2 0x1E89 92 | 0x79CC 0x1EF3 93 | 0x79D2 0x1EF7 94 | 0x79DE 0x1EF9 95 | 0x79EC 0x00FD 96 | 0x79F2 0x1EF5 97 | 0x7AEC 0x017A 98 | 0x7AF2 0x1E93 99 | 0xA8CC 0x1FED 100 | 0xA8EC 0x0385 101 | 0xA8EC 0x1FEE 102 | 0xC2CC 0x1EA6 103 | 0xC2D2 0x1EA8 104 | 0xC2DE 0x1EAA 105 | 0xC2EC 0x1EA4 106 | 
0xC2F2 0x1EAC 107 | 0xC3CC 0x1EB0 108 | 0xC3D2 0x1EB2 109 | 0xC3DE 0x1EB4 110 | 0xC3EC 0x1EAE 111 | 0xC3F2 0x1EB6 112 | 0xC5EC 0x01FA 113 | 0xC6EC 0x01FC 114 | 0xC7EC 0x1E08 115 | 0xCACC 0x1EC0 116 | 0xCAD2 0x1EC2 117 | 0xCADE 0x1EC4 118 | 0xCAEC 0x1EBE 119 | 0xCAF2 0x1EC6 120 | 0xCC 0x0340 121 | 0xCFEC 0x1E2E 122 | 0xD3DE 0x1E4C 123 | 0xD4CC 0x1ED2 124 | 0xD4D2 0x1ED4 125 | 0xD4DE 0x1ED6 126 | 0xD4EC 0x1ED0 127 | 0xD4F2 0x1ED8 128 | 0xD5CC 0x1EDC 129 | 0xD5D2 0x1EDE 130 | 0xD5DE 0x1EE0 131 | 0xD5EC 0x1EDA 132 | 0xD5F2 0x1EE2 133 | 0xD6DE 0x1E4E 134 | 0xD8EC 0x01FE 135 | 0xDADE 0x1E78 136 | 0xDCCC 0x01DB 137 | 0xDCEC 0x01D7 138 | 0xDDCC 0x1EEA 139 | 0xDDD2 0x1EEC 140 | 0xDDDE 0x1EEE 141 | 0xDDEC 0x1EE8 142 | 0xDDF2 0x1EF0 143 | 0xE2CC 0x1EA7 144 | 0xE2D2 0x1EA9 145 | 0xE2DE 0x1EAB 146 | 0xE2EC 0x1EA5 147 | 0xE2F2 0x1EAD 148 | 0xE3CC 0x1EB1 149 | 0xE3D2 0x1EB3 150 | 0xE3DE 0x1EB5 151 | 0xE3EC 0x1EAF 152 | 0xE3F2 0x1EB7 153 | 0xE5EC 0x01FB 154 | 0xE6EC 0x01FD 155 | 0xE7EC 0x1E09 156 | 0xEACC 0x1EC1 157 | 0xEAD2 0x1EC3 158 | 0xEADE 0x1EC5 159 | 0xEAEC 0x1EBF 160 | 0xEAF2 0x1EC7 161 | 0xEC 0x0341 162 | 0xEFEC 0x1E2F 163 | 0xF3DE 0x1E4D 164 | 0xF4CC 0x1ED3 165 | 0xF4D2 0x1ED5 166 | 0xF4DE 0x1ED7 167 | 0xF4EC 0x1ED1 168 | 0xF4F2 0x1ED9 169 | 0xF5CC 0x1EDD 170 | 0xF5D2 0x1EDF 171 | 0xF5DE 0x1EE1 172 | 0xF5EC 0x1EDB 173 | 0xF5F2 0x1EE3 174 | 0xF6DE 0x1E4F 175 | 0xF8EC 0x01FF 176 | 0xFADE 0x1E79 177 | 0xFCCC 0x01DC 178 | 0xFCEC 0x01D8 179 | 0xFDCC 0x1EEB 180 | 0xFDD2 0x1EED 181 | 0xFDDE 0x1EEF 182 | 0xFDEC 0x1EE9 183 | 0xFDF2 0x1EF1 184 | -------------------------------------------------------------------------------- /lib/converters/cp1161.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file cp1161.h 3 | * @brief CP1161 4 | * @copyright Copyright (C) 1999-2002, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 
9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 23 | */ 24 | 25 | #ifndef _CP1161_H_ 26 | #define _CP1161_H_ 27 | 28 | #include "converters/cp874.h" 29 | #include "reiconv_defines.h" 30 | 31 | static const unsigned short cp1161_2uni[96] = 32 | { 33 | /* 0xa0 */ 34 | 0x0e48, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 35 | 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 36 | /* 0xb0 */ 37 | 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 38 | 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 39 | /* 0xc0 */ 40 | 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 41 | 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 42 | /* 0xd0 */ 43 | 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 44 | 0x0e38, 0x0e39, 0x0e3a, 0x0e49, 0x0e4a, 0x0e4b, 0x20ac, 0x0e3f, 45 | /* 0xe0 */ 46 | 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 47 | 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 48 | /* 0xf0 */ 49 | 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 50 | 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x00a2, 0x00ac, 0x00a6, 0x00a0, 51 | }; 52 | 53 | static int cp1161_mbtowc(conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 54 | { 55 | unsigned 
char c = *s; 56 | if (c < 0x80) 57 | { 58 | *pwc = (ucs4_t)c; 59 | return 1; 60 | } 61 | else if (c < 0xa0) 62 | { 63 | } 64 | else 65 | { 66 | *pwc = (ucs4_t)cp1161_2uni[c - 0xa0]; 67 | return 1; 68 | } 69 | return RET_ILSEQ; 70 | } 71 | 72 | static const unsigned char cp1161_page00[16] = 73 | { 74 | 0xff, 0x00, 0xfc, 0x00, 0x00, 0x00, 0xfe, 0x00, /* 0xa0-0xa7 */ 75 | 0x00, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 76 | }; 77 | 78 | static int cp1161_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 79 | { 80 | unsigned char c = 0; 81 | if (wc < 0x0080) 82 | { 83 | *r = wc; 84 | return 1; 85 | } 86 | else if (wc >= 0x00a0 && wc < 0x00b0) 87 | c = cp1161_page00[wc - 0x00a0]; 88 | else if (wc >= 0x0e48 && wc < 0x0e4c) 89 | c = wc - 0x0d60; 90 | else if (wc >= 0x0e00 && wc < 0x0e60) 91 | c = cp874_page0e[wc - 0x0e00]; 92 | else if (wc == 0x20ac) 93 | c = 0xde; 94 | if (c != 0) 95 | { 96 | *r = c; 97 | return 1; 98 | } 99 | return RET_ILUNI; 100 | } 101 | 102 | #endif /* _CP1161_H_ */ 103 | -------------------------------------------------------------------------------- /tools/genindexes.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file genaliases.cpp 3 | * @brief Generate index enum for reiconv users. 4 | * @copyright Copyright (C) 2024 The C++ Plus Project. 5 | */ 6 | /* 7 | * This file is part of the cppp-reiconv Library. 8 | * 9 | * The cppp-reiconv Library is free software; you can redistribute it 10 | * and/or modify it under the terms of the GNU Lesser General Public 11 | * License as published by the Free Software Foundation; either version 3 12 | * of the License, or (at your option) any later version. 13 | * 14 | * The cppp-reiconv Library is distributed in the hope that it will be 15 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 17 | * Lesser General Public License for more details. 18 | * 19 | * You should have received a copy of the GNU Lesser General Public 20 | * License along with the cppp-reiconv Library; see the file LICENSE. 21 | * If not, see . 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "encoding_indexes.h" // IWYU pragma: keep 29 | 30 | #define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) \ 31 | {#xxx_index, ei_##xxx}, 32 | #define DEFCODEPAGE(codepage, xxx) 33 | #define DEFINDEX(alias, index) 34 | 35 | std::unordered_map enum_data = { 36 | #include "encodings.h.snippet" 37 | }; 38 | 39 | #undef DEFINDEX 40 | #undef DEFCODEPAGE 41 | #undef DEFENCODING 42 | 43 | #define DEFENCODING(xxx_names, xxx, xxx_index, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) 44 | #define DEFCODEPAGE(codepage, xxx) 45 | #define DEFINDEX(alias, index) {#alias, #index}, 46 | 47 | std::unordered_map aliases = { 48 | #include "encodings.h.snippet" 49 | }; 50 | 51 | #undef DEFINDEX 52 | #undef DEFCODEPAGE 53 | #undef DEFENCODING 54 | 55 | void cpp_gen() 56 | { 57 | std::puts("namespace reiconv::encoding"); 58 | std::puts("{"); 59 | std::puts(" enum class Encodings"); 60 | std::puts(" {"); 61 | for (auto &it : enum_data) 62 | { 63 | std::printf(" %s = %d,\n", it.first.c_str(), it.second); 64 | } 65 | for (auto &it : aliases) 66 | { 67 | std::printf(" %s = %s,\n", it.first.c_str(), it.second.c_str()); 68 | } 69 | std::puts(" };"); 70 | std::puts("} // reiconv::encoding"); 71 | std::fflush(stdout); 72 | } 73 | 74 | void c_gen() 75 | { 76 | std::puts("enum"); 77 | std::puts("{"); 78 | for (auto &it : enum_data) 79 | { 80 | std::printf(" ENCODING_%s = %d,\n", it.first.c_str(), it.second); 81 | } 82 | for (auto &it : aliases) 83 | { 84 | std::printf(" ENCODING_%s = ENCODING_%s,\n", it.first.c_str(), it.second.c_str()); 85 | } 86 | std::puts("};"); 87 | std::fflush(stdout); 88 | } 89 | 90 | int main(int argc, char* 
argv[]) 91 | { 92 | if (argc != 2) 93 | { 94 | std::fprintf(stderr, "Usage: %s [C | C++]\n", argv[0]); 95 | return EXIT_FAILURE; 96 | } 97 | 98 | if (std::string(argv[1]) == "C") 99 | { 100 | c_gen(); 101 | } 102 | else if (std::string(argv[1]) == "C++") 103 | { 104 | cpp_gen(); 105 | } 106 | else 107 | { 108 | std::fprintf(stderr, "Usage: %s [C | C++]\n", argv[0]); 109 | return EXIT_FAILURE; 110 | } 111 | 112 | return EXIT_SUCCESS; 113 | } 114 | -------------------------------------------------------------------------------- /tests/data-generator.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file reiconv-test.hpp 3 | * @brief Creates the beyond-BMP part of the GB18030.TXT reference table. 4 | * Or creates the UTF-8.TXT reference table. 5 | * @author ChenPi11 6 | * @copyright Copyright (C) 2005, 2012 Free Software Foundation, Inc. 7 | * @copyright Copyright (C) 2024 The C++ Plus Project. 8 | */ 9 | /** 10 | * This file is part of the cppp-reiconv library. 11 | * 12 | * The cppp-reiconv library is free software; you can redistribute it 13 | * and/or modify it under the terms of the GNU Lesser General Public 14 | * License as published by the Free Software Foundation; either version 3 15 | * of the License, or (at your option) any later version. 16 | * 17 | * The cppp-reiconv library is distributed in the hope that it will be 18 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | * Lesser General Public License for more details. 21 | * 22 | * You should have received a copy of the GNU Lesser General Public 23 | * License along with the cppp-reiconv library; see the file LICENSE. 24 | * If not, see . */ 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | /** 31 | * @brief Generates the GB18030 reference table. 
32 | */ 33 | void gengb18030z() 34 | { 35 | int uc = 0x10000; 36 | 37 | for (int i1 = 0x90; i1 <= 0xe3; i1++) 38 | { 39 | for (int i2 = 0x30; i2 <= 0x39; i2++) 40 | { 41 | for (int i3 = 0x81; i3 <= 0xfe; i3++) 42 | { 43 | for (int i4 = 0x30; i4 <= 0x39; i4++) 44 | { 45 | std::printf("0x%02X%02X%02X%02X\t0x%X\n", i1, i2, i3, i4, uc); 46 | uc++; 47 | if (uc == 0x110000) 48 | { 49 | goto done; 50 | } 51 | } 52 | } 53 | } 54 | } 55 | done: 56 | 57 | std::fflush(stdout); 58 | } 59 | 60 | /** 61 | * @brief Generates the UTF-8 reference table. 62 | */ 63 | void genutf8() 64 | { 65 | int i1, i2, i3; 66 | 67 | /* Range 0x0000..0x007f */ 68 | for (int i1 = 0; i1 < 0x80; i1++) 69 | { 70 | std::printf("0x%02X\t0x%04X\n", i1, i1); 71 | } 72 | 73 | /* Range 0x0080..0x07ff */ 74 | for (i1 = 2; i1 < 32; i1++) 75 | { 76 | for (i2 = 0; i2 < 64; i2++) 77 | { 78 | std::printf("0x%02X%02X\t0x%04X\n", 0xc0 + i1, 0x80 + i2, (i1 << 6) + i2); 79 | } 80 | } 81 | /* Range 0x0800..0xffff, except 0xd800..0xdfff */ 82 | for (i1 = 0; i1 < 16; i1++) 83 | for (i2 = (i1 == 0 ? 
32 : 0); i2 < 64; i2++) 84 | { 85 | for (i3 = 0; i3 < 64; i3++) 86 | { 87 | int u = (i1 << 12) + (i2 << 6) + i3; 88 | if (!(u >= 0xd800 && u < 0xe000)) 89 | { 90 | std::printf("0x%02X%02X%02X\t0x%04X\n", 0xe0 + i1, 0x80 + i2, 0x80 + i3, u); 91 | } 92 | } 93 | } 94 | } 95 | 96 | int main(int argc, char *argv[]) 97 | { 98 | if (argc != 2) 99 | { 100 | std::fprintf(stderr, "Usage: %s [utf-8 | gb18030z]\n", argv[0]); 101 | return EXIT_FAILURE; 102 | } 103 | 104 | if (std::string(argv[1]) == "utf-8") 105 | { 106 | genutf8(); 107 | return EXIT_SUCCESS; 108 | } 109 | 110 | gengb18030z(); 111 | return EXIT_SUCCESS; 112 | } 113 | -------------------------------------------------------------------------------- /benchmark/benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark.hpp" 2 | #include "cppp/reiconv.h" 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | char *test_string_utf8; 11 | std::size_t test_string_utf8_len; 12 | 13 | static void reiconv(benchmark::State &state) 14 | { 15 | char output[test_string_utf8_len * 4]; 16 | void *cd = reiconv_iconv_open(); 17 | for (auto _ : state) 18 | { 19 | reiconv_convert_static_size(cd, test_string_utf8, test_string_utf8_len, output, test_string_utf8_len * 4); 20 | } 21 | } 22 | BENCHMARK(reiconv); 23 | 24 | static void glibc(benchmark::State &state) 25 | { 26 | char output[test_string_utf8_len * 4]; 27 | void *cd = glibc_iconv_open(); 28 | for (auto _ : state) 29 | { 30 | glibc_static_size_convert(cd, test_string_utf8, test_string_utf8_len, output, test_string_utf8_len * 4); 31 | } 32 | } 33 | BENCHMARK(glibc); 34 | 35 | static void libiconv(benchmark::State &state) 36 | { 37 | char output[test_string_utf8_len * 4]; 38 | void *cd = libiconv_iconv_open(); 39 | for (auto _ : state) 40 | { 41 | libiconv_static_size_convert(cd, test_string_utf8, test_string_utf8_len, output, test_string_utf8_len * 4); 42 | } 43 | } 44 | BENCHMARK(libiconv); 45 
| 46 | static void libicu(benchmark::State &state) 47 | { 48 | UErrorCode status{U_ZERO_ERROR}; 49 | char output[test_string_utf8_len * 4]; 50 | 51 | for (auto _ : state) 52 | { 53 | ucnv_convert("GB18030", "UTF-8", output, test_string_utf8_len * 4, test_string_utf8, test_string_utf8_len, 54 | &status); 55 | } 56 | } 57 | BENCHMARK(libicu); 58 | 59 | void init_test_string(std::size_t len) 60 | { 61 | constexpr const char *const test_string = 62 | "\u6211\u80fd\u541e\u4e0b\u8eab\u4f53\u800c\u4e0d\u4f24\u8eab\u4f53\u30021234567890ABCD"; 63 | constexpr const std::size_t test_string_len = 50; 64 | test_string_utf8 = new char[test_string_len * len]; 65 | test_string_utf8_len = test_string_len * len; 66 | for (std::size_t i = 0; i < len; ++i) 67 | { 68 | std::memcpy(test_string_utf8 + i * test_string_len, test_string, test_string_len); 69 | } 70 | } 71 | 72 | static inline bool isdigit(const std::string_view str) 73 | { 74 | for (const char c : str) 75 | { 76 | if (!std::isdigit(c)) 77 | { 78 | return false; 79 | } 80 | } 81 | return true; 82 | } 83 | 84 | const char *const_argv_default[2] = {"benchmark", "--benchmark_format=json"}; 85 | int main(int argc, char **argv) 86 | { 87 | if (argc != 2) 88 | { 89 | std::fputs("Usage: benchmark \n", stderr); 90 | return EXIT_FAILURE; 91 | } 92 | if (!isdigit(argv[1])) 93 | { 94 | std::fputs("Error: test_string_repeat_times must be a number.\n", stderr); 95 | return EXIT_FAILURE; 96 | } 97 | 98 | std::size_t len = std::atoi(argv[1]); 99 | init_test_string(len); 100 | 101 | int argc_default = 2; 102 | ::benchmark::Initialize(&argc_default, const_cast(const_argv_default)); 103 | if (::benchmark::ReportUnrecognizedArguments(argc_default, const_cast(const_argv_default))) 104 | { 105 | return EXIT_FAILURE; 106 | } 107 | ::benchmark::RunSpecifiedBenchmarks(); 108 | ::benchmark::Shutdown(); 109 | 110 | delete[] test_string_utf8; // Not necessary. 
111 | return EXIT_SUCCESS; 112 | } 113 | -------------------------------------------------------------------------------- /lib/converters/utf8.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file utf8.h 3 | * @brief UTF-8 4 | * @copyright Copyright (C) 1999-2001, 2004, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _UTF8_H_ 26 | #define _UTF8_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | /* Specification: RFC 3629 */ 31 | 32 | static int 33 | utf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 34 | { 35 | unsigned char c = s[0]; 36 | 37 | if (c < 0x80) { 38 | *pwc = c; 39 | return 1; 40 | } else if (c < 0xc2) { 41 | return RET_ILSEQ; 42 | } else if (c < 0xe0) { 43 | if (n < 2) 44 | return RET_TOOFEW(0); 45 | if (!((s[1] ^ 0x80) < 0x40)) 46 | return RET_ILSEQ; 47 | *pwc = ((ucs4_t) (c & 0x1f) << 6) 48 | | (ucs4_t) (s[1] ^ 0x80); 49 | return 2; 50 | } else if (c < 0xf0) { 51 | if (n < 3) 52 | return RET_TOOFEW(0); 53 | if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 54 | && (c >= 0xe1 || s[1] >= 0xa0) 55 | && (c != 0xed || s[1] < 0xa0))) 56 | return RET_ILSEQ; 57 | *pwc = ((ucs4_t) (c & 0x0f) << 12) 58 | | ((ucs4_t) (s[1] ^ 0x80) << 6) 59 | | (ucs4_t) (s[2] ^ 0x80); 60 | return 3; 61 | } else if (c < 0xf8 && sizeof(ucs4_t)*8 >= 32) { 62 | if (n < 4) 63 | return RET_TOOFEW(0); 64 | if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 65 | && (s[3] ^ 0x80) < 0x40 66 | && (c >= 0xf1 || s[1] >= 0x90) 67 | && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))) 68 | return RET_ILSEQ; 69 | *pwc = ((ucs4_t) (c & 0x07) << 18) 70 | | ((ucs4_t) (s[1] ^ 0x80) << 12) 71 | | ((ucs4_t) (s[2] ^ 0x80) << 6) 72 | | (ucs4_t) (s[3] ^ 0x80); 73 | return 4; 74 | } else 75 | return RET_ILSEQ; 76 | } 77 | 78 | static int 79 | utf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n) /* n == 0 is acceptable */ 80 | { 81 | int count; 82 | if (wc < 0x80) 83 | count = 1; 84 | else if (wc < 0x800) 85 | count = 2; 86 | else if (wc < 0x10000) { 87 | if (wc < 0xd800 || wc >= 0xe000) 88 | count = 3; 89 | else 90 | return RET_ILUNI; 91 | } else if (wc < 0x110000) 92 | count = 4; 93 | else 94 | return RET_ILUNI; 95 | if (n < count) 96 | return RET_TOOSMALL; 97 | switch (count) { /* note: code falls through cases! 
*/ 98 | case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000; 99 | case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800; 100 | case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0; 101 | case 1: r[0] = wc; 102 | } 103 | return count; 104 | } 105 | 106 | #endif /* _UTF8_H_ */ 107 | -------------------------------------------------------------------------------- /lib/converters/iso8859_9.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file iso8859_9.h 3 | * @brief ISO-8859-9 4 | * @copyright Copyright (C) 1999-2001, 2016 Free Software Foundation, Inc. 5 | * @copyright Copyright (C) 2024 The C++ Plus Project. 6 | */ 7 | /* 8 | * This file is part of the cppp-reiconv library. 9 | * 10 | * The cppp-reiconv library is free software; you can redistribute it 11 | * and/or modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either version 3 13 | * of the License, or (at your option) any later version. 14 | * 15 | * The cppp-reiconv library is distributed in the hope that it will be 16 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with the cppp-reiconv library; see the file LICENSE. 22 | * If not, see . 
23 | */ 24 | 25 | #ifndef _ISO8859_9_H_ 26 | #define _ISO8859_9_H_ 27 | 28 | #include "reiconv_defines.h" 29 | 30 | static const unsigned short iso8859_9_2uni[48] = { 31 | /* 0xd0 */ 32 | 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 33 | 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 34 | /* 0xe0 */ 35 | 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 36 | 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 37 | /* 0xf0 */ 38 | 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 39 | 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 40 | }; 41 | 42 | static int 43 | iso8859_9_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) 44 | { 45 | unsigned char c = *s; 46 | if (c >= 0xd0) 47 | *pwc = (ucs4_t) iso8859_9_2uni[c-0xd0]; 48 | else 49 | *pwc = (ucs4_t) c; 50 | return 1; 51 | } 52 | 53 | static const unsigned char iso8859_9_page00[48] = { 54 | 0x00, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 55 | 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */ 56 | 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 57 | 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 58 | 0x00, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 59 | 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* 0xf8-0xff */ 60 | }; 61 | static const unsigned char iso8859_9_page01[72] = { 62 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0xf0, /* 0x18-0x1f */ 63 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 64 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 65 | 0xdd, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 66 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 67 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 68 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 69 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 70 | 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xfe, /* 0x58-0x5f */ 71 | }; 72 | 73 | static int iso8859_9_wctomb(conv_t conv, unsigned char *r, ucs4_t wc, size_t n) 74 | { 75 | unsigned char c = 0; 76 | if (wc < 0x00d0) 77 | { 78 | *r = wc; 79 | return 1; 80 | } 81 | else if (wc >= 0x00d0 && wc < 0x0100) 82 | c = iso8859_9_page00[wc - 0x00d0]; 83 | else if (wc >= 0x0118 && wc < 0x0160) 84 | c = iso8859_9_page01[wc - 0x0118]; 85 | if (c != 0) 86 | { 87 | *r = c; 88 | return 1; 89 | } 90 | return RET_ILUNI; 91 | } 92 | 93 | #endif /* _ISO8859_9_H_ */ 94 | -------------------------------------------------------------------------------- /tests/data/ISO-8859-6.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 
0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x007E 128 | 0x7F 0x007F 129 | 0x80 0x0080 130 | 0x81 0x0081 131 | 0x82 0x0082 132 | 0x83 0x0083 133 | 0x84 0x0084 134 | 0x85 0x0085 135 | 0x86 0x0086 136 | 0x87 0x0087 137 | 0x88 0x0088 138 | 0x89 0x0089 139 | 0x8A 0x008A 140 | 0x8B 0x008B 141 | 0x8C 0x008C 142 | 0x8D 0x008D 143 | 0x8E 0x008E 144 | 0x8F 0x008F 145 | 0x90 0x0090 146 | 0x91 0x0091 147 | 0x92 0x0092 148 | 0x93 0x0093 149 | 0x94 0x0094 150 | 0x95 0x0095 151 | 0x96 0x0096 152 | 0x97 0x0097 153 | 0x98 0x0098 154 | 0x99 0x0099 155 | 0x9A 0x009A 156 | 0x9B 0x009B 157 | 0x9C 0x009C 158 | 0x9D 0x009D 159 | 0x9E 0x009E 160 | 0x9F 0x009F 161 | 0xA0 0x00A0 162 | 0xA4 0x00A4 163 | 0xAC 0x060C 164 | 0xAD 0x00AD 165 | 0xBB 0x061B 166 | 0xBF 0x061F 167 | 0xC1 0x0621 168 | 0xC2 0x0622 169 | 0xC3 0x0623 170 | 0xC4 0x0624 171 | 0xC5 0x0625 172 | 0xC6 0x0626 173 | 0xC7 0x0627 174 | 0xC8 0x0628 175 | 0xC9 0x0629 176 | 0xCA 0x062A 177 | 0xCB 0x062B 178 | 0xCC 0x062C 179 | 0xCD 0x062D 180 | 0xCE 0x062E 181 | 0xCF 0x062F 182 | 0xD0 0x0630 183 | 0xD1 0x0631 184 | 0xD2 0x0632 185 | 0xD3 0x0633 186 | 0xD4 0x0634 187 | 0xD5 0x0635 
188 | 0xD6 0x0636 189 | 0xD7 0x0637 190 | 0xD8 0x0638 191 | 0xD9 0x0639 192 | 0xDA 0x063A 193 | 0xE0 0x0640 194 | 0xE1 0x0641 195 | 0xE2 0x0642 196 | 0xE3 0x0643 197 | 0xE4 0x0644 198 | 0xE5 0x0645 199 | 0xE6 0x0646 200 | 0xE7 0x0647 201 | 0xE8 0x0648 202 | 0xE9 0x0649 203 | 0xEA 0x064A 204 | 0xEB 0x064B 205 | 0xEC 0x064C 206 | 0xED 0x064D 207 | 0xEE 0x064E 208 | 0xEF 0x064F 209 | 0xF0 0x0650 210 | 0xF1 0x0651 211 | 0xF2 0x0652 212 | -------------------------------------------------------------------------------- /tests/data/MacHebrew.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 
0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x007E 128 | 0x7F 0x007F 129 | 0x80 0x00C4 130 | 0x81 0xFB1F 131 | 0x82 0x00C7 132 | 0x83 0x00C9 133 | 0x84 0x00D1 134 | 0x85 0x00D6 135 | 0x86 0x00DC 136 | 0x87 0x00E1 137 | 0x88 0x00E0 138 | 0x89 0x00E2 139 | 0x8A 0x00E4 140 | 0x8B 0x00E3 141 | 0x8C 0x00E5 142 | 0x8D 0x00E7 143 | 0x8E 0x00E9 144 | 0x8F 0x00E8 145 | 0x90 0x00EA 146 | 0x91 0x00EB 147 | 0x92 0x00ED 148 | 0x93 0x00EC 149 | 0x94 0x00EE 150 | 0x95 0x00EF 151 | 0x96 0x00F1 152 | 0x97 0x00F3 153 | 0x98 0x00F2 154 | 0x99 0x00F4 155 | 0x9A 0x00F6 156 | 0x9B 0x00F5 157 | 0x9C 0x00FA 158 | 0x9D 0x00F9 159 | 0x9E 0x00FB 160 | 0x9F 0x00FC 161 | 0xA6 0x20AA 162 | 0xC1 0x201E 163 | 0xC6 0x05BC 164 | 0xC7 0xFB4B 165 | 0xC8 0xFB35 166 | 0xC9 0x2026 167 | 0xCA 0x00A0 168 | 0xCB 0x05B8 169 | 0xCC 0x05B7 170 | 0xCD 0x05B5 171 | 0xCE 0x05B6 172 | 0xCF 0x05B4 173 | 0xD0 0x2013 174 | 0xD1 0x2014 175 | 0xD2 0x201C 176 | 0xD3 0x201D 177 | 0xD4 0x2018 178 | 0xD5 0x2019 179 | 0xD6 0xFB2A 180 | 0xD7 0xFB2B 181 | 0xD8 0x05BF 182 | 0xD9 0x05B0 183 | 0xDA 0x05B2 184 | 0xDB 0x05B1 185 | 0xDC 0x05BB 186 | 0xDD 0x05B9 187 | 0xDF 0x05B3 188 | 0xE0 0x05D0 189 | 0xE1 0x05D1 190 | 0xE2 0x05D2 191 | 0xE3 0x05D3 192 | 0xE4 0x05D4 193 | 0xE5 0x05D5 194 | 
0xE6 0x05D6 195 | 0xE7 0x05D7 196 | 0xE8 0x05D8 197 | 0xE9 0x05D9 198 | 0xEA 0x05DA 199 | 0xEB 0x05DB 200 | 0xEC 0x05DC 201 | 0xED 0x05DD 202 | 0xEE 0x05DE 203 | 0xEF 0x05DF 204 | 0xF0 0x05E0 205 | 0xF1 0x05E1 206 | 0xF2 0x05E2 207 | 0xF3 0x05E3 208 | 0xF4 0x05E4 209 | 0xF5 0x05E5 210 | 0xF6 0x05E6 211 | 0xF7 0x05E7 212 | 0xF8 0x05E8 213 | 0xF9 0x05E9 214 | 0xFA 0x05EA 215 | -------------------------------------------------------------------------------- /benchmark/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Run the benchmark and plot the results.""" 4 | 5 | from __future__ import annotations 6 | 7 | import json 8 | import sys 9 | from pathlib import Path 10 | from subprocess import PIPE, Popen 11 | 12 | import rich 13 | import rich.progress 14 | from matplotlib import pyplot as plt 15 | 16 | TEST_STRING_LEN = 50 17 | MAX_REPET_TIME = 100 18 | 19 | data: dict[str, list[list[float]]] = {} 20 | 21 | 22 | def command(args: list[str] | str) -> str: 23 | """Generate shell command from a list of arguments. 24 | 25 | Args: 26 | args (list[str] | str): The list of arguments. 27 | 28 | Returns: 29 | str: The shell command. 30 | 31 | """ 32 | if isinstance(args, str): 33 | return args 34 | 35 | res_command = "" 36 | for arg in args: 37 | if " " in arg: 38 | if '"' in arg: 39 | arg = arg.replace('"', '\\"') # noqa: PLW2901 40 | res_command += f'"{arg}" ' 41 | else: 42 | res_command += f"{arg} " 43 | return res_command.strip() 44 | 45 | 46 | def add_data(str_repet_time: int, benchmark_name: str, time: float) -> None: 47 | """Add data to the data dict. 48 | 49 | Args: 50 | str_repet_time (int): The number of times the test string is 51 | repeated. 52 | benchmark_name (str): The name of the test. 53 | time (float): The time taken to run the test. 
54 | 55 | """ 56 | if benchmark_name not in data: 57 | data[benchmark_name] = [] 58 | data[benchmark_name].append([str_repet_time * TEST_STRING_LEN, time]) 59 | 60 | 61 | if __name__ == "__main__": 62 | cwd = (Path(sys.argv[0]) / "..").resolve() 63 | executable = cwd / "benchmark" 64 | executable.chmod(0o755) 65 | 66 | with rich.progress.Progress() as progress: 67 | task = progress.add_task("Running", total=MAX_REPET_TIME) 68 | for test_string_repet_time in range(MAX_REPET_TIME): 69 | with Popen( # noqa: S603 70 | [str(executable), str(test_string_repet_time)], 71 | cwd=cwd, 72 | shell=False, 73 | stdout=PIPE, 74 | ) as proc: 75 | arg1 = command([str(executable)]) 76 | rich.print( 77 | f"Running benchmark: [green]{arg1}[/green] " 78 | f"{test_string_repet_time} ...", 79 | ) 80 | if proc.wait() != 0: 81 | rich.print( 82 | f"[red]Error: Process failed with code " 83 | f"{proc.returncode}[/red]", 84 | ) 85 | sys.exit(1) 86 | proc.stdout.flush() # type: ignore[attr-defined] 87 | test_data = json.loads( 88 | proc.stdout.read().decode( # type: ignore[attr-defined] 89 | "UTF-8", 90 | ), 91 | ) 92 | for unit_data in test_data["benchmarks"]: 93 | add_data( 94 | test_string_repet_time, 95 | unit_data["run_name"], 96 | unit_data["real_time"], 97 | ) 98 | progress.update(task, advance=1) 99 | 100 | rich.print("[green]All benchmarks have been run successfully.[/green]") 101 | rich.print(data) 102 | 103 | for name, values in data.items(): 104 | plt.plot(*zip(*values), label=name) 105 | plt.legend() 106 | plt.xlabel("Test string length (bytes)") 107 | plt.ylabel("Time (ns)") 108 | plt.title("Benchmark") 109 | plt.show() 110 | -------------------------------------------------------------------------------- /tests/data/CP856.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 
0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 
0x007E 128 | 0x7F 0x007F 129 | 0x80 0x05D0 130 | 0x81 0x05D1 131 | 0x82 0x05D2 132 | 0x83 0x05D3 133 | 0x84 0x05D4 134 | 0x85 0x05D5 135 | 0x86 0x05D6 136 | 0x87 0x05D7 137 | 0x88 0x05D8 138 | 0x89 0x05D9 139 | 0x8A 0x05DA 140 | 0x8B 0x05DB 141 | 0x8C 0x05DC 142 | 0x8D 0x05DD 143 | 0x8E 0x05DE 144 | 0x8F 0x05DF 145 | 0x90 0x05E0 146 | 0x91 0x05E1 147 | 0x92 0x05E2 148 | 0x93 0x05E3 149 | 0x94 0x05E4 150 | 0x95 0x05E5 151 | 0x96 0x05E6 152 | 0x97 0x05E7 153 | 0x98 0x05E8 154 | 0x99 0x05E9 155 | 0x9A 0x05EA 156 | 0x9C 0x00A3 157 | 0x9E 0x00D7 158 | 0xA9 0x00AE 159 | 0xAA 0x00AC 160 | 0xAB 0x00BD 161 | 0xAC 0x00BC 162 | 0xAE 0x00AB 163 | 0xAF 0x00BB 164 | 0xB0 0x2591 165 | 0xB1 0x2592 166 | 0xB2 0x2593 167 | 0xB3 0x2502 168 | 0xB4 0x2524 169 | 0xB8 0x00A9 170 | 0xB9 0x2563 171 | 0xBA 0x2551 172 | 0xBB 0x2557 173 | 0xBC 0x255D 174 | 0xBD 0x00A2 175 | 0xBE 0x00A5 176 | 0xBF 0x2510 177 | 0xC0 0x2514 178 | 0xC1 0x2534 179 | 0xC2 0x252C 180 | 0xC3 0x251C 181 | 0xC4 0x2500 182 | 0xC5 0x253C 183 | 0xC8 0x255A 184 | 0xC9 0x2554 185 | 0xCA 0x2569 186 | 0xCB 0x2566 187 | 0xCC 0x2560 188 | 0xCD 0x2550 189 | 0xCE 0x256C 190 | 0xCF 0x00A4 191 | 0xD9 0x2518 192 | 0xDA 0x250C 193 | 0xDB 0x2588 194 | 0xDC 0x2584 195 | 0xDD 0x00A6 196 | 0xDF 0x2580 197 | 0xE6 0x00B5 198 | 0xEE 0x00AF 199 | 0xEF 0x00B4 200 | 0xF0 0x00AD 201 | 0xF1 0x00B1 202 | 0xF2 0x2017 203 | 0xF3 0x00BE 204 | 0xF4 0x00B6 205 | 0xF5 0x00A7 206 | 0xF6 0x00F7 207 | 0xF7 0x00B8 208 | 0xF8 0x00B0 209 | 0xF9 0x00A8 210 | 0xFA 0x00B7 211 | 0xFB 0x00B9 212 | 0xFC 0x00B3 213 | 0xFD 0x00B2 214 | 0xFE 0x25A0 215 | 0xFF 0x00A0 216 | -------------------------------------------------------------------------------- /tests/data/TIS-620.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x0004 6 | 0x05 0x0005 7 | 0x06 0x0006 8 | 0x07 0x0007 9 | 0x08 0x0008 10 | 0x09 0x0009 11 | 0x0A 0x000A 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 
| 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x0014 22 | 0x15 0x0015 23 | 0x16 0x0016 24 | 0x17 0x0017 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x001A 28 | 0x1B 0x001B 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0020 34 | 0x21 0x0021 35 | 0x22 0x0022 36 | 0x23 0x0023 37 | 0x24 0x0024 38 | 0x25 0x0025 39 | 0x26 0x0026 40 | 0x27 0x0027 41 | 0x28 0x0028 42 | 0x29 0x0029 43 | 0x2A 0x002A 44 | 0x2B 0x002B 45 | 0x2C 0x002C 46 | 0x2D 0x002D 47 | 0x2E 0x002E 48 | 0x2F 0x002F 49 | 0x30 0x0030 50 | 0x31 0x0031 51 | 0x32 0x0032 52 | 0x33 0x0033 53 | 0x34 0x0034 54 | 0x35 0x0035 55 | 0x36 0x0036 56 | 0x37 0x0037 57 | 0x38 0x0038 58 | 0x39 0x0039 59 | 0x3A 0x003A 60 | 0x3B 0x003B 61 | 0x3C 0x003C 62 | 0x3D 0x003D 63 | 0x3E 0x003E 64 | 0x3F 0x003F 65 | 0x40 0x0040 66 | 0x41 0x0041 67 | 0x42 0x0042 68 | 0x43 0x0043 69 | 0x44 0x0044 70 | 0x45 0x0045 71 | 0x46 0x0046 72 | 0x47 0x0047 73 | 0x48 0x0048 74 | 0x49 0x0049 75 | 0x4A 0x004A 76 | 0x4B 0x004B 77 | 0x4C 0x004C 78 | 0x4D 0x004D 79 | 0x4E 0x004E 80 | 0x4F 0x004F 81 | 0x50 0x0050 82 | 0x51 0x0051 83 | 0x52 0x0052 84 | 0x53 0x0053 85 | 0x54 0x0054 86 | 0x55 0x0055 87 | 0x56 0x0056 88 | 0x57 0x0057 89 | 0x58 0x0058 90 | 0x59 0x0059 91 | 0x5A 0x005A 92 | 0x5B 0x005B 93 | 0x5C 0x005C 94 | 0x5D 0x005D 95 | 0x5E 0x005E 96 | 0x5F 0x005F 97 | 0x60 0x0060 98 | 0x61 0x0061 99 | 0x62 0x0062 100 | 0x63 0x0063 101 | 0x64 0x0064 102 | 0x65 0x0065 103 | 0x66 0x0066 104 | 0x67 0x0067 105 | 0x68 0x0068 106 | 0x69 0x0069 107 | 0x6A 0x006A 108 | 0x6B 0x006B 109 | 0x6C 0x006C 110 | 0x6D 0x006D 111 | 0x6E 0x006E 112 | 0x6F 0x006F 113 | 0x70 0x0070 114 | 0x71 0x0071 115 | 0x72 0x0072 116 | 0x73 0x0073 117 | 0x74 0x0074 118 | 0x75 0x0075 119 | 0x76 0x0076 120 | 0x77 0x0077 121 | 0x78 0x0078 122 | 0x79 0x0079 123 | 0x7A 0x007A 124 | 0x7B 0x007B 125 | 0x7C 0x007C 126 | 0x7D 0x007D 127 | 0x7E 0x007E 128 | 0x7F 0x007F 129 | 0xA1 0x0E01 
130 | 0xA2 0x0E02 131 | 0xA3 0x0E03 132 | 0xA4 0x0E04 133 | 0xA5 0x0E05 134 | 0xA6 0x0E06 135 | 0xA7 0x0E07 136 | 0xA8 0x0E08 137 | 0xA9 0x0E09 138 | 0xAA 0x0E0A 139 | 0xAB 0x0E0B 140 | 0xAC 0x0E0C 141 | 0xAD 0x0E0D 142 | 0xAE 0x0E0E 143 | 0xAF 0x0E0F 144 | 0xB0 0x0E10 145 | 0xB1 0x0E11 146 | 0xB2 0x0E12 147 | 0xB3 0x0E13 148 | 0xB4 0x0E14 149 | 0xB5 0x0E15 150 | 0xB6 0x0E16 151 | 0xB7 0x0E17 152 | 0xB8 0x0E18 153 | 0xB9 0x0E19 154 | 0xBA 0x0E1A 155 | 0xBB 0x0E1B 156 | 0xBC 0x0E1C 157 | 0xBD 0x0E1D 158 | 0xBE 0x0E1E 159 | 0xBF 0x0E1F 160 | 0xC0 0x0E20 161 | 0xC1 0x0E21 162 | 0xC2 0x0E22 163 | 0xC3 0x0E23 164 | 0xC4 0x0E24 165 | 0xC5 0x0E25 166 | 0xC6 0x0E26 167 | 0xC7 0x0E27 168 | 0xC8 0x0E28 169 | 0xC9 0x0E29 170 | 0xCA 0x0E2A 171 | 0xCB 0x0E2B 172 | 0xCC 0x0E2C 173 | 0xCD 0x0E2D 174 | 0xCE 0x0E2E 175 | 0xCF 0x0E2F 176 | 0xD0 0x0E30 177 | 0xD1 0x0E31 178 | 0xD2 0x0E32 179 | 0xD3 0x0E33 180 | 0xD4 0x0E34 181 | 0xD5 0x0E35 182 | 0xD6 0x0E36 183 | 0xD7 0x0E37 184 | 0xD8 0x0E38 185 | 0xD9 0x0E39 186 | 0xDA 0x0E3A 187 | 0xDF 0x0E3F 188 | 0xE0 0x0E40 189 | 0xE1 0x0E41 190 | 0xE2 0x0E42 191 | 0xE3 0x0E43 192 | 0xE4 0x0E44 193 | 0xE5 0x0E45 194 | 0xE6 0x0E46 195 | 0xE7 0x0E47 196 | 0xE8 0x0E48 197 | 0xE9 0x0E49 198 | 0xEA 0x0E4A 199 | 0xEB 0x0E4B 200 | 0xEC 0x0E4C 201 | 0xED 0x0E4D 202 | 0xEE 0x0E4E 203 | 0xEF 0x0E4F 204 | 0xF0 0x0E50 205 | 0xF1 0x0E51 206 | 0xF2 0x0E52 207 | 0xF3 0x0E53 208 | 0xF4 0x0E54 209 | 0xF5 0x0E55 210 | 0xF6 0x0E56 211 | 0xF7 0x0E57 212 | 0xF8 0x0E58 213 | 0xF9 0x0E59 214 | 0xFA 0x0E5A 215 | 0xFB 0x0E5B 216 | -------------------------------------------------------------------------------- /tests/data/IBM-424.TXT: -------------------------------------------------------------------------------- 1 | 0x00 0x0000 2 | 0x01 0x0001 3 | 0x02 0x0002 4 | 0x03 0x0003 5 | 0x04 0x009C 6 | 0x05 0x0009 7 | 0x06 0x0086 8 | 0x07 0x007F 9 | 0x08 0x0097 10 | 0x09 0x008D 11 | 0x0A 0x008E 12 | 0x0B 0x000B 13 | 0x0C 0x000C 14 | 0x0D 0x000D 15 | 0x0E 0x000E 16 | 0x0F 
0x000F 17 | 0x10 0x0010 18 | 0x11 0x0011 19 | 0x12 0x0012 20 | 0x13 0x0013 21 | 0x14 0x009D 22 | 0x15 0x0085 23 | 0x16 0x0008 24 | 0x17 0x0087 25 | 0x18 0x0018 26 | 0x19 0x0019 27 | 0x1A 0x0092 28 | 0x1B 0x008F 29 | 0x1C 0x001C 30 | 0x1D 0x001D 31 | 0x1E 0x001E 32 | 0x1F 0x001F 33 | 0x20 0x0080 34 | 0x21 0x0081 35 | 0x22 0x0082 36 | 0x23 0x0083 37 | 0x24 0x0084 38 | 0x25 0x000A 39 | 0x26 0x0017 40 | 0x27 0x001B 41 | 0x28 0x0088 42 | 0x29 0x0089 43 | 0x2A 0x008A 44 | 0x2B 0x008B 45 | 0x2C 0x008C 46 | 0x2D 0x0005 47 | 0x2E 0x0006 48 | 0x2F 0x0007 49 | 0x30 0x0090 50 | 0x31 0x0091 51 | 0x32 0x0016 52 | 0x33 0x0093 53 | 0x34 0x0094 54 | 0x35 0x0095 55 | 0x36 0x0096 56 | 0x37 0x0004 57 | 0x38 0x0098 58 | 0x39 0x0099 59 | 0x3A 0x009A 60 | 0x3B 0x009B 61 | 0x3C 0x0014 62 | 0x3D 0x0015 63 | 0x3E 0x009E 64 | 0x3F 0x001A 65 | 0x40 0x0020 66 | 0x41 0x05D0 67 | 0x42 0x05D1 68 | 0x43 0x05D2 69 | 0x44 0x05D3 70 | 0x45 0x05D4 71 | 0x46 0x05D5 72 | 0x47 0x05D6 73 | 0x48 0x05D7 74 | 0x49 0x05D8 75 | 0x4A 0x00A2 76 | 0x4B 0x002E 77 | 0x4C 0x003C 78 | 0x4D 0x0028 79 | 0x4E 0x002B 80 | 0x4F 0x007C 81 | 0x50 0x0026 82 | 0x51 0x05D9 83 | 0x52 0x05DA 84 | 0x53 0x05DB 85 | 0x54 0x05DC 86 | 0x55 0x05DD 87 | 0x56 0x05DE 88 | 0x57 0x05DF 89 | 0x58 0x05E0 90 | 0x59 0x05E1 91 | 0x5A 0x0021 92 | 0x5B 0x0024 93 | 0x5C 0x002A 94 | 0x5D 0x0029 95 | 0x5E 0x003B 96 | 0x5F 0x00AC 97 | 0x60 0x002D 98 | 0x61 0x002F 99 | 0x62 0x05E2 100 | 0x63 0x05E3 101 | 0x64 0x05E4 102 | 0x65 0x05E5 103 | 0x66 0x05E6 104 | 0x67 0x05E7 105 | 0x68 0x05E8 106 | 0x69 0x05E9 107 | 0x6A 0x00A6 108 | 0x6B 0x002C 109 | 0x6C 0x0025 110 | 0x6D 0x005F 111 | 0x6E 0x003E 112 | 0x6F 0x003F 113 | 0x71 0x05EA 114 | 0x74 0x00A0 115 | 0x78 0x2017 116 | 0x79 0x0060 117 | 0x7A 0x003A 118 | 0x7B 0x0023 119 | 0x7C 0x0040 120 | 0x7D 0x0027 121 | 0x7E 0x003D 122 | 0x7F 0x0022 123 | 0x81 0x0061 124 | 0x82 0x0062 125 | 0x83 0x0063 126 | 0x84 0x0064 127 | 0x85 0x0065 128 | 0x86 0x0066 129 | 0x87 0x0067 130 | 0x88 0x0068 131 | 0x89 0x0069 132 | 
0x8A 0x00AB 133 | 0x8B 0x00BB 134 | 0x8F 0x00B1 135 | 0x90 0x00B0 136 | 0x91 0x006A 137 | 0x92 0x006B 138 | 0x93 0x006C 139 | 0x94 0x006D 140 | 0x95 0x006E 141 | 0x96 0x006F 142 | 0x97 0x0070 143 | 0x98 0x0071 144 | 0x99 0x0072 145 | 0x9D 0x00B8 146 | 0x9F 0x00A4 147 | 0xA0 0x00B5 148 | 0xA1 0x007E 149 | 0xA2 0x0073 150 | 0xA3 0x0074 151 | 0xA4 0x0075 152 | 0xA5 0x0076 153 | 0xA6 0x0077 154 | 0xA7 0x0078 155 | 0xA8 0x0079 156 | 0xA9 0x007A 157 | 0xAF 0x00AE 158 | 0xB0 0x005E 159 | 0xB1 0x00A3 160 | 0xB2 0x00A5 161 | 0xB3 0x2022 162 | 0xB4 0x00A9 163 | 0xB5 0x00A7 164 | 0xB6 0x00B6 165 | 0xB7 0x00BC 166 | 0xB8 0x00BD 167 | 0xB9 0x00BE 168 | 0xBA 0x005B 169 | 0xBB 0x005D 170 | 0xBC 0x203E 171 | 0xBD 0x00A8 172 | 0xBE 0x00B4 173 | 0xBF 0x00D7 174 | 0xC0 0x007B 175 | 0xC1 0x0041 176 | 0xC2 0x0042 177 | 0xC3 0x0043 178 | 0xC4 0x0044 179 | 0xC5 0x0045 180 | 0xC6 0x0046 181 | 0xC7 0x0047 182 | 0xC8 0x0048 183 | 0xC9 0x0049 184 | 0xCA 0x00AD 185 | 0xD0 0x007D 186 | 0xD1 0x004A 187 | 0xD2 0x004B 188 | 0xD3 0x004C 189 | 0xD4 0x004D 190 | 0xD5 0x004E 191 | 0xD6 0x004F 192 | 0xD7 0x0050 193 | 0xD8 0x0051 194 | 0xD9 0x0052 195 | 0xDA 0x00B9 196 | 0xE0 0x005C 197 | 0xE1 0x00F7 198 | 0xE2 0x0053 199 | 0xE3 0x0054 200 | 0xE4 0x0055 201 | 0xE5 0x0056 202 | 0xE6 0x0057 203 | 0xE7 0x0058 204 | 0xE8 0x0059 205 | 0xE9 0x005A 206 | 0xEA 0x00B2 207 | 0xF0 0x0030 208 | 0xF1 0x0031 209 | 0xF2 0x0032 210 | 0xF3 0x0033 211 | 0xF4 0x0034 212 | 0xF5 0x0035 213 | 0xF6 0x0036 214 | 0xF7 0x0037 215 | 0xF8 0x0038 216 | 0xF9 0x0039 217 | 0xFA 0x00B3 218 | 0xFF 0x009F 219 | --------------------------------------------------------------------------------