├── .gitignore ├── .gitmodules ├── Changelog ├── Contributors ├── LICENSE ├── Makefile ├── Makefile.win ├── README.md ├── Todo ├── doc ├── Makefile ├── api.rst ├── conf.py ├── index.rst └── tex │ └── bsdconv.tex ├── modules ├── filter │ ├── ANSI.c │ ├── ARABIC.c │ ├── ARMENIAN.c │ ├── ARROWS.c │ ├── BRAILLE.c │ ├── BYTE.c │ ├── CHEROKEE.c │ ├── CJK.c │ ├── CJK.man │ ├── CNS11643.c │ ├── CUNEIFORM.c │ ├── CURRENCY.c │ ├── CYRILLIC.c │ ├── DEVANAGARI.c │ ├── EGYPTIAN.c │ ├── EMOTICON.c │ ├── ETHIOPIC.c │ ├── GEORGIAN.c │ ├── GREEK.c │ ├── HANGUL.c │ ├── HEBREW.c │ ├── HIRAGANA.c │ ├── IPA.c │ ├── JAVANESE.c │ ├── KANNADA.c │ ├── KATAKANA.c │ ├── KHMER.c │ ├── LAO.c │ ├── LATIN.c │ ├── LATIN1.c │ ├── MAHJONG.c │ ├── MALAYALAM.c │ ├── MATH.c │ ├── MIAO.c │ ├── MONGOLIAN.c │ ├── MUSIC.c │ ├── MYANMAR.c │ ├── PHONETIC.c │ ├── PRINT.c │ ├── PUA.c │ ├── PUNCTUATION.c │ ├── ROMAN.c │ ├── SAMARITAN.c │ ├── SINHALA.c │ ├── SUNDANESE.c │ ├── SYRIAC.c │ ├── TAGALOG.c │ ├── TAMIL.c │ ├── TELUGU.c │ ├── THAI.c │ ├── TIBETAN.c │ ├── TIFINAGH.c │ ├── UNICODE.c │ ├── YI.c │ ├── alias │ ├── type.c │ └── unicode_range.c ├── from │ ├── 00.man │ ├── 00.txt │ ├── ANSI-CONTROL.c │ ├── ANSI-CONTROL.man │ ├── ANSI-CONTROL.txt │ ├── ANY.c │ ├── ANY.man │ ├── ANY.txt │ ├── ASCII-NAMED-HTML-ENTITY.man │ ├── ASCII-NAMED-HTML-ENTITY.txt │ ├── ASCII-NUMERIC-HTML-ENTITY.c │ ├── ASCII-NUMERIC-HTML-ENTITY.man │ ├── ASCII-NUMERIC-HTML-ENTITY.txt │ ├── ASCII.man │ ├── ASCII.txt │ ├── BIG5-5C.man │ ├── BIG5-5C.txt │ ├── BSDCONV-KEYWORD.man │ ├── BSDCONV-KEYWORD.txt │ ├── BSDCONV-LOG.c │ ├── BSDCONV-LOG.man │ ├── BSDCONV-LOG.txt │ ├── BSDCONV.c │ ├── BSDCONV.man │ ├── BSDCONV.txt │ ├── BYTE.man │ ├── BYTE.txt │ ├── CCCII.man │ ├── CCCII.txt │ ├── CP950-UDA.c │ ├── CP950-UDA.man │ ├── CP950-UDA.txt │ ├── EBCDIC.c │ ├── ESCAPE.c │ ├── ESCAPE.man │ ├── ESCAPE.txt │ ├── FALLBACK-UNICODE.man │ ├── FALLBACK-UNICODE.txt │ ├── HEX.txt │ ├── IBM-37.man │ ├── IBM-37.txt │ ├── IBM-930.c │ ├── IBM-930.man │ ├── 
IBM-930.txt │ ├── IBM-933.c │ ├── IBM-933.man │ ├── IBM-933.txt │ ├── IBM-935.c │ ├── IBM-935.man │ ├── IBM-935.txt │ ├── IBM-937.c │ ├── IBM-937.man │ ├── IBM-937.txt │ ├── IBM-939.c │ ├── IBM-939.man │ ├── IBM-939.txt │ ├── PASS.c │ ├── PASS.man │ ├── PASS.txt │ ├── UTF-16BE.c │ ├── UTF-16BE.man │ ├── UTF-16BE.txt │ ├── UTF-16LE.c │ ├── UTF-16LE.man │ ├── UTF-16LE.txt │ ├── UTF-32BE.c │ ├── UTF-32BE.man │ ├── UTF-32BE.txt │ ├── UTF-32LE.c │ ├── UTF-32LE.man │ ├── UTF-32LE.txt │ ├── _BIG5-2003.man │ ├── _BIG5-2003.txt │ ├── _BIG5-ETEN.man │ ├── _BIG5-ETEN.txt │ ├── _BIG5E.man │ ├── _BIG5E.txt │ ├── _CP1251.man │ ├── _CP1251.txt │ ├── _CP1252.man │ ├── _CP1252.txt │ ├── _CP1253.man │ ├── _CP1253.txt │ ├── _CP1254.txt │ ├── _CP1255.txt │ ├── _CP1256.txt │ ├── _CP1257.txt │ ├── _CP1258.txt │ ├── _CP874.man │ ├── _CP874.txt │ ├── _CP932.man │ ├── _CP932.txt │ ├── _CP936.man │ ├── _CP936.txt │ ├── _CP949.man │ ├── _CP949.txt │ ├── _CP950.man │ ├── _CP950.txt │ ├── _GB18030.c │ ├── _GB18030.man │ ├── _GB18030.txt │ ├── _GB2312.man │ ├── _GB2312.txt │ ├── _GBK.man │ ├── _GBK.txt │ ├── _HKSCS1999.txt │ ├── _HKSCS2001.txt │ ├── _HKSCS2004.txt │ ├── _ISO-8859-1.man │ ├── _ISO-8859-1.txt │ ├── _JIS.c │ ├── _JIS.txt │ ├── _JIS0201.txt │ ├── _JIS0208.txt │ ├── _JIS0212.txt │ ├── _SHIFT-JIS.man │ ├── _SHIFT-JIS.txt │ ├── _UAO241.man │ ├── _UAO241.txt │ ├── _UAO250.man │ ├── _UAO250.txt │ ├── _UTF-8.c │ ├── _UTF-8.man │ ├── _UTF-8.txt │ └── alias ├── inter │ ├── ALIAS-FILTER.txt │ ├── ALIAS-FROM.c │ ├── ALIAS-FROM.man │ ├── ALIAS-FROM.txt │ ├── ALIAS-INTER.c │ ├── ALIAS-INTER.txt │ ├── ALIAS-TO.c │ ├── ALIAS-TO.man │ ├── ALIAS-TO.txt │ ├── AMBIGUOUS-PAD.c │ ├── AMBIGUOUS-PAD.man │ ├── AMBIGUOUS-PAD.txt │ ├── AMBIGUOUS-UNPAD.c │ ├── AMBIGUOUS-UNPAD.man │ ├── AMBIGUOUS-UNPAD.txt │ ├── ASCIIFOLD.txt │ ├── BIG5-DEFRAG.c │ ├── BIG5-DEFRAG.man │ ├── BIG5-DEFRAG.txt │ ├── BMP-TRANS-CN.txt │ ├── BMP-TRANS-TW.txt │ ├── BONUS.c │ ├── CASEFOLD.txt │ ├── COUNT.c │ ├── COUNT.man │ ├── 
COUNT.txt │ ├── FULL.man │ ├── FULL.txt │ ├── HALF.man │ ├── HALF.txt │ ├── HANJA.txt │ ├── INSERT.c │ ├── INSERT.man │ ├── INSERT.txt │ ├── KANA-PHONETIC.man │ ├── KANA-PHONETIC.txt │ ├── KANJI.man │ ├── KANJI.txt │ ├── LOWER.man │ ├── LOWER.txt │ ├── MAC.man │ ├── MAC.txt │ ├── NL2BR.man │ ├── NL2BR.txt │ ├── NULL.c │ ├── NULL.man │ ├── NULL.txt │ ├── PASS.c │ ├── PASS.man │ ├── PASS.txt │ ├── REPLACE.c │ ├── REPLACE.man │ ├── REPLACE.txt │ ├── SCORE-TRAIN.c │ ├── SCORE-TRAIN.man │ ├── SCORE-TRAIN.txt │ ├── SCORE.c │ ├── SCORE.man │ ├── SCORE.txt │ ├── STRINGS.c │ ├── STRINGS.man │ ├── STRINGS.txt │ ├── SUB.txt │ ├── SUPER.txt │ ├── TRIM-WIDTH.c │ ├── TRIM-WIDTH.man │ ├── TRIM-WIDTH.txt │ ├── UNIX.man │ ├── UNIX.txt │ ├── UPPER.man │ ├── UPPER.txt │ ├── UPSIDEDOWN.man │ ├── UPSIDEDOWN.txt │ ├── WHITESPACE-DERAIL.c │ ├── WHITESPACE-DERAIL.man │ ├── WHITESPACE-DERAIL.txt │ ├── WHITESPACE-RERAIL.c │ ├── WHITESPACE-RERAIL.man │ ├── WHITESPACE-RERAIL.txt │ ├── WHITESPACE.h │ ├── WIDTH.c │ ├── WIDTH.man │ ├── WIDTH.txt │ ├── WIN.man │ ├── WIN.txt │ ├── ZH-BONUS-PHRASE.c │ ├── ZH-BONUS-PHRASE.txt │ ├── ZH-BONUS.c │ ├── ZH-BONUS.txt │ ├── ZH-FUZZY-CN.man │ ├── ZH-FUZZY-CN.txt │ ├── ZH-FUZZY-TW.man │ ├── ZH-FUZZY-TW.txt │ ├── ZHCN.man │ ├── ZHCN.txt │ ├── ZHTW-WORDS.man │ ├── ZHTW-WORDS.txt │ ├── ZHTW.man │ ├── ZHTW.txt │ ├── _AMBIGUOUS.h │ ├── _NF-CCC.h │ ├── _NF-HANGUL-COMPOSITION.c │ ├── _NF-HANGUL-COMPOSITION.txt │ ├── _NF-HANGUL-DECOMPOSITION.c │ ├── _NF-HANGUL-DECOMPOSITION.txt │ ├── _NF-ORDER.c │ ├── _NF-ORDER.txt │ ├── _NFC-MAP.txt │ ├── _NFC.c │ ├── _NFC.txt │ ├── _NFD.txt │ ├── _NFKD.txt │ ├── _WIDTH.h │ └── alias ├── scorer │ ├── CJK.c │ ├── LATIN1.c │ └── unicode_range.c ├── src │ └── ZH-BONUS.txt └── to │ ├── 00.txt │ ├── ANY.c │ ├── ANY.man │ ├── ANY.txt │ ├── ASCII-HTML-INFO.c │ ├── ASCII-HTML-INFO.man │ ├── ASCII-HTML-INFO.txt │ ├── ASCII-HTML-UNICODE-IMG.c │ ├── ASCII-HTML-UNICODE-IMG.man │ ├── ASCII-HTML-UNICODE-IMG.txt │ ├── ASCII-NAMED-HTML-ENTITY.man 
│ ├── ASCII-NAMED-HTML-ENTITY.txt │ ├── ASCII.man │ ├── ASCII.txt │ ├── BIG5-5C.man │ ├── BIG5-5C.txt │ ├── BSDCONV-KEYWORD.man │ ├── BSDCONV-KEYWORD.txt │ ├── BSDCONV-LOG.c │ ├── BSDCONV-LOG.man │ ├── BSDCONV-LOG.txt │ ├── BSDCONV-OUTPUT.c │ ├── BSDCONV-OUTPUT.man │ ├── BSDCONV-OUTPUT.txt │ ├── BSDCONV.c │ ├── BSDCONV.man │ ├── BSDCONV.txt │ ├── BYTE.man │ ├── BYTE.txt │ ├── CCCII.man │ ├── CCCII.txt │ ├── CP936-TRANS.man │ ├── CP936-TRANS.txt │ ├── CP950-TRANS.man │ ├── CP950-TRANS.txt │ ├── EBCDIC.c │ ├── ESCAPE.c │ ├── ESCAPE.man │ ├── ESCAPE.txt │ ├── GB2312-TRANS.txt │ ├── GBK-TRANS.txt │ ├── HEX.txt │ ├── IBM-37.man │ ├── IBM-37.txt │ ├── IBM-930.c │ ├── IBM-930.man │ ├── IBM-930.txt │ ├── IBM-933.c │ ├── IBM-933.man │ ├── IBM-933.txt │ ├── IBM-935.c │ ├── IBM-935.man │ ├── IBM-935.txt │ ├── IBM-937.c │ ├── IBM-937.man │ ├── IBM-937.txt │ ├── IBM-939.c │ ├── IBM-939.man │ ├── IBM-939.txt │ ├── NULL.c │ ├── NULL.man │ ├── NULL.txt │ ├── PASS.c │ ├── PASS.man │ ├── PASS.txt │ ├── RAW.c │ ├── RAW.man │ ├── RAW.txt │ ├── UCS-2BE.c │ ├── UCS-2BE.man │ ├── UCS-2BE.txt │ ├── UCS-2LE.c │ ├── UCS-2LE.man │ ├── UCS-2LE.txt │ ├── UTF-16BE.c │ ├── UTF-16BE.man │ ├── UTF-16BE.txt │ ├── UTF-16LE.c │ ├── UTF-16LE.man │ ├── UTF-16LE.txt │ ├── UTF-32BE.c │ ├── UTF-32BE.man │ ├── UTF-32BE.txt │ ├── UTF-32LE.c │ ├── UTF-32LE.man │ ├── UTF-32LE.txt │ ├── _CP1251.man │ ├── _CP1251.txt │ ├── _CP1252.man │ ├── _CP1252.txt │ ├── _CP1253.man │ ├── _CP1253.txt │ ├── _CP874.man │ ├── _CP874.txt │ ├── _CP936.man │ ├── _CP936.txt │ ├── _CP949.man │ ├── _CP949.txt │ ├── _CP950.man │ ├── _CP950.txt │ ├── _GB18030.c │ ├── _GB18030.man │ ├── _GB18030.txt │ ├── _GB2312.man │ ├── _GB2312.txt │ ├── _GBK.man │ ├── _GBK.txt │ ├── _ISO-8859-1.man │ ├── _ISO-8859-1.txt │ ├── _JIS0212.txt │ ├── _SHIFT-JIS.man │ ├── _SHIFT-JIS.txt │ ├── _UAO241.man │ ├── _UAO241.txt │ ├── _UAO250.man │ ├── _UAO250.txt │ ├── _UTF-8.c │ ├── _UTF-8.man │ ├── _UTF-8.txt │ └── alias ├── src ├── bsdconv-completion.c ├── 
bsdconv-dbg.c ├── bsdconv-man.c ├── bsdconv-mktable.c ├── bsdconv.c ├── bsdconv.h ├── fmalloc.h ├── libbsdconv.c ├── libbsdconv_counter.c ├── libbsdconv_filter.c ├── libbsdconv_hash.c ├── libbsdconv_module.c ├── libbsdconv_scorer.c ├── libbsdconv_util.c ├── libfmalloc.c ├── missing_func.c └── portable_endian.h ├── testsuite ├── api.c └── conversion.py └── tools ├── AexcludeBCD.py ├── _bsdconv-completion.zsh ├── codepage.py ├── findAinB.py ├── gen_asciifold.py ├── gen_hex.py ├── mkalias.py ├── mkbonus.py ├── nfkc_gen.py ├── setEnvVar.bat ├── simple_gen.py ├── unicode_gen.py ├── viewer.html └── zh_component.py /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | tmp 3 | testsuite/api 4 | bsdconv.aux 5 | bsdconv.log 6 | bsdconv.out 7 | bsdconv.pdf 8 | bsdconv.synctex.gz 9 | bsdconv.toc 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "doc/python-bsdconv"] 2 | path = doc/python-bsdconv 3 | url = https://github.com/buganini/python-bsdconv.git 4 | [submodule "doc/php-bsdconv"] 5 | path = doc/php-bsdconv 6 | url = https://github.com/buganini/php-bsdconv.git 7 | [submodule "doc/ruby-bsdconv"] 8 | path = doc/ruby-bsdconv 9 | url = https://github.com/buganini/ruby-bsdconv.git 10 | [submodule "doc/go-bsdconv"] 11 | path = doc/go-bsdconv 12 | url = https://github.com/buganini/go-bsdconv.git 13 | -------------------------------------------------------------------------------- /Contributors: -------------------------------------------------------------------------------- 1 | adamv 2 | Artoria2e5 3 | buganini 4 | godfat 5 | kcwu 6 | PkmX 7 | roytam1 8 | Thomas-Tsai 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2016 Kuan-Chung Chiu 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 16 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 18 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 19 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 21 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 22 | SUCH DAMAGE. 
23 | -------------------------------------------------------------------------------- /Makefile.win: -------------------------------------------------------------------------------- 1 | PREFIX?=C:/bsdconv 2 | BSDCONV_PATH?=${PREFIX} 3 | CFLAGS+=-Wall -O2 -DPREFIX='"${PREFIX}"' -DBSDCONV_PATH='"${BSDCONV_PATH}"' -DWIN32 -D_BSDCONV_INTERNAL 4 | CC?=mingw32-cc 5 | 6 | all: builddir libbsdconv bsdconv_mktable bsdconv modules meta 7 | 8 | builddir: 9 | mkdir -p build/ 10 | mkdir -p build/include 11 | mkdir -p build/modules 12 | mkdir -p build/modules/filter 13 | mkdir -p build/modules/from 14 | mkdir -p build/modules/inter 15 | mkdir -p build/modules/scorer 16 | mkdir -p build/modules/src 17 | mkdir -p build/modules/to 18 | 19 | libbsdconv: builddir 20 | $(CC) ${CFLAGS} src/missing_func.c src/libbsdconv.c -shared -o build/libbsdconv.dll 21 | 22 | bsdconv: builddir libbsdconv src/bsdconv.c 23 | $(CC) ${CFLAGS} -lbsdconv -L./build/ src/bsdconv.c -o build/bsdconv.exe 24 | 25 | bsdconv_mktable: builddir src/bsdconv-mktable.c 26 | $(CC) ${CFLAGS} src/missing_func.c src/bsdconv-mktable.c -o build/bsdconv_mktable.exe 27 | 28 | codecs_table: builddir bsdconv_mktable 29 | cd modules && \ 30 | find */*.txt -type f | awk -F. '{cmd="bsdconv_mktable ../modules/"$$1"."$$2" modules/"$$1; print(cmd);}' > ../build/mk_table.bat 31 | 32 | codecs_callback: builddir libbsdconv 33 | cd modules && \ 34 | find */*.c -type f | awk -F. 
'{cmd="$(CC) ${CFLAGS} -shared -lbsdconv -L../build/ -o ../build/modules/"$$1".dll "$$1"."$$2" -lwsock32"; system(cmd);}' 35 | 36 | modules: builddir codecs_table codecs_callback 37 | 38 | meta: 39 | cp src/bsdconv.h build/include/ 40 | cp tools/setEnvVar.bat build/ 41 | 42 | clean: 43 | rm -rf build 44 | -------------------------------------------------------------------------------- /Todo: -------------------------------------------------------------------------------- 1 | update score table 2 | to/ASCII-TRANS 3 | {} 4 | () 5 | Auto Scorer 6 | Unpaired UTF-16/U+D800..U+DFFF 7 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to bsdconv's documentation! 2 | =================================== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | api 10 | go-bsdconv/doc/api 11 | php-bsdconv/doc/api 12 | python-bsdconv/doc/api 13 | ruby-bsdconv/doc/api 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`search` 21 | 22 | -------------------------------------------------------------------------------- /modules/filter/ANSI.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #define TYPE 0x1b 4 | 5 | #include "type.c" 6 | -------------------------------------------------------------------------------- /modules/filter/ARABIC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0600, 0x06FF }, // Arabic 9 | { 0x0750, 0x077F }, // Arabic Supplement 10 | { 0x08A0, 0x08FF }, // Arabic Extended-A 11 | { 0xFB50, 0xFDFF }, // Arabic Presentation Forms-A 12 | { 0xFE70, 0xFEFF }, // Arabic 
Presentation Forms-B 13 | { 0x10A60, 0x10A7F }, // Old South Arabian 14 | { 0x10A80, 0x10A9F }, // Old North Arabian 15 | { 0x1EE00, 0x1EEFF }, // Arabic Mathematical Alphabetic Symbols 16 | }; 17 | #include "unicode_range.c" 18 | -------------------------------------------------------------------------------- /modules/filter/ARMENIAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0530, 0x058F }, // Armenian 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/ARROWS.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x2190, 0x21FF }, // Arrows 9 | { 0x27F0, 0x27FF }, // Supplemental Arrows-A 10 | { 0x2900, 0x297F }, // Supplemental Arrows-B 11 | { 0x2B00, 0x2BFF }, // Miscellaneous Symbols and Arrows 12 | { 0x1F800, 0x1F8FF }, // Supplemental Arrows-C 13 | }; 14 | #include "unicode_range.c" 15 | -------------------------------------------------------------------------------- /modules/filter/BRAILLE.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x2800, 0x28FF }, // Braille Patterns 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/BYTE.c: -------------------------------------------------------------------------------- 1 | #include 
"../../src/bsdconv.h" 2 | 3 | #define TYPE 3 4 | 5 | #include "type.c" 6 | -------------------------------------------------------------------------------- /modules/filter/CHEROKEE.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x13A0, 0x13FF }, // Cherokee 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/CJK.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1100, 0x11FF }, // Hangul Jamo 9 | { 0x2E80, 0x2EFF }, // CJK Radicals Supplement 10 | { 0x2F00, 0x2FDF }, // Kangxi Radicals 11 | { 0x2FF0, 0x2FFF }, // Ideographic Description Characters 12 | { 0x3000, 0x303F }, // CJK Symbols and Punctuation 13 | { 0x3040, 0x309F }, // Hiragana 14 | { 0x30A0, 0x30FF }, // Katakana 15 | { 0x3100, 0x312F }, // Bopomofo 16 | { 0x3130, 0x318F }, // Hangul Compatibility Jamo 17 | { 0x3190, 0x319F }, // Kanbun 18 | { 0x31A0, 0x31BF }, // Bopomofo Extended 19 | { 0x31C0, 0x31EF }, // CJK Strokes 20 | { 0x31F0, 0x31FF }, // Katakana Phonetic Extensions 21 | { 0x3200, 0x32FF }, // Enclosed CJK Letters and Months 22 | { 0x3300, 0x33FF }, // CJK Compatibility 23 | { 0x3400, 0x4DBF }, // CJK Unified Ideographs Extension A 24 | { 0x4DC0, 0x4DFF }, // Yijing Hexagram Symbols 25 | { 0x4E00, 0x9FFF }, // CJK Unified Ideographs 26 | { 0xA000, 0xA48F }, // Yi Syllables 27 | { 0xA490, 0xA4CF }, // Yi Radicals 28 | { 0xA960, 0xA97F }, // Hangul Jamo Extended-A 29 | { 0xAC00, 0xD7AF }, // Hangul Syllables 30 | { 0xD7B0, 0xD7FF }, // Hangul Jamo Extended-B 31 | { 
0xF900, 0xFAFF }, // CJK Compatibility Ideographs 32 | { 0xFE30, 0xFE4F }, // CJK Compatibility Forms 33 | { 0x1B000, 0x1B0FF }, // Kana Supplement 34 | { 0x1B100, 0x1B12F }, // Kana Extended-A 35 | { 0x1D300, 0x1D35F }, // Tai Xuan Jing Symbols 36 | { 0x20000, 0x2A6DF }, // CJK Unified Ideographs Extension B 37 | { 0x2A700, 0x2B73F }, // CJK Unified Ideographs Extension C 38 | { 0x2B740, 0x2B81F }, // CJK Unified Ideographs Extension D 39 | { 0x2B820, 0x2CEAF }, // CJK Unified Ideographs Extension E 40 | { 0x2CEB0, 0x2EBEF }, // CJK Unified Ideographs Extension F 41 | { 0x2F800, 0x2FA1F }, // CJK Compatibility Ideographs Supplement 42 | { 0x30000, 0x3134F }, // CJK Unified Ideographs Extension G 43 | }; 44 | #include "unicode_range.c" 45 | -------------------------------------------------------------------------------- /modules/filter/CJK.man: -------------------------------------------------------------------------------- 1 | U+3400..U+4DB5 CJK Unified Ideographs Extension A 3.0 2 | U+4E00..U+9FA5 CJK Unified Ideographs 1.1 3 | U+9FA6..U+9FBB CJK Unified Ideographs 4.1 4 | U+F900..U+FA2D CJK Compatibility Ideographs 1.1 5 | U+FA30..U+FA6A CJK Compatibility Ideographs 3.2 6 | U+FA70..U+FAD9 CJK Compatibility Ideographs 4.1 7 | U+20000..U+2A6D6 CJK Unified Ideographs Extension B 3.1 8 | U+2F800..U+2FA1D CJK Compatibility Supplement 3.1 9 | -------------------------------------------------------------------------------- /modules/filter/CNS11643.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #define TYPE 2 4 | 5 | #include "type.c" 6 | -------------------------------------------------------------------------------- /modules/filter/CUNEIFORM.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range 
ranges[] = { 8 | { 0x12000, 0x123FF }, // Cuneiform 9 | { 0x12400, 0x1247F }, // Cuneiform Numbers and Punctuation 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/CURRENCY.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x20A0, 0x20CF }, // Currency Symbols 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/CYRILLIC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0400, 0x04FF }, // Cyrillic 9 | { 0x0500, 0x052F }, // Cyrillic Supplement 10 | { 0x1C80, 0x1C8F }, // Cyrillic Extended-C 11 | { 0x2DE0, 0x2DFF }, // Cyrillic Extended-A 12 | { 0xA640, 0xA69F }, // Cyrillic Extended-B 13 | }; 14 | #include "unicode_range.c" 15 | -------------------------------------------------------------------------------- /modules/filter/DEVANAGARI.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0900, 0x097F }, // Devanagari 9 | { 0xA8E0, 0xA8FF }, // Devanagari Extended 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/EGYPTIAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: 
ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x13000, 0x1342F }, // Egyptian Hieroglyphs 9 | { 0x13430, 0x1343F }, // Egyptian Hieroglyph Format Controls 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/EMOTICON.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1F600, 0x1F64F }, // Emoticons 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/ETHIOPIC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1200, 0x137F }, // Ethiopic 9 | { 0x1380, 0x139F }, // Ethiopic Supplement 10 | { 0x2D80, 0x2DDF }, // Ethiopic Extended 11 | { 0xAB00, 0xAB2F }, // Ethiopic Extended-A 12 | }; 13 | #include "unicode_range.c" 14 | -------------------------------------------------------------------------------- /modules/filter/GEORGIAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x10A0, 0x10FF }, // Georgian 9 | { 0x1C90, 0x1CBF }, // Georgian Extended 10 | { 0x2D00, 0x2D2F }, // Georgian Supplement 11 | }; 12 | #include "unicode_range.c" 13 | -------------------------------------------------------------------------------- 
/modules/filter/GREEK.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0370, 0x03FF }, // Greek and Coptic 9 | { 0x1F00, 0x1FFF }, // Greek Extended 10 | { 0x10140, 0x1018F }, // Ancient Greek Numbers 11 | { 0x1D200, 0x1D24F }, // Ancient Greek Musical Notation 12 | }; 13 | #include "unicode_range.c" 14 | -------------------------------------------------------------------------------- /modules/filter/HANGUL.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1100, 0x11FF }, // Hangul Jamo 9 | { 0x3130, 0x318F }, // Hangul Compatibility Jamo 10 | { 0xA960, 0xA97F }, // Hangul Jamo Extended-A 11 | { 0xAC00, 0xD7AF }, // Hangul Syllables 12 | { 0xD7B0, 0xD7FF }, // Hangul Jamo Extended-B 13 | }; 14 | #include "unicode_range.c" 15 | -------------------------------------------------------------------------------- /modules/filter/HEBREW.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0590, 0x05FF }, // Hebrew 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/HIRAGANA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 
| { 0x3040, 0x309F }, // Hiragana 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/IPA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0250, 0x02AF }, // IPA Extensions 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/JAVANESE.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0xA980, 0xA9DF }, // Javanese 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/KANNADA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0C80, 0x0CFF }, // Kannada 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/KATAKANA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x30A0, 0x30FF }, // Katakana 9 | { 0x31F0, 0x31FF }, // Katakana Phonetic Extensions 10 | }; 11 | #include "unicode_range.c" 12 | 
-------------------------------------------------------------------------------- /modules/filter/KHMER.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1780, 0x17FF }, // Khmer 9 | { 0x19E0, 0x19FF }, // Khmer Symbols 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/LAO.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0E80, 0x0EFF }, // Lao 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/LATIN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0000, 0x007F }, // Basic Latin 9 | { 0x0080, 0x00FF }, // Latin-1 Supplement 10 | { 0x0100, 0x017F }, // Latin Extended-A 11 | { 0x0180, 0x024F }, // Latin Extended-B 12 | { 0x1E00, 0x1EFF }, // Latin Extended Additional 13 | { 0x2C60, 0x2C7F }, // Latin Extended-C 14 | { 0xA720, 0xA7FF }, // Latin Extended-D 15 | { 0xAB30, 0xAB6F }, // Latin Extended-E 16 | }; 17 | #include "unicode_range.c" 18 | -------------------------------------------------------------------------------- /modules/filter/LATIN1.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Reference: http://en.wikipedia.org/wiki/Windows-1252 3 | */ 4 | 5 | #include 
"../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0, 0x80 }, 9 | { 0x82, 0x8C }, 10 | { 0x8E, 0x8E }, 11 | { 0x91, 0x9C }, 12 | { 0x9E, 0xFF }, 13 | }; 14 | 15 | #include "unicode_range.c" 16 | -------------------------------------------------------------------------------- /modules/filter/MAHJONG.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1F000, 0x1F02F }, // Mahjong Tiles 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/MALAYALAM.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0D00, 0x0D7F }, // Malayalam 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/MATH.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x2200, 0x22FF }, // Mathematical Operators 9 | { 0x27C0, 0x27EF }, // Miscellaneous Mathematical Symbols-A 10 | { 0x2980, 0x29FF }, // Miscellaneous Mathematical Symbols-B 11 | { 0x2A00, 0x2AFF }, // Supplemental Mathematical Operators 12 | { 0x1D400, 0x1D7FF }, // Mathematical Alphanumeric Symbols 13 | { 0x1EE00, 0x1EEFF }, // Arabic Mathematical Alphabetic Symbols 14 | }; 15 | #include "unicode_range.c" 16 | 
-------------------------------------------------------------------------------- /modules/filter/MIAO.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x16F00, 0x16F9F }, // Miao 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/MONGOLIAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1800, 0x18AF }, // Mongolian 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/MUSIC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1D000, 0x1D0FF }, // Byzantine Musical Symbols 9 | { 0x1D100, 0x1D1FF }, // Musical Symbols 10 | { 0x1D200, 0x1D24F }, // Ancient Greek Musical Notation 11 | }; 12 | #include "unicode_range.c" 13 | -------------------------------------------------------------------------------- /modules/filter/MYANMAR.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1000, 0x109F }, // Myanmar 9 | { 0xA9E0, 0xA9FF }, // Myanmar Extended-B 10 | { 0xAA60, 0xAA7F }, // Myanmar Extended-A 11 | }; 12 | #include
"unicode_range.c" 13 | -------------------------------------------------------------------------------- /modules/filter/PHONETIC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0250, 0x02AF }, // IPA Extensions 9 | { 0x1D00, 0x1D7F }, // Phonetic Extensions 10 | { 0x1D80, 0x1DBF }, // Phonetic Extensions Supplement 11 | }; 12 | #include "unicode_range.c" 13 | -------------------------------------------------------------------------------- /modules/filter/PRINT.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | /* PRINT filter: printable ASCII only, space (0x20) through tilde (0x7E); 0x1F is the Unit Separator control character and must not match. */ static const struct uint32_range ranges[] = { 4 | { 0x20, 0x7E }, 5 | }; 6 | 7 | #include "unicode_range.c" 8 | -------------------------------------------------------------------------------- /modules/filter/PUA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0xE000, 0xF8FF }, // Private Use Area 9 | { 0xF0000, 0xFFFFF }, // Supplementary Private Use Area-A 10 | { 0x100000, 0x10FFFF }, // Supplementary Private Use Area-B 11 | }; 12 | #include "unicode_range.c" 13 | -------------------------------------------------------------------------------- /modules/filter/PUNCTUATION.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x2000, 0x206F }, // General Punctuation 9 | { 0x2E00, 0x2E7F }, // Supplemental Punctuation 10 | { 0x3000,
0x303F }, // CJK Symbols and Punctuation 11 | { 0x12400, 0x1247F }, // Cuneiform Numbers and Punctuation 12 | { 0x16FE0, 0x16FFF }, // Ideographic Symbols and Punctuation 13 | }; 14 | #include "unicode_range.c" 15 | -------------------------------------------------------------------------------- /modules/filter/ROMAN.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | static const struct uint32_range ranges[] = { 4 | { 0x30, 0x39 }, 5 | { 0x41, 0x5A }, 6 | { 0x61, 0x7A }, 7 | }; 8 | 9 | #include "unicode_range.c" 10 | -------------------------------------------------------------------------------- /modules/filter/SAMARITAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0800, 0x083F }, // Samaritan 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/SINHALA.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0D80, 0x0DFF }, // Sinhala 9 | { 0x111E0, 0x111FF }, // Sinhala Archaic Numbers 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/SUNDANESE.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1B80, 0x1BBF }, // Sundanese 9 | { 0x1CC0, 0x1CCF }, // Sundanese 
Supplement 10 | }; 11 | #include "unicode_range.c" 12 | -------------------------------------------------------------------------------- /modules/filter/SYRIAC.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0700, 0x074F }, // Syriac 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/TAGALOG.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x1700, 0x171F }, // Tagalog 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/TAMIL.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0B80, 0x0BFF }, // Tamil 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/TELUGU.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0C00, 0x0C7F }, // Telugu 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/THAI.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0E00, 0x0E7F }, // Thai 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/TIBETAN.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x0F00, 0x0FFF }, // Tibetan 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/TIFINAGH.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0x2D30, 0x2D7F }, // Tifinagh 9 | }; 10 | #include "unicode_range.c" 11 | -------------------------------------------------------------------------------- /modules/filter/UNICODE.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #define TYPE 1 4 | 5 | #include "type.c" 6 | -------------------------------------------------------------------------------- /modules/filter/YI.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range ranges[] = { 8 | { 0xA000, 0xA48F }, // Yi Syllables 9 | { 0xA490, 0xA4CF }, // Yi Radicals 10 | }; 11 | #include "unicode_range.c" 12 | 
-------------------------------------------------------------------------------- /modules/filter/alias: -------------------------------------------------------------------------------- 1 | 01 UNICODE 2 | 1 UNICODE 3 | 02 CNS11643 4 | 2 CNS11643 5 | 03 BYTE 6 | 3 BYTE 7 | 1B ANSI 8 | -------------------------------------------------------------------------------- /modules/filter/type.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | int cbfilter(struct data_rt *data){ 4 | if(data->len>0 && UCP(data->data)[0]==TYPE) 5 | return 1; 6 | else 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /modules/filter/unicode_range.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Some code come from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | int cbfilter(struct data_rt *data){ 8 | uint32_t ucs=0; 9 | int i; 10 | int max=sizeof(ranges) / sizeof(struct uint32_range) - 1; 11 | int min = 0; 12 | int mid; 13 | 14 | if(data->len<1 || UCP(data->data)[0]!=1){ 15 | return 0; 16 | } 17 | 18 | for(i=1;ilen;++i){ 19 | ucs<<=8; 20 | ucs|=UCP(data->data)[i]; 21 | } 22 | 23 | if (ucs < ranges[0].first || ucs > ranges[max].last){ 24 | //noop 25 | }else while (max >= min) { 26 | mid = (min + max) / 2; 27 | if (ucs > ranges[mid].last) 28 | min = mid + 1; 29 | else if (ucs < ranges[mid].first) 30 | max = mid - 1; 31 | else{ 32 | return 1; 33 | } 34 | } 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /modules/from/00.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Null byte ('\0') 3 | EXAMPLE 00 4 | 5 | OUTPUT 6 | TYPE 01 (UNICODE) 7 | DESC U+0000 8 | EXAMPLE 0100 9 | -------------------------------------------------------------------------------- 
/modules/from/00.txt: -------------------------------------------------------------------------------- 1 | 00 0100 2 | -------------------------------------------------------------------------------- /modules/from/ANSI-CONTROL.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | #define F_CLEAR 0 6 | #define F_A 1 7 | #define F_B 2 8 | 9 | struct my_s { 10 | char *buf; 11 | char *p,f; 12 | }; 13 | 14 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 15 | struct my_s *r=malloc(sizeof(struct my_s)); 16 | r->buf=malloc(32); 17 | THIS_CODEC(ins)->priv=r; 18 | return 0; 19 | } 20 | 21 | void cbinit(struct bsdconv_instance *ins){ 22 | struct my_s *r=THIS_CODEC(ins)->priv; 23 | r->p=r->buf; 24 | r->f=0; 25 | } 26 | 27 | void cbdestroy(struct bsdconv_instance *ins){ 28 | struct my_s *r=THIS_CODEC(ins)->priv; 29 | free(r->buf); 30 | free(r); 31 | } 32 | 33 | void cbconv(struct bsdconv_instance *ins){ 34 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 35 | struct my_s *t=THIS_CODEC(ins)->priv; 36 | char d=CP(this_phase->curr->data)[this_phase->i]; 37 | 38 | if(t->f){ 39 | *(t->p)=d; 40 | t->p+=1; 41 | this_phase->state.status=CONTINUE; 42 | if((d>='a' && d<='z') || (d>='A' && d<='N') || (d>='P' && d<='Z') || (t->p - t->buf)==30 || ((t->p - t->buf)==2 && d!='[')){ 43 | DATA_MALLOC(ins, this_phase->data_tail->next); 44 | this_phase->data_tail=this_phase->data_tail->next; 45 | this_phase->data_tail->next=NULL; 46 | this_phase->data_tail->len=t->p - t->buf; 47 | this_phase->data_tail->flags=F_FREE; 48 | this_phase->state.status=NEXTPHASE; 49 | this_phase->data_tail->data=t->buf; 50 | t->f=0; 51 | t->buf=malloc(32); 52 | t->p=t->buf; 53 | } 54 | }else if(d==0x1b){ 55 | t->f=1; 56 | *(t->p)=d; 57 | t->p+=1; 58 | this_phase->state.status=CONTINUE; 59 | }else{ 60 | this_phase->state.status=DEADEND; 61 | } 62 | } 63 | 
-------------------------------------------------------------------------------- /modules/from/ANSI-CONTROL.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC ANSI control sequence 3 | EXAMPLE "\x1B[30m" 4 | 5 | OUTPUT 6 | TYPE 1B (ANSI CONTROL SEQUENCE) 7 | DESC Input as-is 8 | EXAMPLE 1B5B33306D 9 | 10 | EXAMPLE 11 | #used in https://github.com/buganini/bug5 12 | #used with inter/BIG5-DEFRAG 13 | > perl -e 'print "a\033[1mb"' | bsdconv ansi-control,utf-8:bsdconv-stdout 14 | 0161 15 | 1B5B316D ( FREE ) 16 | 0162 17 | -------------------------------------------------------------------------------- /modules/from/ANSI-CONTROL.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/ANY.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_st { 6 | struct data_rt *data; 7 | bsdconv_counter_t *counter; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | struct my_st *r=malloc(sizeof(struct my_st)); 12 | struct data_rt *bak; 13 | int e; 14 | r->data=str2data("013F", &e, ins); 15 | r->counter=NULL; 16 | while(arg){ 17 | if(strcasecmp(arg->key, "ERROR")==0){ 18 | if(arg->ptr) 19 | r->counter=bsdconv_counter(ins, arg->ptr); 20 | else 21 | r->counter=bsdconv_counter(ins, "IERR"); 22 | }else if(strcasecmp(arg->key, "DROP")==0){ 23 | DATA_FREE(ins, r->data); 24 | r->data = NULL; 25 | }else{ 26 | bak=r->data; 27 | r->data=str2data(arg->key, &e, ins); 28 | DATA_FREE(ins, bak); 29 | if(e){ 30 | DATA_FREE(ins, r->data); 31 | free(r); 32 | return e; 33 | } 34 | } 35 | arg=arg->next; 36 | } 37 | THIS_CODEC(ins)->priv=r; 38 | return 0; 39 | } 40 | 41 | void cbdestroy(struct bsdconv_instance *ins){ 42 | struct bsdconv_phase 
*this_phase=THIS_PHASE(ins); 43 | struct my_st *r=this_phase->codec[this_phase->index].priv; 44 | DATA_FREE(ins, r->data); 45 | free(r); 46 | } 47 | 48 | void cbconv(struct bsdconv_instance *ins){ 49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 50 | struct my_st *r=this_phase->codec[this_phase->index].priv; 51 | 52 | LISTCPY(ins, this_phase->data_tail, r->data); 53 | 54 | this_phase->state.status=NEXTPHASE; 55 | 56 | if(r->counter) 57 | *(r->counter)+=1; 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /modules/from/ANY.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Any byte 3 | EXAMPLE * 4 | 5 | OUTPUT 6 | TYPE Sepcified by argument 7 | DESC Sepcified by argument 8 | 9 | ARGUMENT 10 | ERROR 11 | DESC Increase counter (IERR if no counter name specified) 12 | DROP 13 | DESC Don't output any data 14 | $DataList 15 | DESC Output value 16 | EXAMPLE 013F 17 | EXAMPLE 013F.012F 18 | 19 | EXAMPLE 20 | > echo test測試test | bsdconv ascii,any#013f&error:ascii 21 | test??????test 22 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,3f:utf-8 #alias 23 | 陶?穘?試 24 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,any#01fffd:utf-8 25 | 陶�穘�試 26 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,sub:utf-8 #alias 27 | 陶�穘�試 28 | -------------------------------------------------------------------------------- /modules/from/ANY.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/from/ASCII-NAMED-HTML-ENTITY.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 'ü' | bsdconv ascii-named-html-entity:utf-8 3 | ü 4 | -------------------------------------------------------------------------------- /modules/from/ASCII-NUMERIC-HTML-ENTITY.c: -------------------------------------------------------------------------------- 1 | #define USE_HEX_MAP 2 | #define USE_DEC_MAP 3 | 4 | #include 5 | #include 6 | #include 7 | #include "../../src/bsdconv.h" 8 | 9 | struct my_s{ 10 | int status; 11 | int *tbl; 12 | int b; 13 | union { 14 | char c[4]; 15 | uint32_t i; 16 | } buf; 17 | }; 18 | 19 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 20 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 21 | return 0; 22 | } 23 | 24 | void cbinit(struct bsdconv_instance *ins){ 25 | struct my_s *r=THIS_CODEC(ins)->priv; 26 | r->status=0; 27 | } 28 | 29 | void cbdestroy(struct bsdconv_instance *ins){ 30 | void *p=THIS_CODEC(ins)->priv; 31 | free(p); 32 | } 33 | 34 | #define DEADEND() do{ \ 35 | this_phase->state.status=DEADEND; \ 36 | t->status=0; \ 37 | return; \ 38 | }while(0); 39 | 40 | void cbconv(struct bsdconv_instance *ins){ 41 | char ob[8], *p; 42 | int i,j=0; 43 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 44 | struct my_s *t=THIS_CODEC(ins)->priv; 45 | char d; 46 | 47 | for(;this_phase->icurr->len;this_phase->i+=1){ 48 | d=CP(this_phase->curr->data)[this_phase->i]; 49 | if(d==';' && t->status){ 50 | //put data 51 | t->buf.i=htobe32(t->buf.i); 52 | for(i=0;i<4;i++){ 53 | if(t->buf.c[i] || j) 54 | ob[j++]=t->buf.c[i]; 55 | } 56 | DATA_MALLOC(ins, this_phase->data_tail->next); 57 | this_phase->data_tail=this_phase->data_tail->next; 58 | this_phase->data_tail->next=NULL; 59 | this_phase->data_tail->flags=F_FREE; 60 | this_phase->data_tail->len=j+1; 61 | 
p=this_phase->data_tail->data=malloc(j+1); 62 | p[0]=0x01; 63 | memcpy(&p[1], ob, j); 64 | this_phase->state.status=NEXTPHASE; 65 | t->status=0; 66 | return; 67 | } 68 | if(t->status){ 69 | ++t->status; 70 | if(t->tbl[(unsigned char)d]==-1) DEADEND(); 71 | t->buf.i*=t->b; 72 | t->buf.i+=t->tbl[(unsigned char)d]; 73 | }else{ 74 | if(d=='x'){ 75 | t->status=1000; 76 | t->tbl=hex; 77 | t->b=16; 78 | t->buf.i=0; 79 | continue; 80 | } 81 | t->b=10; 82 | t->tbl=dec; 83 | if(t->tbl[(unsigned char)d]==-1) DEADEND(); 84 | t->buf.i=t->tbl[(unsigned char)d]; 85 | t->status=1; 86 | } 87 | } 88 | this_phase->state.status=CONTINUE; 89 | return; 90 | } 91 | -------------------------------------------------------------------------------- /modules/from/ASCII-NUMERIC-HTML-ENTITY.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo '測試' | bsdconv ascii-numeric-html-entity:utf-8 3 | 測試 4 | -------------------------------------------------------------------------------- /modules/from/ASCII-NUMERIC-HTML-ENTITY.txt: -------------------------------------------------------------------------------- 1 | 2623* ? 
2 | -------------------------------------------------------------------------------- /modules/from/ASCII.man: -------------------------------------------------------------------------------- 1 | #just ascii (U+0001 ~ U+007F) 2 | -------------------------------------------------------------------------------- /modules/from/ASCII.txt: -------------------------------------------------------------------------------- 1 | 01 0101 2 | 02 0102 3 | 03 0103 4 | 04 0104 5 | 05 0105 6 | 06 0106 7 | 07 0107 8 | 08 0108 9 | 09 0109 10 | 0A 010A 11 | 0B 010B 12 | 0C 010C 13 | 0D 010D 14 | 0E 010E 15 | 0F 010F 16 | 10 0110 17 | 11 0111 18 | 12 0112 19 | 13 0113 20 | 14 0114 21 | 15 0115 22 | 16 0116 23 | 17 0117 24 | 18 0118 25 | 19 0119 26 | 1A 011A 27 | 1B 011B 28 | 1C 011C 29 | 1D 011D 30 | 1E 011E 31 | 1F 011F 32 | 20 0120 33 | 21 0121 34 | 22 0122 35 | 23 0123 36 | 24 0124 37 | 25 0125 38 | 26 0126 39 | 27 0127 40 | 28 0128 41 | 29 0129 42 | 2A 012A 43 | 2B 012B 44 | 2C 012C 45 | 2D 012D 46 | 2E 012E 47 | 2F 012F 48 | 30 0130 49 | 31 0131 50 | 32 0132 51 | 33 0133 52 | 34 0134 53 | 35 0135 54 | 36 0136 55 | 37 0137 56 | 38 0138 57 | 39 0139 58 | 3A 013A 59 | 3B 013B 60 | 3C 013C 61 | 3D 013D 62 | 3E 013E 63 | 3F 013F 64 | 40 0140 65 | 41 0141 66 | 42 0142 67 | 43 0143 68 | 44 0144 69 | 45 0145 70 | 46 0146 71 | 47 0147 72 | 48 0148 73 | 49 0149 74 | 4A 014A 75 | 4B 014B 76 | 4C 014C 77 | 4D 014D 78 | 4E 014E 79 | 4F 014F 80 | 50 0150 81 | 51 0151 82 | 52 0152 83 | 53 0153 84 | 54 0154 85 | 55 0155 86 | 56 0156 87 | 57 0157 88 | 58 0158 89 | 59 0159 90 | 5A 015A 91 | 5B 015B 92 | 5C 015C 93 | 5D 015D 94 | 5E 015E 95 | 5F 015F 96 | 60 0160 97 | 61 0161 98 | 62 0162 99 | 63 0163 100 | 64 0164 101 | 65 0165 102 | 66 0166 103 | 67 0167 104 | 68 0168 105 | 69 0169 106 | 6A 016A 107 | 6B 016B 108 | 6C 016C 109 | 6D 016D 110 | 6E 016E 111 | 6F 016F 112 | 70 0170 113 | 71 0171 114 | 72 0172 115 | 73 0173 116 | 74 0174 117 | 75 0175 118 | 76 0176 119 | 77 0177 120 | 78 0178 121 | 79 
0179 122 | 7A 017A 123 | 7B 017B 124 | 7C 017C 125 | 7D 017D 126 | 7E 017E 127 | 7F 017F 128 | -------------------------------------------------------------------------------- /modules/from/BIG5-5C.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "\xa5\x5c\x5c\xaf\xe0"' | bsdconv big5:utf-8 3 | 功\能 4 | > perl -e 'print "\xa5\x5c\x5c\xaf\xe0"' | bsdconv big5-5c,big5:utf-8 5 | 功能 6 | -------------------------------------------------------------------------------- /modules/from/BIG5-5C.txt: -------------------------------------------------------------------------------- 1 | #extract from cp950 2 | A15C5C 01FE4F 3 | A25C5C 01515D 4 | A35C5C 0103B1 5 | A45C5C 014E48 6 | A55C5C 01529F 7 | A65C5C 015412 8 | A75C5C 01542D 9 | A85C5C 016C94 10 | A95C5C 01577C 11 | AA5C5C 016B7F 12 | AB5C5C 014FDE 13 | AC5C5C 0167AF 14 | AD5C5C 0182D2 15 | AE5C5C 015A09 16 | AF5C5C 0173EE 17 | B05C5C 018C79 18 | B15C5C 015D24 19 | B25C5C 016DDA 20 | B35C5C 018A31 21 | B45C5C 015EC4 22 | B55C5C 017435 23 | B65C5C 018DDA 24 | B75C5C 016127 25 | B85C5C 017A1E 26 | B95C5C 01923E 27 | BA5C5C 01669D 28 | BB5C5C 0184CB 29 | BC5C5C 0158A6 30 | BD5C5C 017A40 31 | BE5C5C 0195B1 32 | BF5C5C 01749E 33 | C05C5C 019910 34 | C15C5C 017E37 35 | C25C5C 0164FA 36 | C35C5C 019EE0 37 | C45C5C 015B40 38 | C55C5C 019ACF 39 | C65C5C 018EA1 40 | C95C5C 015C10 41 | CA5C5C 014F62 42 | CB5C5C 016C7B 43 | CC5C5C 015CA4 44 | CD5C5C 0172D6 45 | CE5C5C 0157A5 46 | CF5C5C 0167E6 47 | D05C5C 0180D0 48 | D15C5C 015A16 49 | D25C5C 016D82 50 | D35C5C 017F61 51 | D45C5C 015045 52 | D55C5C 0160DD 53 | D65C5C 01727E 54 | D75C5C 01838D 55 | D85C5C 01509C 56 | D95C5C 0163CA 57 | DA5C5C 01712E 58 | DB5C5C 01833B 59 | DC5C5C 019103 60 | DD5C5C 015E4B 61 | DE5C5C 016EDC 62 | DF5C5C 017D85 63 | E05C5C 018D68 64 | E15C5C 01587F 65 | E25C5C 0169D9 66 | E35C5C 017BA4 67 | E45C5C 018E0A 68 | E55C5C 015AF9 69 | E65C5C 016F7F 70 | E75C5C 01850C 71 | E85C5C 019186 72 | 
E95C5C 015B1E 73 | EA5C5C 017366 74 | EB5C5C 01878F 75 | EC5C5C 019924 76 | ED5C5C 0171E1 77 | EE5C5C 0187B0 78 | EF5C5C 0199F9 79 | F05C5C 017912 80 | F15C5C 0193AA 81 | F25C5C 017019 82 | F35C5C 019140 83 | F45C5C 017035 84 | F55C5C 019A31 85 | F65C5C 019145 86 | F75C5C 018D15 87 | F85C5C 019C4B 88 | F95C5C 019C6D 89 | -------------------------------------------------------------------------------- /modules/from/BSDCONV-KEYWORD.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Usually used to maintain bsdconv inter-mapping tables 3 | 4 | INPUT 5 | DESC Control characters for bsdconv table format 6 | EXAMPLE \n\t,?\\ 7 | 8 | OUTPUT 9 | DESC Properly escaped form for to/BSDCONV-KEYWORD 10 | 11 | EXAMPLE 12 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv 13 | 016E2C,018A66 016D4B,018BD5 14 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv | bsdconv bsdconv-keyword,bsdconv:bsdconv-keyword,utf-8 15 | 測,試 测,试 16 | -------------------------------------------------------------------------------- /modules/from/BSDCONV-KEYWORD.txt: -------------------------------------------------------------------------------- 1 | 2C 002C 2 | 09 0009 3 | 20 0020 4 | 3F 003F 5 | 0A 000A 6 | 0D 000D 7 | 303132302C 005C20,002C 8 | 303132432C 005C2C,002C 9 | 303135432C 005C5C,002C 10 | 5C20 0120 11 | 5C2C 012C 12 | 5C5C 015C 13 | -------------------------------------------------------------------------------- /modules/from/BSDCONV-LOG.c: --------------------------------------------------------------------------------
/*
 * NOTE(review): the system header names were lost in this copy of the file
 * (only bare `#include` remained).  The headers below cover what the code
 * uses: FILE/fopen/fprintf, getenv, and errno values.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "../../src/bsdconv.h"

#define TAILIZE(p) while(*p){ p++ ;}

/*
 * Open the log file named by the BSDCONV_FROM_LOG environment variable in
 * append mode and keep the stream as codec state.
 *
 * Returns 0 on success, EINVAL when the variable is not set, or the errno
 * left by fopen() when the file cannot be opened.
 */
int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
	const char *path=getenv("BSDCONV_FROM_LOG");
	FILE *fp;

	/* fopen(NULL, ...) is undefined behaviour; refuse instead. */
	if(path==NULL)
		return EINVAL;

	errno=0;
	fp=fopen(path,"a");
	if(fp==NULL)
		return errno ? errno : EINVAL;

	THIS_CODEC(ins)->priv=fp;
	return 0;
}

/* Close the log stream if one was opened. */
void cbdestroy(struct bsdconv_instance *ins){
	FILE *fp=THIS_CODEC(ins)->priv;
	if(fp)
		fclose(fp);
}

/*
 * Log the current input byte as two upper-case hex digits on its own line,
 * flush, and report NEXTPHASE.
 */
void cbconv(struct bsdconv_instance *ins){
	struct bsdconv_phase *this_phase=THIS_PHASE(ins);
	FILE *fp=this_phase->codec[this_phase->index].priv;
	if(fp){
		fprintf(fp,"%02X\n", (int)UCP(this_phase->curr->data)[this_phase->i]);
		fflush(fp);
	}
	this_phase->state.status=NEXTPHASE;
}
-------------------------------------------------------------------------------- /modules/from/BSDCONV-LOG.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Any 3 | 4 | OUTPUT 5 | DESC None, data is written into log file 6 | 7 | ENV 8 | BSDCONV_FROM_LOG log file 9 | 10 | EXAMPLE 11 | #background debug tool 12 | > echo 測試 | env BSDCONV_FROM_LOG=/tmp/bsdconv.log bsdconv bsdconv-log:utf-8 13 | > cat /tmp/bsdconv.log 14 | E6 15 | B8 16 | AC 17 | E8 18 | A9 19 | A6 20 | 0A 21 | -------------------------------------------------------------------------------- /modules/from/BSDCONV-LOG.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/BSDCONV.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC internal format in hexadecimal form 3 | EXAMPLE 019644 4 | 5 | OUTPUT 6 | DESC binary form of input 7 | 8 | EXAMPLE 9 | > echo 016e2c|bsdconv bsdconv:utf-8 10 | 測 11 | > echo 016e2c,018a66 | bsdconv bsdconv:utf-8 12 | 測試 13 | -------------------------------------------------------------------------------- /modules/from/BSDCONV.txt: -------------------------------------------------------------------------------- 1 | * ?
2 | -------------------------------------------------------------------------------- /modules/from/BYTE.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "\xaa\xbb\xcc\xdd"' | bsdconv byte:byte|hexdump -C 3 | 00000000 aa bb cc dd |....| 4 | -------------------------------------------------------------------------------- /modules/from/CCCII.man: -------------------------------------------------------------------------------- 1 | #just CCCII, it's incompatible with ASCII 2 | -------------------------------------------------------------------------------- /modules/from/CP950-UDA.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | // Ref: http://kanji-database.sourceforge.net/charcode/big5.html 6 | 7 | struct my_s{ 8 | int h; 9 | int x; 10 | int y; 11 | }; 12 | 13 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 14 | struct my_s *r = malloc(sizeof(struct my_s)); 15 | THIS_CODEC(ins)->priv = r; 16 | r->h = 0; 17 | return 0; 18 | } 19 | 20 | void cbdestroy(struct bsdconv_instance *ins){ 21 | struct my_s *r = THIS_CODEC(ins)->priv; 22 | free(r); 23 | } 24 | 25 | void cbconv(struct bsdconv_instance *ins){ 26 | struct bsdconv_phase *this_phase = THIS_PHASE(ins); 27 | struct my_s *r = THIS_CODEC(ins)->priv; 28 | 29 | unsigned char d = UCP(this_phase->curr->data)[this_phase->i]; 30 | 31 | if(r->h==0){ 32 | if(d>=0xFA && d<=0xFE){ 33 | r->h = d; 34 | r->x = 0xE000; 35 | r->y = 0xFA; 36 | this_phase->state.status = CONTINUE; 37 | return; 38 | }else if(d>=0x8E && d<=0xA0){ 39 | r->h = d; 40 | r->x = 0xE311; 41 | r->y = 0x8E; 42 | this_phase->state.status = CONTINUE; 43 | return; 44 | }else if(d>=0x81 && d<=0x8D){ 45 | r->h = d; 46 | r->x = 0xEEB8; 47 | r->y = 0x81; 48 | this_phase->state.status = CONTINUE; 49 | return; 50 | }else if(d>=0xC6 && d<=0xC8){ 51 | r->h = d; 52 | r->x = 
0xF672; 53 | r->y = 0xC6; 54 | this_phase->state.status = CONTINUE; 55 | return; 56 | }else{ 57 | this_phase->state.status = DEADEND; 58 | return; 59 | } 60 | }else{ 61 | uint32_t b = (r->h<<8)|d; 62 | if( 63 | (b>=0xFA40 && b<=0xFEFE) 64 | || 65 | (b>=0x8E40 && b<=0xA0FE) 66 | || 67 | (b>=0x8140 && b<=0x8DFE) 68 | || 69 | (b>=0xC6A1 && b<=0xC8FE) 70 | ){ 71 | uint32_t u = r->x + (157 * (r->h - r->y)) + (d<0x80?d-0x40:d-0x62); 72 | unsigned char *c; 73 | DATA_MALLOC(ins, this_phase->data_tail->next); 74 | this_phase->data_tail=this_phase->data_tail->next; 75 | this_phase->data_tail->next=NULL; 76 | this_phase->data_tail->len=3; 77 | this_phase->data_tail->data=c=malloc(3); 78 | this_phase->data_tail->flags=F_FREE; 79 | this_phase->state.status=NEXTPHASE; 80 | c[0] = 0x01; 81 | c[1] = (u >> 8) & 0xFF; 82 | c[2] = u & 0xFF; 83 | }else{ 84 | this_phase->state.status = DEADEND; 85 | } 86 | r->h = 0; 87 | return; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /modules/from/CP950-UDA.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Mapping CP950 UDA (User-Defined Areas) to Unicode PUA (Private Use Areas) 3 | -------------------------------------------------------------------------------- /modules/from/CP950-UDA.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/from/EBCDIC.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_s{ 6 | int status; 7 | }; 8 | 9 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 10 | struct my_s *r=malloc(sizeof(struct my_s)); 11 | THIS_CODEC(ins)->priv=r; 12 | return 0; 13 | } 14 | 15 | void cbinit(struct bsdconv_instance *ins){ 16 | struct my_s *r=THIS_CODEC(ins)->priv; 17 | r->status=0; 18 | } 19 | 20 | void cbdestroy(struct bsdconv_instance *ins){ 21 | struct my_s *r=THIS_CODEC(ins)->priv; 22 | free(r); 23 | } 24 | 25 | void cbconv(struct bsdconv_instance *ins){ 26 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 27 | struct my_s *t=THIS_CODEC(ins)->priv; 28 | struct data_st data; 29 | unsigned char *c; 30 | 31 | memcpy(&data, (char *)(this_phase->codec[this_phase->index].data_z+(uintptr_t)this_phase->state.data), sizeof(struct data_st)); 32 | c=UCP(this_phase->codec[this_phase->index].data_z+de_offset(data.data)); 33 | 34 | if(data.len==2 && c[0]=='\x01'){ 35 | if(c[1]=='\x0E'){ 36 | t->status=1; 37 | this_phase->state.status=NEXTPHASE; 38 | return; 39 | }else if(c[1]=='\x0F'){ 40 | t->status=0; 41 | this_phase->state.status=NEXTPHASE; 42 | return; 43 | } 44 | } 45 | 46 | if(t->status==0){ 47 | this_phase->state.status=MATCH; 48 | }else{ 49 | this_phase->state.status=SUBMATCH; 50 | } 51 | 52 | return; 53 | } 54 | -------------------------------------------------------------------------------- /modules/from/ESCAPE.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC URL escaped or C-style escaped sequence 3 | EXAMPLE %XX %uUUUU \uUUUU \xXX \OOO 4 | 5 | OUTPUT 6 | TYPE Unicode (for %uUUUU and \uUUUU) or Byte (others) 7 | 8 | EXAMPLE 9 | > echo %u6e2c | bsdconv escape:utf-8 10 | 測 11 | > echo %u6e2c%e8%a9%a6 | 
bsdconv escape:bsdconv-stdout 12 | 016E2C ( FREE ) 13 | 03E8 ( FREE ) 14 | 03A9 ( FREE ) 15 | 03A6 ( FREE ) 16 | > echo %u6e2c%e8%a9%a6 | bsdconv 'escape:unicode,byte|skip,utf-8:utf-8' 17 | 測試 18 | > echo %u6e2c%b8%d5功能 | bsdconv 'escape,utf-8:unicode,byte|skip,big5:utf-8' 19 | 測試功能 20 | > echo '%u6e2c%b8%d5功能' | bsdconv 'escape,ascii-numeric-html-entity,utf-8:unicode,byte|skip,big5:utf-8' 21 | 測試功能 22 | > echo '\346\270\254\350\251\246' | bsdconv 'escape:unicode,byte|skip,utf-8:utf-8' 23 | 測試 24 | -------------------------------------------------------------------------------- /modules/from/ESCAPE.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/FALLBACK-UNICODE.man: -------------------------------------------------------------------------------- 1 | DESC 2 | This could be used with windows codepages to achieve the behavior described in MICSFT/WindowsBestFit 3 | 4 | INPUT 5 | DESC Any 6 | 7 | OUTPUT 8 | TYPE Unicode 9 | DESC Same value of codepoint as input 10 | 11 | EXAMPLE 12 | > printf "\x8E\x81" | bsdconv cp1252:bsdconv-stdout 13 | 01017D 14 | > printf "\x8E\x81" | bsdconv cp1252,fallback-unicode:bsdconv-stdout 15 | 01017D 16 | 0181 17 | -------------------------------------------------------------------------------- /modules/from/IBM-37.man: -------------------------------------------------------------------------------- 1 | #EBCDIC codecs 2 | -------------------------------------------------------------------------------- /modules/from/IBM-930.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/from/IBM-930.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | 
-------------------------------------------------------------------------------- /modules/from/IBM-933.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/from/IBM-933.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/from/IBM-935.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/from/IBM-935.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/from/IBM-937.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/from/IBM-937.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/from/IBM-939.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/from/IBM-939.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/from/PASS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 
| struct my_s{ 7 | struct bsdconv_filter *filter; 8 | int unmark; 9 | }; 10 | 11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 12 | struct my_s *r=malloc(sizeof(struct my_s)); 13 | THIS_CODEC(ins)->priv=r; 14 | r->filter=NULL; 15 | r->unmark=0; 16 | 17 | char *filter=NULL; 18 | while(arg){ 19 | if(strcasecmp(arg->key, "UNMARK")==0){ 20 | r->unmark=1; 21 | }else if(strcasecmp(arg->key, "FOR")==0){ 22 | filter=arg->ptr; 23 | }else{ 24 | free(r); 25 | return EINVAL; 26 | } 27 | arg=arg->next; 28 | } 29 | if(filter!=NULL){ 30 | r->filter=load_filter(filter); 31 | if(r->filter==NULL){ 32 | free(r); 33 | return EOPNOTSUPP; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | void cbdestroy(struct bsdconv_instance *ins){ 40 | struct my_s *r=THIS_CODEC(ins)->priv; 41 | if(r->filter) 42 | unload_filter(r->filter); 43 | free(r); 44 | } 45 | 46 | void cbconv(struct bsdconv_instance *ins){ 47 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 48 | struct my_s *t=THIS_CODEC(ins)->priv; 49 | int pass=1; 50 | 51 | if(this_phase->i!=0) 52 | pass=0; 53 | else if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr)) 54 | pass=0; 55 | else if(t->unmark && !(this_phase->curr->flags & F_MARK)) 56 | pass=0; 57 | 58 | if(pass){ 59 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 60 | this_phase->data_tail=this_phase->data_tail->next; 61 | this_phase->data_tail->next=NULL; 62 | 63 | if(t->unmark) 64 | this_phase->data_tail->flags &= ~F_MARK; 65 | 66 | this_phase->i=this_phase->curr->len-1; 67 | this_phase->state.status=NEXTPHASE; 68 | }else{ 69 | this_phase->state.status=DEADEND; 70 | } 71 | 72 | return; 73 | } 74 | -------------------------------------------------------------------------------- /modules/from/PASS.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC Filtered or decorated according to arguments 7 | 8 | ARGUMENT 9 | UNMARK 10 | DESC 
Remove "MARK" flag 11 | FOR 12 | FORMAT Filter 13 | 14 | EXAMPLE 15 | #pass through packets queue 16 | > echo -n abc | bsdconv 'ascii:pass|pass:bsdconv-stdout' 17 | 0161 ( SKIP ) 18 | 0162 ( SKIP ) 19 | 0163 ( SKIP ) 20 | > echo -n 測試 | bsdconv 'utf-8:utf-16le|pass:bsdconv-stdout' 21 | 2C6E ( FREE ) 22 | 668A ( FREE ) 23 | -------------------------------------------------------------------------------- /modules/from/PASS.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/UTF-16BE.man: -------------------------------------------------------------------------------- 1 | UTF-16 2 | -------------------------------------------------------------------------------- /modules/from/UTF-16BE.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/UTF-16LE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-16BE 2 | -------------------------------------------------------------------------------- /modules/from/UTF-16LE.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/from/UTF-32BE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | 6 | struct my_s{ 7 | int status; 8 | char buf[4]; 9 | }; 10 | 11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 12 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 13 | return 0; 14 | } 15 | 16 | void cbinit(struct bsdconv_instance *ins){ 17 | struct my_s *r=THIS_CODEC(ins)->priv; 18 | r->status=0; 19 | } 20 | 21 | void cbdestroy(struct bsdconv_instance *ins){ 22 | struct my_s *p=THIS_CODEC(ins)->priv; 23 | free(p); 24 | } 25 | 26 | void cbconv(struct bsdconv_instance *ins){ 27 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 28 | struct my_s *t=THIS_CODEC(ins)->priv; 29 | char d; 30 | int i; 31 | size_t l; 32 | 33 | for(;this_phase->icurr->len;this_phase->i+=1){ 34 | d=CP(this_phase->curr->data)[this_phase->i]; 35 | switch(t->status){ 36 | case 0: 37 | t->buf[0]=d; 38 | t->status=1; 39 | continue; 40 | break; 41 | case 1: 42 | t->buf[1]=d; 43 | t->status=2; 44 | continue; 45 | break; 46 | case 2: 47 | t->buf[2]=d; 48 | t->status=3; 49 | continue; 50 | break; 51 | case 3: 52 | t->buf[3]=d; 53 | t->status=0; 54 | for(i=0;i<4;++i){ 55 | if(t->buf[i]) break; 56 | } 57 | l=(4-i)+1; 58 | DATA_MALLOC(ins, this_phase->data_tail->next); 59 | this_phase->data_tail=this_phase->data_tail->next; 60 | this_phase->data_tail->next=NULL; 61 | this_phase->data_tail->len=l; 62 | this_phase->data_tail->flags=F_FREE; 63 | this_phase->data_tail->data=malloc(l); 64 | CP(this_phase->data_tail->data)[0]=0x01; 65 | memcpy(CP(this_phase->data_tail->data)+1, &t->buf[i], l-1); 66 | this_phase->state.status=NEXTPHASE; 67 | return; 68 | break; 69 | } 70 | } 71 | this_phase->state.status=CONTINUE; 72 | return; 73 | } 74 | -------------------------------------------------------------------------------- 
/modules/from/UTF-32BE.man: -------------------------------------------------------------------------------- 1 | UTF-32 2 | -------------------------------------------------------------------------------- /modules/from/UTF-32BE.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/UTF-32LE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | 6 | struct my_s{ 7 | int status; 8 | char buf[4]; 9 | }; 10 | 11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 12 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 13 | return 0; 14 | } 15 | 16 | void cbinit(struct bsdconv_instance *ins){ 17 | struct my_s *r=THIS_CODEC(ins)->priv; 18 | r->status=0; 19 | } 20 | 21 | void cbdestroy(struct bsdconv_instance *ins){ 22 | struct my_s *p=THIS_CODEC(ins)->priv; 23 | free(p); 24 | } 25 | 26 | void cbconv(struct bsdconv_instance *ins){ 27 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 28 | struct my_s *t=THIS_CODEC(ins)->priv; 29 | char d; 30 | int i; 31 | size_t l; 32 | 33 | for(;this_phase->icurr->len;this_phase->i+=1){ 34 | d=CP(this_phase->curr->data)[this_phase->i]; 35 | switch(t->status){ 36 | case 0: 37 | t->buf[3]=d; 38 | t->status=1; 39 | continue; 40 | break; 41 | case 1: 42 | t->buf[2]=d; 43 | t->status=2; 44 | continue; 45 | break; 46 | case 2: 47 | t->buf[1]=d; 48 | t->status=3; 49 | continue; 50 | break; 51 | case 3: 52 | t->buf[0]=d; 53 | t->status=0; 54 | for(i=0;i<4;++i){ 55 | if(t->buf[i]) break; 56 | } 57 | l=(4-i)+1; 58 | DATA_MALLOC(ins, this_phase->data_tail->next); 59 | this_phase->data_tail=this_phase->data_tail->next; 60 | this_phase->data_tail->next=NULL; 61 | this_phase->data_tail->len=l; 62 | this_phase->data_tail->flags=F_FREE; 63 | this_phase->data_tail->data=malloc(l); 64 | 
CP(this_phase->data_tail->data)[0]=0x01; 65 | memcpy(CP(this_phase->data_tail->data)+1, &t->buf[i], l-1); 66 | this_phase->state.status=NEXTPHASE; 67 | return; 68 | break; 69 | } 70 | } 71 | this_phase->state.status=CONTINUE; 72 | return; 73 | } 74 | -------------------------------------------------------------------------------- /modules/from/UTF-32LE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-32BE 2 | -------------------------------------------------------------------------------- /modules/from/UTF-32LE.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/_BIG5-2003.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Traditional Chinese 3 | ASCII is excluded, use same name without '_' prefixed to get ASCII involved 4 | -------------------------------------------------------------------------------- /modules/from/_BIG5-ETEN.man: -------------------------------------------------------------------------------- 1 | .redirect from/_BIG5-2003 2 | -------------------------------------------------------------------------------- /modules/from/_BIG5E.man: -------------------------------------------------------------------------------- 1 | .redirect from/_BIG5-2003 2 | -------------------------------------------------------------------------------- /modules/from/_CP1251.man: -------------------------------------------------------------------------------- 1 | DESC 2 | cyrillic alphabet 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP1251.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT 
2 | 80 010402 3 | 81 010403 4 | 82 01201A 5 | 83 010453 6 | 84 01201E 7 | 85 012026 8 | 86 012020 9 | 87 012021 10 | 88 0120AC 11 | 89 012030 12 | 8A 010409 13 | 8B 012039 14 | 8C 01040A 15 | 8D 01040C 16 | 8E 01040B 17 | 8F 01040F 18 | 90 010452 19 | 91 012018 20 | 92 012019 21 | 93 01201C 22 | 94 01201D 23 | 95 012022 24 | 96 012013 25 | 97 012014 26 | 99 012122 27 | 9A 010459 28 | 9B 01203A 29 | 9C 01045A 30 | 9D 01045C 31 | 9E 01045B 32 | 9F 01045F 33 | A0 01A0 34 | A1 01040E 35 | A2 01045E 36 | A3 010408 37 | A4 01A4 38 | A5 010490 39 | A6 01A6 40 | A7 01A7 41 | A8 010401 42 | A9 01A9 43 | AA 010404 44 | AB 01AB 45 | AC 01AC 46 | AD 01AD 47 | AE 01AE 48 | AF 010407 49 | B0 01B0 50 | B1 01B1 51 | B2 010406 52 | B3 010456 53 | B4 010491 54 | B5 01B5 55 | B6 01B6 56 | B7 01B7 57 | B8 010451 58 | B9 012116 59 | BA 010454 60 | BB 01BB 61 | BC 010458 62 | BD 010405 63 | BE 010455 64 | BF 010457 65 | C0 010410 66 | C1 010411 67 | C2 010412 68 | C3 010413 69 | C4 010414 70 | C5 010415 71 | C6 010416 72 | C7 010417 73 | C8 010418 74 | C9 010419 75 | CA 01041A 76 | CB 01041B 77 | CC 01041C 78 | CD 01041D 79 | CE 01041E 80 | CF 01041F 81 | D0 010420 82 | D1 010421 83 | D2 010422 84 | D3 010423 85 | D4 010424 86 | D5 010425 87 | D6 010426 88 | D7 010427 89 | D8 010428 90 | D9 010429 91 | DA 01042A 92 | DB 01042B 93 | DC 01042C 94 | DD 01042D 95 | DE 01042E 96 | DF 01042F 97 | E0 010430 98 | E1 010431 99 | E2 010432 100 | E3 010433 101 | E4 010434 102 | E5 010435 103 | E6 010436 104 | E7 010437 105 | E8 010438 106 | E9 010439 107 | EA 01043A 108 | EB 01043B 109 | EC 01043C 110 | ED 01043D 111 | EE 01043E 112 | EF 01043F 113 | F0 010440 114 | F1 010441 115 | F2 010442 116 | F3 010443 117 | F4 010444 118 | F5 010445 119 | F6 010446 120 | F7 010447 121 | F8 010448 122 | F9 010449 123 | FA 01044A 124 | FB 01044B 125 | FC 01044C 126 | FD 01044D 127 | FE 01044E 128 | FF 01044F 129 | -------------------------------------------------------------------------------- 
/modules/from/_CP1252.man: -------------------------------------------------------------------------------- 1 | DESC 2 | latin alphabet 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP1252.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 83 010192 5 | 84 01201E 6 | 85 012026 7 | 86 012020 8 | 87 012021 9 | 88 0102C6 10 | 89 012030 11 | 8A 010160 12 | 8B 012039 13 | 8C 010152 14 | 8E 01017D 15 | 91 012018 16 | 92 012019 17 | 93 01201C 18 | 94 01201D 19 | 95 012022 20 | 96 012013 21 | 97 012014 22 | 98 0102DC 23 | 99 012122 24 | 9A 010161 25 | 9B 01203A 26 | 9C 010153 27 | 9E 01017E 28 | 9F 010178 29 | A0 01A0 30 | A1 01A1 31 | A2 01A2 32 | A3 01A3 33 | A4 01A4 34 | A5 01A5 35 | A6 01A6 36 | A7 01A7 37 | A8 01A8 38 | A9 01A9 39 | AA 01AA 40 | AB 01AB 41 | AC 01AC 42 | AD 01AD 43 | AE 01AE 44 | AF 01AF 45 | B0 01B0 46 | B1 01B1 47 | B2 01B2 48 | B3 01B3 49 | B4 01B4 50 | B5 01B5 51 | B6 01B6 52 | B7 01B7 53 | B8 01B8 54 | B9 01B9 55 | BA 01BA 56 | BB 01BB 57 | BC 01BC 58 | BD 01BD 59 | BE 01BE 60 | BF 01BF 61 | C0 01C0 62 | C1 01C1 63 | C2 01C2 64 | C3 01C3 65 | C4 01C4 66 | C5 01C5 67 | C6 01C6 68 | C7 01C7 69 | C8 01C8 70 | C9 01C9 71 | CA 01CA 72 | CB 01CB 73 | CC 01CC 74 | CD 01CD 75 | CE 01CE 76 | CF 01CF 77 | D0 01D0 78 | D1 01D1 79 | D2 01D2 80 | D3 01D3 81 | D4 01D4 82 | D5 01D5 83 | D6 01D6 84 | D7 01D7 85 | D8 01D8 86 | D9 01D9 87 | DA 01DA 88 | DB 01DB 89 | DC 01DC 90 | DD 01DD 91 | DE 01DE 92 | DF 01DF 93 | E0 01E0 94 | E1 01E1 95 | E2 01E2 96 | E3 01E3 97 | E4 01E4 98 | E5 01E5 99 | E6 01E6 100 | E7 01E7 101 | E8 01E8 102 | E9 01E9 103 | EA 01EA 104 | EB 01EB 105 | EC 01EC 106 | ED 01ED 107 | EE 01EE 108 | EF 01EF 109 | F0 01F0 110 | F1 01F1 111 | F2 01F2 112 | F3 01F3 113 | F4 01F4 
114 | F5 01F5 115 | F6 01F6 116 | F7 01F7 117 | F8 01F8 118 | F9 01F9 119 | FA 01FA 120 | FB 01FB 121 | FC 01FC 122 | FD 01FD 123 | FE 01FE 124 | FF 01FF 125 | -------------------------------------------------------------------------------- /modules/from/_CP1253.man: -------------------------------------------------------------------------------- 1 | DESC 2 | greek alphabet 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP1253.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 83 010192 5 | 84 01201E 6 | 85 012026 7 | 86 012020 8 | 87 012021 9 | 89 012030 10 | 8B 012039 11 | 91 012018 12 | 92 012019 13 | 93 01201C 14 | 94 01201D 15 | 95 012022 16 | 96 012013 17 | 97 012014 18 | 99 012122 19 | 9B 01203A 20 | A0 01A0 21 | A1 010385 22 | A2 010386 23 | A3 01A3 24 | A4 01A4 25 | A5 01A5 26 | A6 01A6 27 | A7 01A7 28 | A8 01A8 29 | A9 01A9 30 | AB 01AB 31 | AC 01AC 32 | AD 01AD 33 | AE 01AE 34 | AF 012015 35 | B0 01B0 36 | B1 01B1 37 | B2 01B2 38 | B3 01B3 39 | B4 010384 40 | B5 01B5 41 | B6 01B6 42 | B7 01B7 43 | B8 010388 44 | B9 010389 45 | BA 01038A 46 | BB 01BB 47 | BC 01038C 48 | BD 01BD 49 | BE 01038E 50 | BF 01038F 51 | C0 010390 52 | C1 010391 53 | C2 010392 54 | C3 010393 55 | C4 010394 56 | C5 010395 57 | C6 010396 58 | C7 010397 59 | C8 010398 60 | C9 010399 61 | CA 01039A 62 | CB 01039B 63 | CC 01039C 64 | CD 01039D 65 | CE 01039E 66 | CF 01039F 67 | D0 0103A0 68 | D1 0103A1 69 | D3 0103A3 70 | D4 0103A4 71 | D5 0103A5 72 | D6 0103A6 73 | D7 0103A7 74 | D8 0103A8 75 | D9 0103A9 76 | DA 0103AA 77 | DB 0103AB 78 | DC 0103AC 79 | DD 0103AD 80 | DE 0103AE 81 | DF 0103AF 82 | E0 0103B0 83 | E1 0103B1 84 | E2 0103B2 85 | E3 0103B3 86 | E4 0103B4 87 | E5 0103B5 88 | E6 0103B6 89 | E7 0103B7 90 | 
E8 0103B8 91 | E9 0103B9 92 | EA 0103BA 93 | EB 0103BB 94 | EC 0103BC 95 | ED 0103BD 96 | EE 0103BE 97 | EF 0103BF 98 | F0 0103C0 99 | F1 0103C1 100 | F2 0103C2 101 | F3 0103C3 102 | F4 0103C4 103 | F5 0103C5 104 | F6 0103C6 105 | F7 0103C7 106 | F8 0103C8 107 | F9 0103C9 108 | FA 0103CA 109 | FB 0103CB 110 | FC 0103CC 111 | FD 0103CD 112 | FE 0103CE 113 | -------------------------------------------------------------------------------- /modules/from/_CP1254.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 83 010192 5 | 84 01201E 6 | 85 012026 7 | 86 012020 8 | 87 012021 9 | 88 0102C6 10 | 89 012030 11 | 8A 010160 12 | 8B 012039 13 | 8C 010152 14 | 91 012018 15 | 92 012019 16 | 93 01201C 17 | 94 01201D 18 | 95 012022 19 | 96 012013 20 | 97 012014 21 | 98 0102DC 22 | 99 012122 23 | 9A 010161 24 | 9B 01203A 25 | 9C 010153 26 | 9F 010178 27 | A0 01A0 28 | A1 01A1 29 | A2 01A2 30 | A3 01A3 31 | A4 01A4 32 | A5 01A5 33 | A6 01A6 34 | A7 01A7 35 | A8 01A8 36 | A9 01A9 37 | AA 01AA 38 | AB 01AB 39 | AC 01AC 40 | AD 01AD 41 | AE 01AE 42 | AF 01AF 43 | B0 01B0 44 | B1 01B1 45 | B2 01B2 46 | B3 01B3 47 | B4 01B4 48 | B5 01B5 49 | B6 01B6 50 | B7 01B7 51 | B8 01B8 52 | B9 01B9 53 | BA 01BA 54 | BB 01BB 55 | BC 01BC 56 | BD 01BD 57 | BE 01BE 58 | BF 01BF 59 | C0 01C0 60 | C1 01C1 61 | C2 01C2 62 | C3 01C3 63 | C4 01C4 64 | C5 01C5 65 | C6 01C6 66 | C7 01C7 67 | C8 01C8 68 | C9 01C9 69 | CA 01CA 70 | CB 01CB 71 | CC 01CC 72 | CD 01CD 73 | CE 01CE 74 | CF 01CF 75 | D0 01011E 76 | D1 01D1 77 | D2 01D2 78 | D3 01D3 79 | D4 01D4 80 | D5 01D5 81 | D6 01D6 82 | D7 01D7 83 | D8 01D8 84 | D9 01D9 85 | DA 01DA 86 | DB 01DB 87 | DC 01DC 88 | DD 010130 89 | DE 01015E 90 | DF 01DF 91 | E0 01E0 92 | E1 01E1 93 | E2 01E2 94 | E3 01E3 95 | E4 01E4 96 | E5 01E5 97 | E6 01E6 98 | E7 01E7 99 | E8 01E8 100 | E9 01E9 101 | EA 01EA 102 | EB 01EB 103 | EC 
01EC 104 | ED 01ED 105 | EE 01EE 106 | EF 01EF 107 | F0 01011F 108 | F1 01F1 109 | F2 01F2 110 | F3 01F3 111 | F4 01F4 112 | F5 01F5 113 | F6 01F6 114 | F7 01F7 115 | F8 01F8 116 | F9 01F9 117 | FA 01FA 118 | FB 01FB 119 | FC 01FC 120 | FD 010131 121 | FE 01015F 122 | FF 01FF 123 | -------------------------------------------------------------------------------- /modules/from/_CP1255.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 83 010192 5 | 84 01201E 6 | 85 012026 7 | 86 012020 8 | 87 012021 9 | 88 0102C6 10 | 89 012030 11 | 8B 012039 12 | 91 012018 13 | 92 012019 14 | 93 01201C 15 | 94 01201D 16 | 95 012022 17 | 96 012013 18 | 97 012014 19 | 98 0102DC 20 | 99 012122 21 | 9B 01203A 22 | A0 01A0 23 | A1 01A1 24 | A2 01A2 25 | A3 01A3 26 | A4 0120AA 27 | A5 01A5 28 | A6 01A6 29 | A7 01A7 30 | A8 01A8 31 | A9 01A9 32 | AA 01D7 33 | AB 01AB 34 | AC 01AC 35 | AD 01AD 36 | AE 01AE 37 | AF 01AF 38 | B0 01B0 39 | B1 01B1 40 | B2 01B2 41 | B3 01B3 42 | B4 01B4 43 | B5 01B5 44 | B6 01B6 45 | B7 01B7 46 | B8 01B8 47 | B9 01B9 48 | BA 01F7 49 | BB 01BB 50 | BC 01BC 51 | BD 01BD 52 | BE 01BE 53 | BF 01BF 54 | C0 0105B0 55 | C1 0105B1 56 | C2 0105B2 57 | C3 0105B3 58 | C4 0105B4 59 | C5 0105B5 60 | C6 0105B6 61 | C7 0105B7 62 | C8 0105B8 63 | C9 0105B9 64 | CB 0105BB 65 | CC 0105BC 66 | CD 0105BD 67 | CE 0105BE 68 | CF 0105BF 69 | D0 0105C0 70 | D1 0105C1 71 | D2 0105C2 72 | D3 0105C3 73 | D4 0105F0 74 | D5 0105F1 75 | D6 0105F2 76 | D7 0105F3 77 | D8 0105F4 78 | E0 0105D0 79 | E1 0105D1 80 | E2 0105D2 81 | E3 0105D3 82 | E4 0105D4 83 | E5 0105D5 84 | E6 0105D6 85 | E7 0105D7 86 | E8 0105D8 87 | E9 0105D9 88 | EA 0105DA 89 | EB 0105DB 90 | EC 0105DC 91 | ED 0105DD 92 | EE 0105DE 93 | EF 0105DF 94 | F0 0105E0 95 | F1 0105E1 96 | F2 0105E2 97 | F3 0105E3 98 | F4 0105E4 99 | F5 0105E5 100 | F6 0105E6 101 | F7 0105E7 102 | F8 0105E8 103 
| F9 0105E9 104 | FA 0105EA 105 | FD 01200E 106 | FE 01200F 107 | -------------------------------------------------------------------------------- /modules/from/_CP1256.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT 2 | 80 0120AC 3 | 81 01067E 4 | 82 01201A 5 | 83 010192 6 | 84 01201E 7 | 85 012026 8 | 86 012020 9 | 87 012021 10 | 88 0102C6 11 | 89 012030 12 | 8A 010679 13 | 8B 012039 14 | 8C 010152 15 | 8D 010686 16 | 8E 010698 17 | 8F 010688 18 | 90 0106AF 19 | 91 012018 20 | 92 012019 21 | 93 01201C 22 | 94 01201D 23 | 95 012022 24 | 96 012013 25 | 97 012014 26 | 98 0106A9 27 | 99 012122 28 | 9A 010691 29 | 9B 01203A 30 | 9C 010153 31 | 9D 01200C 32 | 9E 01200D 33 | 9F 0106BA 34 | A0 01A0 35 | A1 01060C 36 | A2 01A2 37 | A3 01A3 38 | A4 01A4 39 | A5 01A5 40 | A6 01A6 41 | A7 01A7 42 | A8 01A8 43 | A9 01A9 44 | AA 0106BE 45 | AB 01AB 46 | AC 01AC 47 | AD 01AD 48 | AE 01AE 49 | AF 01AF 50 | B0 01B0 51 | B1 01B1 52 | B2 01B2 53 | B3 01B3 54 | B4 01B4 55 | B5 01B5 56 | B6 01B6 57 | B7 01B7 58 | B8 01B8 59 | B9 01B9 60 | BA 01061B 61 | BB 01BB 62 | BC 01BC 63 | BD 01BD 64 | BE 01BE 65 | BF 01061F 66 | C0 0106C1 67 | C1 010621 68 | C2 010622 69 | C3 010623 70 | C4 010624 71 | C5 010625 72 | C6 010626 73 | C7 010627 74 | C8 010628 75 | C9 010629 76 | CA 01062A 77 | CB 01062B 78 | CC 01062C 79 | CD 01062D 80 | CE 01062E 81 | CF 01062F 82 | D0 010630 83 | D1 010631 84 | D2 010632 85 | D3 010633 86 | D4 010634 87 | D5 010635 88 | D6 010636 89 | D7 01D7 90 | D8 010637 91 | D9 010638 92 | DA 010639 93 | DB 01063A 94 | DC 010640 95 | DD 010641 96 | DE 010642 97 | DF 010643 98 | E0 01E0 99 | E1 010644 100 | E2 01E2 101 | E3 010645 102 | E4 010646 103 | E5 010647 104 | E6 010648 105 | E7 01E7 106 | E8 01E8 107 | E9 01E9 108 | EA 01EA 109 | EB 01EB 110 | EC 010649 111 | ED 01064A 112 | EE 01EE 113 | EF 01EF 114 | F0 01064B 115 | F1 01064C 116 | F2 01064D 117 | F3 
01064E 118 | F4 01F4 119 | F5 01064F 120 | F6 010650 121 | F7 01F7 122 | F8 010651 123 | F9 01F9 124 | FA 010652 125 | FB 01FB 126 | FC 01FC 127 | FD 01200E 128 | FE 01200F 129 | FF 0106D2 130 | -------------------------------------------------------------------------------- /modules/from/_CP1257.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 84 01201E 5 | 85 012026 6 | 86 012020 7 | 87 012021 8 | 89 012030 9 | 8B 012039 10 | 8D 01A8 11 | 8E 0102C7 12 | 8F 01B8 13 | 91 012018 14 | 92 012019 15 | 93 01201C 16 | 94 01201D 17 | 95 012022 18 | 96 012013 19 | 97 012014 20 | 99 012122 21 | 9B 01203A 22 | 9D 01AF 23 | 9E 0102DB 24 | A0 01A0 25 | A2 01A2 26 | A3 01A3 27 | A4 01A4 28 | A6 01A6 29 | A7 01A7 30 | A8 01D8 31 | A9 01A9 32 | AA 010156 33 | AB 01AB 34 | AC 01AC 35 | AD 01AD 36 | AE 01AE 37 | AF 01C6 38 | B0 01B0 39 | B1 01B1 40 | B2 01B2 41 | B3 01B3 42 | B4 01B4 43 | B5 01B5 44 | B6 01B6 45 | B7 01B7 46 | B8 01F8 47 | B9 01B9 48 | BA 010157 49 | BB 01BB 50 | BC 01BC 51 | BD 01BD 52 | BE 01BE 53 | BF 01E6 54 | C0 010104 55 | C1 01012E 56 | C2 010100 57 | C3 010106 58 | C4 01C4 59 | C5 01C5 60 | C6 010118 61 | C7 010112 62 | C8 01010C 63 | C9 01C9 64 | CA 010179 65 | CB 010116 66 | CC 010122 67 | CD 010136 68 | CE 01012A 69 | CF 01013B 70 | D0 010160 71 | D1 010143 72 | D2 010145 73 | D3 01D3 74 | D4 01014C 75 | D5 01D5 76 | D6 01D6 77 | D7 01D7 78 | D8 010172 79 | D9 010141 80 | DA 01015A 81 | DB 01016A 82 | DC 01DC 83 | DD 01017B 84 | DE 01017D 85 | DF 01DF 86 | E0 010105 87 | E1 01012F 88 | E2 010101 89 | E3 010107 90 | E4 01E4 91 | E5 01E5 92 | E6 010119 93 | E7 010113 94 | E8 01010D 95 | E9 01E9 96 | EA 01017A 97 | EB 010117 98 | EC 010123 99 | ED 010137 100 | EE 01012B 101 | EF 01013C 102 | F0 010161 103 | F1 010144 104 | F2 010146 105 | F3 01F3 106 | F4 01014D 107 | F5 01F5 108 | F6 01F6 109 | F7 01F7 110 | F8 
010173 111 | F9 010142 112 | FA 01015B 113 | FB 01016B 114 | FC 01FC 115 | FD 01017C 116 | FE 01017E 117 | FF 0102D9 118 | -------------------------------------------------------------------------------- /modules/from/_CP1258.txt: -------------------------------------------------------------------------------- 1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT 2 | 80 0120AC 3 | 82 01201A 4 | 83 010192 5 | 84 01201E 6 | 85 012026 7 | 86 012020 8 | 87 012021 9 | 88 0102C6 10 | 89 012030 11 | 8B 012039 12 | 8C 010152 13 | 91 012018 14 | 92 012019 15 | 93 01201C 16 | 94 01201D 17 | 95 012022 18 | 96 012013 19 | 97 012014 20 | 98 0102DC 21 | 99 012122 22 | 9B 01203A 23 | 9C 010153 24 | 9F 010178 25 | A0 01A0 26 | A1 01A1 27 | A2 01A2 28 | A3 01A3 29 | A4 01A4 30 | A5 01A5 31 | A6 01A6 32 | A7 01A7 33 | A8 01A8 34 | A9 01A9 35 | AA 01AA 36 | AB 01AB 37 | AC 01AC 38 | AD 01AD 39 | AE 01AE 40 | AF 01AF 41 | B0 01B0 42 | B1 01B1 43 | B2 01B2 44 | B3 01B3 45 | B4 01B4 46 | B5 01B5 47 | B6 01B6 48 | B7 01B7 49 | B8 01B8 50 | B9 01B9 51 | BA 01BA 52 | BB 01BB 53 | BC 01BC 54 | BD 01BD 55 | BE 01BE 56 | BF 01BF 57 | C0 01C0 58 | C1 01C1 59 | C2 01C2 60 | C3 010102 61 | C4 01C4 62 | C5 01C5 63 | C6 01C6 64 | C7 01C7 65 | C8 01C8 66 | C9 01C9 67 | CA 01CA 68 | CB 01CB 69 | CC 010300 70 | CD 01CD 71 | CE 01CE 72 | CF 01CF 73 | D0 010110 74 | D1 01D1 75 | D2 010309 76 | D3 01D3 77 | D4 01D4 78 | D5 0101A0 79 | D6 01D6 80 | D7 01D7 81 | D8 01D8 82 | D9 01D9 83 | DA 01DA 84 | DB 01DB 85 | DC 01DC 86 | DD 0101AF 87 | DE 010303 88 | DF 01DF 89 | E0 01E0 90 | E1 01E1 91 | E2 01E2 92 | E3 010103 93 | E4 01E4 94 | E5 01E5 95 | E6 01E6 96 | E7 01E7 97 | E8 01E8 98 | E9 01E9 99 | EA 01EA 100 | EB 01EB 101 | EC 010301 102 | ED 01ED 103 | EE 01EE 104 | EF 01EF 105 | F0 010111 106 | F1 01F1 107 | F2 010323 108 | F3 01F3 109 | F4 01F4 110 | F5 0101A1 111 | F6 01F6 112 | F7 01F7 113 | F8 01F8 114 | F9 01F9 115 | FA 01FA 116 | FB 01FB 117 | FC 01FC 118 | FD 0101B0 119 | FE 
0120AB 120 | FF 01FF 121 | -------------------------------------------------------------------------------- /modules/from/_CP874.man: -------------------------------------------------------------------------------- 1 | DESC 2 | thai characters 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP874.txt: -------------------------------------------------------------------------------- 1 | #http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT 2 | 80 0120AC 3 | 85 012026 4 | 91 012018 5 | 92 012019 6 | 93 01201C 7 | 94 01201D 8 | 95 012022 9 | 96 012013 10 | 97 012014 11 | A0 01A0 12 | A1 010E01 13 | A2 010E02 14 | A3 010E03 15 | A4 010E04 16 | A5 010E05 17 | A6 010E06 18 | A7 010E07 19 | A8 010E08 20 | A9 010E09 21 | AA 010E0A 22 | AB 010E0B 23 | AC 010E0C 24 | AD 010E0D 25 | AE 010E0E 26 | AF 010E0F 27 | B0 010E10 28 | B1 010E11 29 | B2 010E12 30 | B3 010E13 31 | B4 010E14 32 | B5 010E15 33 | B6 010E16 34 | B7 010E17 35 | B8 010E18 36 | B9 010E19 37 | BA 010E1A 38 | BB 010E1B 39 | BC 010E1C 40 | BD 010E1D 41 | BE 010E1E 42 | BF 010E1F 43 | C0 010E20 44 | C1 010E21 45 | C2 010E22 46 | C3 010E23 47 | C4 010E24 48 | C5 010E25 49 | C6 010E26 50 | C7 010E27 51 | C8 010E28 52 | C9 010E29 53 | CA 010E2A 54 | CB 010E2B 55 | CC 010E2C 56 | CD 010E2D 57 | CE 010E2E 58 | CF 010E2F 59 | D0 010E30 60 | D1 010E31 61 | D2 010E32 62 | D3 010E33 63 | D4 010E34 64 | D5 010E35 65 | D6 010E36 66 | D7 010E37 67 | D8 010E38 68 | D9 010E39 69 | DA 010E3A 70 | DF 010E3F 71 | E0 010E40 72 | E1 010E41 73 | E2 010E42 74 | E3 010E43 75 | E4 010E44 76 | E5 010E45 77 | E6 010E46 78 | E7 010E47 79 | E8 010E48 80 | E9 010E49 81 | EA 010E4A 82 | EB 010E4B 83 | EC 010E4C 84 | ED 010E4D 85 | EE 010E4E 86 | EF 010E4F 87 | F0 010E50 88 | F1 010E51 89 | F2 010E52 90 | F3 010E53 91 | F4 010E54 92 | F5 010E55 93 | F6 010E56 94 | F7 010E57 95 | F8 010E58 96 | F9 
010E59 97 | FA 010E5A 98 | FB 010E5B 99 | -------------------------------------------------------------------------------- /modules/from/_CP932.man: -------------------------------------------------------------------------------- 1 | DESC 2 | japanese characters 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP936.man: -------------------------------------------------------------------------------- 1 | DESC 2 | simplified chinese 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | cp936_trans is for transliteration 5 | -------------------------------------------------------------------------------- /modules/from/_CP949.man: -------------------------------------------------------------------------------- 1 | DESC 2 | korean characters 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_CP950.man: -------------------------------------------------------------------------------- 1 | #traditional chinese 2 | 3 | EXAMPLE 4 | #ascii is excluded, use same name without '_' prefixed to get ascii involved 5 | > perl -e 'print "\xa5\x5c\xaf\xe0"' | bsdconv _cp950:utf-8 6 | 功能 7 | > echo 喆 | bsdconv utf-8:_cp950,ascii | hexdump -C 8 | 00000000 0a |.| 9 | > echo 喆 | bsdconv utf-8:_uao241,ascii | hexdump -C 10 | 00000000 95 ed 0a |...| 11 | > echo 测试 | bsdconv utf-8:big5 | hexdump -C 12 | 00000000 0a |.| 13 | #cp950_trans is for transliteration 14 | > echo 测试 | bsdconv utf-8:big5,cp950_trans | hexdump -C 15 | 00000000 b4 fa b8 d5 0a |.....| 16 | 17 | SEEALSO 18 | from/CP950-UDA for UDA to PUA 19 | -------------------------------------------------------------------------------- /modules/from/_GB18030.man: -------------------------------------------------------------------------------- 1 | 
.redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/from/_GB2312.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/from/_GBK.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/from/_ISO-8859-1.man: -------------------------------------------------------------------------------- 1 | DESC 2 | latin alphabet 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | -------------------------------------------------------------------------------- /modules/from/_ISO-8859-1.txt: -------------------------------------------------------------------------------- 1 | A0 01A0 2 | A1 01A1 3 | A2 01A2 4 | A3 01A3 5 | A4 01A4 6 | A5 01A5 7 | A6 01A6 8 | A7 01A7 9 | A8 01A8 10 | A9 01A9 11 | AA 01AA 12 | AB 01AB 13 | AC 01AC 14 | AD 01AD 15 | AE 01AE 16 | AF 01AF 17 | B0 01B0 18 | B1 01B1 19 | B2 01B2 20 | B3 01B3 21 | B4 01B4 22 | B5 01B5 23 | B6 01B6 24 | B7 01B7 25 | B8 01B8 26 | B9 01B9 27 | BA 01BA 28 | BB 01BB 29 | BC 01BC 30 | BD 01BD 31 | BE 01BE 32 | BF 01BF 33 | C0 01C0 34 | C1 01C1 35 | C2 01C2 36 | C3 01C3 37 | C4 01C4 38 | C5 01C5 39 | C6 01C6 40 | C7 01C7 41 | C8 01C8 42 | C9 01C9 43 | CA 01CA 44 | CB 01CB 45 | CC 01CC 46 | CD 01CD 47 | CE 01CE 48 | CF 01CF 49 | D0 01D0 50 | D1 01D1 51 | D2 01D2 52 | D3 01D3 53 | D4 01D4 54 | D5 01D5 55 | D6 01D6 56 | D7 01D7 57 | D8 01D8 58 | D9 01D9 59 | DA 01DA 60 | DB 01DB 61 | DC 01DC 62 | DD 01DD 63 | DE 01DE 64 | DF 01DF 65 | E0 01E0 66 | E1 01E1 67 | E2 01E2 68 | E3 01E3 69 | E4 01E4 70 | E5 01E5 71 | E6 01E6 72 | E7 01E7 73 | E8 01E8 74 | E9 01E9 75 | EA 01EA 76 | EB 01EB 77 | EC 01EC 78 | ED 01ED 79 | EE 01EE 80 | EF 01EF 81 | F0 
01F0 82 | F1 01F1 83 | F2 01F2 84 | F3 01F3 85 | F4 01F4 86 | F5 01F5 87 | F6 01F6 88 | F7 01F7 89 | F8 01F8 90 | F9 01F9 91 | FA 01FA 92 | FB 01FB 93 | FC 01FC 94 | FD 01FD 95 | FE 01FE 96 | FF 01FF 97 | -------------------------------------------------------------------------------- /modules/from/_JIS.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/from/_JIS0201.txt: -------------------------------------------------------------------------------- 1 | # source: ftp://unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0201.TXT 2 | 5C 01A5 3 | 7E 01203E 4 | A1 01FF61 5 | A2 01FF62 6 | A3 01FF63 7 | A4 01FF64 8 | A5 01FF65 9 | A6 01FF66 10 | A7 01FF67 11 | A8 01FF68 12 | A9 01FF69 13 | AA 01FF6A 14 | AB 01FF6B 15 | AC 01FF6C 16 | AD 01FF6D 17 | AE 01FF6E 18 | AF 01FF6F 19 | B0 01FF70 20 | B1 01FF71 21 | B2 01FF72 22 | B3 01FF73 23 | B4 01FF74 24 | B5 01FF75 25 | B6 01FF76 26 | B7 01FF77 27 | B8 01FF78 28 | B9 01FF79 29 | BA 01FF7A 30 | BB 01FF7B 31 | BC 01FF7C 32 | BD 01FF7D 33 | BE 01FF7E 34 | BF 01FF7F 35 | C0 01FF80 36 | C1 01FF81 37 | C2 01FF82 38 | C3 01FF83 39 | C4 01FF84 40 | C5 01FF85 41 | C6 01FF86 42 | C7 01FF87 43 | C8 01FF88 44 | C9 01FF89 45 | CA 01FF8A 46 | CB 01FF8B 47 | CC 01FF8C 48 | CD 01FF8D 49 | CE 01FF8E 50 | CF 01FF8F 51 | D0 01FF90 52 | D1 01FF91 53 | D2 01FF92 54 | D3 01FF93 55 | D4 01FF94 56 | D5 01FF95 57 | D6 01FF96 58 | D7 01FF97 59 | D8 01FF98 60 | D9 01FF99 61 | DA 01FF9A 62 | DB 01FF9B 63 | DC 01FF9C 64 | DD 01FF9D 65 | DE 01FF9E 66 | DF 01FF9F 67 | -------------------------------------------------------------------------------- /modules/from/_SHIFT-JIS.man: -------------------------------------------------------------------------------- 1 | DESC 2 | japanese 3 | ascii is excluded, use same name without '_' prefixed to get ascii involved 4 | 
-------------------------------------------------------------------------------- /modules/from/_UAO241.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/from/_UAO250.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/from/_UTF-8.man: -------------------------------------------------------------------------------- 1 | UTF-8, ASCII excluded 2 | 3 | ARGUMENT 4 | CESU 5 | DESC Decode surrogate pairs 6 | LOOSE 7 | DESC Accept undecoded surrogates 8 | NUL 9 | DESC Accept NUL character; must be used with OVERLONG, because ASCII is excluded and NUL can therefore only appear as an overlong sequence 10 | OVERLONG 11 | DESC Accept overlong sequences 12 | SUPER 13 | DESC Accept code points above U+10FFFF 14 | -------------------------------------------------------------------------------- /modules/from/_UTF-8.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/from/alias: -------------------------------------------------------------------------------- 1 | BIG5 UAO250 2 | BIG5E ASCII,_BIG5E 3 | BIG5-2003 ASCII,_BIG5-2003 4 | BIG5-ETEN ASCII,_BIG5-ETEN 5 | MOZ18 UAO241 6 | HTMLENTITY ASCII-NUMERIC-HTML-ENTITY 7 | NCR ASCII-NUMERIC-HTML-ENTITY 8 | NAMED-HTMLENTITY ASCII-NAMED-HTML-ENTITY 9 | UCS-2 UTF-16LE 10 | UCS-2BE UTF-16BE 11 | UCS-2LE UTF-16LE 12 | CNS11643 ASCII,_CNS11643 13 | CP1251 _CP1251,ASCII,FALLBACK-UNICODE 14 | CP1252 _CP1252,ASCII,FALLBACK-UNICODE 15 | CP1253 _CP1253,ASCII,FALLBACK-UNICODE 16 | CP1254 _CP1254,ASCII,FALLBACK-UNICODE 17 | CP1255 _CP1255,ASCII,FALLBACK-UNICODE 18 | CP1256 _CP1256,ASCII,FALLBACK-UNICODE 19 | CP1257 _CP1257,ASCII,FALLBACK-UNICODE 20 | CP1258 _CP1258,ASCII,FALLBACK-UNICODE 21 | CP874 ASCII,_CP874,FALLBACK-UNICODE 22 | CP932 ASCII,_CP932 23 | CP936 ASCII,_CP936 24 | CP949 ASCII,_CP949 25 | CP950 ASCII,_CP950,CP950-UDA 26 | GB18030 ASCII,_GB18030 27 | GB2312 ASCII,_GB2312 28 | GBK ASCII,_GBK 29 | HKSCS1999 ASCII,_CP950,_HKSCS1999 30 | HKSCS2001 ASCII,_CP950,_HKSCS2001 31 | HKSCS2004 ASCII,_CP950,_HKSCS2004 32 | ISO-8859-1 ASCII,_ISO-8859-1 33 | JIS _JIS,ASCII 34 | LATIN1 ISO-8859-1 35 | SHIFT-JIS ASCII,_SHIFT-JIS 36 | UAO241 ASCII,_UAO241 37 | UAO250 ASCII,_UAO250 38 | UTF-8 ASCII,_UTF-8 39 | 3F ANY#013F&ERROR 40 | SUB ANY#01FFFD 41 | # backward compatibility 42 | SKIP PASS#UNMARK 43 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-FILTER.txt: -------------------------------------------------------------------------------- 1 | 0130,0131 0155,014E,0149,0143,014F,0144,0145 2 | 0131 0155,014E,0149,0143,014F,0144,0145 3 | 0130,0132 0143,014E,0153,0131,0131,0136,0134,0133 4 | 0132 0143,014E,0153,0131,0131,0136,0134,0133 5 | 0130,0133 0142,0159,0154,0145 6 | 0133 0142,0159,0154,0145 7 | 0131,0142 0141,014E,0153,0149 8 | 
-------------------------------------------------------------------------------- /modules/inter/ALIAS-FROM.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | 6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 7 | THIS_CODEC(ins)->priv=bsdconv_create("ASCII:PASS"); 8 | return 0; 9 | } 10 | 11 | void cbdestroy(struct bsdconv_instance *ins){ 12 | bsdconv_destroy(THIS_CODEC(ins)->priv); 13 | } 14 | 15 | void cbconv(struct bsdconv_instance *ins){ 16 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 17 | struct bsdconv_instance *uni=THIS_CODEC(ins)->priv; 18 | const char *locale; 19 | const char *s; 20 | 21 | if (((locale=getenv("LC_ALL")) || (locale=getenv("LC_CTYPE")) || (locale=getenv ("LANG"))) && ((s=strstr(locale, "."))!=NULL)){ 22 | s+=1; 23 | }else{ 24 | s=locale; 25 | } 26 | if(s==NULL || *s==0 || strcmp(s, "C")==0 || strcmp(s, "POSIX")==0){ 27 | s="ASCII"; 28 | } 29 | bsdconv_init(uni); 30 | uni->input.data=strdup(s); 31 | uni->input.len=strlen(s); 32 | uni->input.flags=F_FREE; 33 | uni->input.next=NULL; 34 | uni->flush=1; 35 | bsdconv(uni); 36 | this_phase->data_tail->next=uni->phase[uni->phasen].data_head->next; 37 | uni->phase[uni->phasen].data_head->next=NULL; 38 | uni->phase[uni->phasen].data_tail=uni->phase[uni->phasen].data_head; 39 | while(this_phase->data_tail->next!=NULL){ 40 | this_phase->data_tail=this_phase->data_tail->next; 41 | } 42 | 43 | this_phase->state.status=NEXTPHASE; 44 | return; 45 | } 46 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-FROM.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | #for internal use 3 | > echo BIG5 | bsdconv ascii:from_alias:ascii 4 | UAO250 5 | > echo UAO250 | bsdconv ascii:from_alias:ascii 6 | ASCII,_UAO250 7 | > echo LOCALE | bsdconv ascii:from_alias:ascii 8 | UTF-8 9 | > 
echo UTF-8 | bsdconv ascii:from_alias:ascii 10 | ASCII,_UTF-8 11 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-INTER.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | 6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 7 | THIS_CODEC(ins)->priv=bsdconv_create("ASCII:PASS"); 8 | return 0; 9 | } 10 | 11 | void cbdestroy(struct bsdconv_instance *ins){ 12 | bsdconv_destroy(THIS_CODEC(ins)->priv); 13 | } 14 | 15 | #define CNS11643_UNICODE "CNS11643-UNICODE" 16 | 17 | void cbconv(struct bsdconv_instance *ins){ 18 | struct bsdconv_phase *this_phase = THIS_PHASE(ins); 19 | struct bsdconv_instance *uni = THIS_CODEC(ins)->priv; 20 | int found = 0; 21 | const char *s; 22 | 23 | if(bsdconv_module_check(INTER, CNS11643_UNICODE)){ 24 | found += 1; 25 | s = CNS11643_UNICODE; 26 | bsdconv_init(uni); 27 | uni->input.data=strdup(s); 28 | uni->input.len=strlen(s); 29 | uni->input.flags=F_FREE; 30 | uni->input.next=NULL; 31 | uni->flush=1; 32 | bsdconv(uni); 33 | this_phase->data_tail->next=uni->phase[uni->phasen].data_head->next; 34 | uni->phase[uni->phasen].data_head->next=NULL; 35 | uni->phase[uni->phasen].data_tail=uni->phase[uni->phasen].data_head; 36 | while(this_phase->data_tail->next!=NULL){ 37 | this_phase->data_tail=this_phase->data_tail->next; 38 | } 39 | } 40 | 41 | if(found==0){ 42 | s = "PASS"; 43 | bsdconv_init(uni); 44 | uni->input.data=strdup(s); 45 | uni->input.len=strlen(s); 46 | uni->input.flags=F_FREE; 47 | uni->input.next=NULL; 48 | uni->flush=1; 49 | bsdconv(uni); 50 | this_phase->data_tail->next=uni->phase[uni->phasen].data_head->next; 51 | uni->phase[uni->phasen].data_head->next=NULL; 52 | uni->phase[uni->phasen].data_tail=uni->phase[uni->phasen].data_head; 53 | while(this_phase->data_tail->next!=NULL){ 54 | this_phase->data_tail=this_phase->data_tail->next; 55 | } 56 | } 57 
| 58 | this_phase->state.status=NEXTPHASE; 59 | return; 60 | } 61 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-INTER.txt: -------------------------------------------------------------------------------- 1 | 014E,0146,0144 015F,014E,0146,0144,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0144,0145,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E,013A,015F,014E,0146,012D,014F,0152,0144,0145,0152 2 | 014E,0146,014B,0144 015F,014E,0146,014B,0144,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0144,0145,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E,013A,015F,014E,0146,012D,014F,0152,0144,0145,0152 3 | 014E,0146,0143 014E,0146,0144,013A,015F,014E,0146,0143,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E 4 | 014E,0146,014B,0143 014E,0146,014B,0144,013A,015F,014E,0146,0143,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E 5 | 014E,0146,014B,0144,012D,0143,0141,0153,0145,0146,014F,014C,0144 014E,0146,0144,013A,0143,0141,0153,0145,0146,014F,014C,0144,013A,014E,0146,014B,0144,013A,0143,0141,0153,0145,0146,014F,014C,0144,013A,014E,0146,014B,0144 6 | 0153,0150,014C,0149,0154 0149,014E,0153,0145,0152,0154,0123,0141,0146,0154,0145,0152,013D,0130,0130,0132,0143 7 | 014E,014F,0142,014F,014D 0152,0145,0150,014C,0141,0143,0145,0123,0130,0131,0146,0145,0146,0146 8 | 015A,0148,012D,0153,0154,0152,0149,014E,0147,0153 0153,0154,0152,0149,014E,0147,0153,0123,0146,014F,0152,013D,0143,014A,014B 9 | 0155,014E,0149,0143,014F,0144,0145 ? 
10 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-TO.c: -------------------------------------------------------------------------------- 1 | #include "ALIAS-FROM.c" 2 | -------------------------------------------------------------------------------- /modules/inter/ALIAS-TO.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | #for internal use 3 | > echo BIG5 | bsdconv ascii:to_alias:ascii 4 | CP950 5 | > echo CP950 | bsdconv ascii:to_alias:ascii 6 | _CP950,ASCII 7 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-PAD.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #include "_AMBIGUOUS.h" 4 | 5 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 6 | THIS_CODEC(ins)->priv=malloc(sizeof(int)); 7 | 8 | return 0; 9 | } 10 | 11 | void cbinit(struct bsdconv_instance *ins){ 12 | int *r=THIS_CODEC(ins)->priv; 13 | *r=1; 14 | } 15 | 16 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){ 17 | int *r=THIS_CODEC(ins)->priv; 18 | switch(ctl){ 19 | break; 20 | case BSDCONV_CTL_AMBIGUOUS_PAD: 21 | *r=v; 22 | break; 23 | } 24 | } 25 | 26 | void cbdestroy(struct bsdconv_instance *ins){ 27 | int *r=THIS_CODEC(ins)->priv; 28 | free(r); 29 | } 30 | void cbconv(struct bsdconv_instance *ins){ 31 | unsigned char *data; 32 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 33 | int *dopad=THIS_CODEC(ins)->priv; 34 | data=this_phase->curr->data; 35 | int pad; 36 | 37 | int max=sizeof(ambiguous) / sizeof(struct interval) - 1; 38 | int min = 0; 39 | int mid; 40 | char *space="\x01\xA0"; 41 | uint32_t ucs=0; 42 | 43 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 44 | this_phase->data_tail=this_phase->data_tail->next; 45 | this_phase->data_tail->next=NULL; 46 | 47 | 
if(this_phase->curr->len>0 && data[0]==0x1){ 48 | for(pad=1;padcurr->len;++pad){ 49 | ucs<<=8; 50 | ucs|=data[pad]; 51 | } 52 | 53 | pad=0; 54 | if (ucs < ambiguous[0].first || ucs > ambiguous[max].last){ 55 | pad=0; 56 | }else while (max >= min) { 57 | mid = (min + max) / 2; 58 | if (ucs > ambiguous[mid].last) 59 | min = mid + 1; 60 | else if (ucs < ambiguous[mid].first) 61 | max = mid - 1; 62 | else{ 63 | pad=1; 64 | break; 65 | } 66 | } 67 | if(pad && *dopad){ 68 | DATA_MALLOC(ins, this_phase->data_tail->next); 69 | this_phase->data_tail=this_phase->data_tail->next; 70 | this_phase->data_tail->len=2; 71 | this_phase->data_tail->data=space; 72 | this_phase->data_tail->flags=0; 73 | this_phase->data_tail->next=NULL; 74 | } 75 | } 76 | 77 | this_phase->state.status=NEXTPHASE; 78 | return; 79 | } 80 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-PAD.man: -------------------------------------------------------------------------------- 1 | DESC Pad ambiguous width characters 2 | 3 | INPUT 4 | TYPE Unicode 5 | 6 | OUTPUT 7 | TYPE Unicode 8 | DESC Append U+000A after ambiguous characters 9 | 10 | CTL 11 | BSDCONV_AMBIGUOUS_PAD 12 | PTR 13 | don't care 14 | INT 15 | 1 Enable 16 | 2 Disable 17 | 18 | EXAMPLE 19 | #used in https://github.com/buganini/bug5 20 | > echo БИ 2 | bsdconv utf-8:ambiguous-pad:utf-8 21 | Б И  2 22 | > echo БИ 2 | bsdconv utf-8:ambiguous-pad:utf-8 | hexdump -C 23 | 00000000 d0 91 c2 a0 d0 98 c2 a0 20 32 0a |........ 2.| 24 | #(the padded character is U+00A0) 25 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-PAD.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-UNPAD.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #include "_AMBIGUOUS.h" 4 | 5 | struct my_s{ 6 | char s; 7 | int dopad; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 12 | return 0; 13 | } 14 | 15 | void cbinit(struct bsdconv_instance *ins){ 16 | struct my_s *r=THIS_CODEC(ins)->priv; 17 | r->s=0; 18 | r->dopad=1; 19 | } 20 | 21 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){ 22 | struct my_s *r=THIS_CODEC(ins)->priv; 23 | switch(ctl){ 24 | break; 25 | case BSDCONV_CTL_AMBIGUOUS_PAD: 26 | r->dopad=v; 27 | break; 28 | } 29 | } 30 | 31 | void cbdestroy(struct bsdconv_instance *ins){ 32 | free(THIS_CODEC(ins)->priv); 33 | } 34 | 35 | void cbconv(struct bsdconv_instance *ins){ 36 | unsigned char *data; 37 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 38 | struct my_s *r=THIS_CODEC(ins)->priv; 39 | data=this_phase->curr->data; 40 | int pad; 41 | int max=sizeof(ambiguous) / sizeof(struct interval) - 1; 42 | int min = 0; 43 | int mid; 44 | uint32_t ucs=0; 45 | 46 | this_phase->state.status=NEXTPHASE; 47 | 48 | if(this_phase->curr->len>1 && data[0]==0x1){ 49 | if(r->s==1 && data[1]==0xA0){ 50 | r->s=0; 51 | return; 52 | }else{ 53 | for(pad=1;padcurr->len;++pad){ 54 | ucs<<=8; 55 | ucs|=data[pad]; 56 | } 57 | 58 | pad=0; 59 | if (ucs < ambiguous[0].first || ucs > ambiguous[max].last){ 60 | pad=0; 61 | }else while (max >= min) { 62 | mid = (min + max) / 2; 63 | if (ucs > ambiguous[mid].last) 64 | min = mid + 1; 65 | else if (ucs < ambiguous[mid].first) 66 | max = mid - 1; 67 | else{ 68 | pad=1; 69 | break; 70 | } 71 | } 72 | if(pad && r->dopad){ 73 | r->s=1; 74 | } 75 | } 76 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 77 | 
this_phase->data_tail=this_phase->data_tail->next; 78 | this_phase->data_tail->next=NULL; 79 | }else{ 80 | r->s=0; 81 | } 82 | 83 | return; 84 | } 85 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-UNPAD.man: -------------------------------------------------------------------------------- 1 | DESC Unpad padded ambiguous width characters 2 | 3 | INPUT 4 | TYPE Unicode 5 | 6 | OUTPUT 7 | TYPE Unicode 8 | DESC Remove U+000A after ambiguous characters 9 | 10 | CTL 11 | BSDCONV_AMBIGUOUS_PAD 12 | PTR 13 | don't care 14 | INT 15 | 1 Enable 16 | 2 Disable 17 | 18 | EXAMPLE 19 | #used in https://github.com/buganini/bug5 20 | > echo Б И  2|bsdconv utf-8:ambiguous-unpad:utf-8 21 | БИ 2 22 | -------------------------------------------------------------------------------- /modules/inter/AMBIGUOUS-UNPAD.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/BIG5-DEFRAG.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | struct my_s{ 4 | struct data_rt *p; 5 | struct data_rt *q; 6 | struct data_rt **r; 7 | char f; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 12 | return 0; 13 | } 14 | 15 | void cbinit(struct bsdconv_instance *ins){ 16 | struct my_s *r=THIS_CODEC(ins)->priv; 17 | r->p=NULL; 18 | r->q=NULL; 19 | r->r=&(r->q); 20 | r->f=0; 21 | } 22 | 23 | void cbdestroy(struct bsdconv_instance *ins){ 24 | free(THIS_CODEC(ins)->priv); 25 | } 26 | 27 | void cbconv(struct bsdconv_instance *ins){ 28 | unsigned char *data; 29 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 30 | struct my_s *r=THIS_CODEC(ins)->priv; 31 | data=this_phase->curr->data; 32 | 33 | if(r->f==0){ 34 | if(data[0]==0x3 && data[1]>0x7f){ 35 
| r->f=1; 36 | r->p=dup_data_rt(ins, this_phase->curr); 37 | this_phase->state.status=SUBMATCH; 38 | return; 39 | }else{ 40 | DATA_MALLOC(ins, this_phase->data_tail->next); 41 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 42 | this_phase->data_tail=this_phase->data_tail->next; 43 | this_phase->data_tail->next=NULL; 44 | this_phase->state.status=NEXTPHASE; 45 | return; 46 | } 47 | }else if(r->f){ 48 | if(data[0]==0x1b){ 49 | *(r->r)=dup_data_rt(ins, this_phase->curr); 50 | (*(r->r))->next=NULL; 51 | r->r=&((*(r->r))->next); 52 | 53 | this_phase->state.status=SUBMATCH; 54 | return; 55 | }else{ 56 | r->f=0; 57 | 58 | this_phase->data_tail->next=r->p; 59 | this_phase->data_tail=this_phase->data_tail->next; 60 | 61 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 62 | this_phase->data_tail=this_phase->data_tail->next; 63 | 64 | if(r->q){ 65 | this_phase->data_tail->next=r->q; 66 | *(r->r)=NULL; 67 | while(this_phase->data_tail->next){ 68 | this_phase->data_tail=this_phase->data_tail->next; 69 | } 70 | } 71 | r->p=r->q=NULL; 72 | r->r=&(r->q); 73 | r->f=0; 74 | this_phase->state.status=NEXTPHASE; 75 | return; 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /modules/inter/BIG5-DEFRAG.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "\xaf\033[1m\xe0"' | bsdconv 'ansi-control,byte:big5-defrag:byte,ansi-control|skip,big5:bsdconv-stdout' 3 | 0180FD 4 | 1B5B316D ( FREE ) 5 | -------------------------------------------------------------------------------- /modules/inter/BIG5-DEFRAG.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/BONUS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../../src/bsdconv.h" 3 | 4 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 5 | THIS_CODEC(ins)->priv=bsdconv_counter(ins, "SCORE"); 6 | return 0; 7 | } 8 | 9 | void cbconv(struct bsdconv_instance *ins){ 10 | struct data_rt *data_ptr; 11 | bsdconv_counter_t *counter=THIS_CODEC(ins)->priv; 12 | unsigned char *data; 13 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 14 | struct data_st data_st; 15 | memcpy(&data_st, (char *)((this_phase->codec[this_phase->index].data_z)+(uintptr_t)this_phase->state.data), sizeof(struct data_st)); 16 | data=UCP((THIS_CODEC(ins)->data_z)+(uintptr_t)de_offset(data_st.data)); 17 | 18 | *counter += *data; 19 | 20 | LISTCPY_ST(ins, this_phase->data_tail, (void *)(uintptr_t)de_offset(data_st.next), THIS_CODEC(ins)->data_z); 21 | 22 | this_phase->state.status=NEXTPHASE; 23 | return; 24 | } 25 | -------------------------------------------------------------------------------- /modules/inter/COUNT.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_s{ 6 | struct bsdconv_filter *filter; 7 | bsdconv_counter_t *counter; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | struct my_s *r=malloc(sizeof(struct my_s)); 12 | THIS_CODEC(ins)->priv=r; 13 | r->filter=NULL; 14 | 15 | char *filter=NULL; 16 | char *key="COUNT"; 17 | while(arg){ 18 | if(strcasecmp(arg->key, "FOR")==0){ 19 | filter=arg->ptr; 20 | }else{ 21 | key=arg->key; 22 | } 23 | arg=arg->next; 24 | } 25 | if(filter!=NULL){ 26 | r->filter=load_filter(filter); 27 | if(r->filter==NULL){ 28 | free(r); 29 | return EOPNOTSUPP; 30 | } 31 | } 32 | r->counter=bsdconv_counter(ins, key); 33 | return 0; 
34 | } 35 | 36 | void cbdestroy(struct bsdconv_instance *ins){ 37 | struct my_s *r=THIS_CODEC(ins)->priv; 38 | if(r->filter) 39 | unload_filter(r->filter); 40 | free(r); 41 | } 42 | 43 | void cbconv(struct bsdconv_instance *ins){ 44 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 45 | struct my_s *r=THIS_CODEC(ins)->priv; 46 | 47 | if(r->filter==NULL || r->filter->cbfilter(this_phase->curr)) 48 | *(r->counter)+=1; 49 | 50 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 51 | this_phase->data_tail=this_phase->data_tail->next; 52 | this_phase->data_tail->next=NULL; 53 | 54 | this_phase->state.status=NEXTPHASE; 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /modules/inter/COUNT.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC No content changed, counter increased 7 | 8 | ARGUMENT 9 | FOR 10 | FORMAT FILTER 11 | $CounterName 12 | -------------------------------------------------------------------------------- /modules/inter/COUNT.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/FULL.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo testTEST1234 | bsdconv utf-8:full:utf-8 3 | testTEST1234 4 | -------------------------------------------------------------------------------- /modules/inter/HALF.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo testTEST1234 | bsdconv utf-8:half:utf-8 3 | testTEST1234 4 | -------------------------------------------------------------------------------- /modules/inter/INSERT.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_s{ 6 | struct data_rt *after; 7 | struct data_rt *before; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | struct my_s *r=malloc(sizeof(struct my_s)); 12 | int e; 13 | r->after=NULL; 14 | r->before=NULL; 15 | 16 | char *after=NULL; 17 | char *before=NULL; 18 | while(arg){ 19 | if(strcasecmp(arg->key, "AFTER")==0){ 20 | after=arg->ptr; 21 | }else if(strcasecmp(arg->key, "BEFORE")==0){ 22 | before=arg->ptr; 23 | }else{ 24 | return EINVAL; 25 | } 26 | arg=arg->next; 27 | } 28 | 29 | if(after){ 30 | r->after=str2data(after, &e, ins); 31 | if(e){ 32 | if(r->after) 33 | DATA_FREE(ins, r->after); 34 | free(r); 35 | return e; 36 | } 37 | } 38 | 39 | if(before){ 40 | r->before=str2data(before, &e, ins); 41 | if(e){ 42 | if(r->after) 43 | DATA_FREE(ins, r->after); 44 | if(r->before) 45 | DATA_FREE(ins, r->before); 46 | free(r); 47 | return e; 48 | } 49 | } 50 | 51 | THIS_CODEC(ins)->priv=r; 52 | return 0; 53 | } 54 | 55 | void cbdestroy(struct bsdconv_instance *ins){ 56 | struct my_s *r=THIS_CODEC(ins)->priv; 57 | if(r->after) 58 | DATA_FREE(ins, r->after); 59 | if(r->before) 60 | DATA_FREE(ins, r->before); 61 | free(r); 62 | } 63 
| 64 | void cbconv(struct bsdconv_instance *ins){ 65 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 66 | struct my_s *r=THIS_CODEC(ins)->priv; 67 | 68 | if(r->before) 69 | LISTCPY(ins, this_phase->data_tail, r->before); 70 | 71 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 72 | this_phase->data_tail=this_phase->data_tail->next; 73 | this_phase->data_tail->next=NULL; 74 | 75 | if(r->after) 76 | LISTCPY(ins, this_phase->data_tail, r->after); 77 | 78 | this_phase->state.status=NEXTPHASE; 79 | return; 80 | } 81 | -------------------------------------------------------------------------------- /modules/inter/INSERT.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC Append ',' (in term of bsdconv-keyword) 7 | 8 | ARGUMENT 9 | AFTER 10 | TYPE Hex Value List 11 | BEFORE 12 | TYPE Hex Value List 13 | 14 | EXAMPLE: 15 | printf test|bsdconv utf-8:insert#after=002c:bsdconv-keyword,utf-8 16 | t,e,s,t, 17 | -------------------------------------------------------------------------------- /modules/inter/INSERT.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/KANA-PHONETIC.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo ドラえもん | bsdconv utf-8:kana_phonetic:utf-8 3 | doraemon 4 | -------------------------------------------------------------------------------- /modules/inter/KANJI.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 澀谷驛| bsdconv utf-8:kanji:utf-8 3 | 渋谷駅 4 | -------------------------------------------------------------------------------- /modules/inter/LOWER.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo testTEST | bsdconv utf-8:lower:utf-8 3 | testtest 4 | -------------------------------------------------------------------------------- /modules/inter/MAC.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "a\nb"' | bsdconv utf-8:mac:utf-8|hexdump -C 3 | 00000000 61 0d 62 |a.b| 4 | -------------------------------------------------------------------------------- /modules/inter/MAC.txt: -------------------------------------------------------------------------------- 1 | #convert CRLF/CR/LF to CR (MAC) 2 | 010D,010A 010D #CRLF 3 | 010A 010D #LF 4 | 010D 010D #CR 5 | -------------------------------------------------------------------------------- /modules/inter/NL2BR.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "a\nb"'|bsdconv utf-8:nl2br:utf-8 3 | a<br />b 4 | -------------------------------------------------------------------------------- /modules/inter/NL2BR.txt: -------------------------------------------------------------------------------- 1 | #convert CRLF/CR/LF to <br />
2 | 010D,010A 013C,0162,0172,0120,012F,013E #CRLF 3 | 010A 013C,0162,0172,0120,012F,013E #LF 4 | 010D 013C,0162,0172,0120,012F,013E #CR 5 | -------------------------------------------------------------------------------- /modules/inter/NULL.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | void cbconv(struct bsdconv_instance *ins){ 4 | THIS_PHASE(ins)->state.status=NEXTPHASE; 5 | return; 6 | } 7 | -------------------------------------------------------------------------------- /modules/inter/NULL.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | #used to removed untranslated characters 3 | > echo ABCDEabcde|bsdconv utf-8:super:utf-8 4 | ᴬᴮCᴰᴱᵃᵇᶜᵈᵉ 5 | > echo ABCDEabcde|bsdconv utf-8:super,null:utf-8 6 | ᴬᴮᴰᴱᵃᵇᶜᵈᵉ 7 | -------------------------------------------------------------------------------- /modules/inter/NULL.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/PASS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../../src/bsdconv.h" 6 | 7 | struct my_s{ 8 | struct bsdconv_filter *filter; 9 | int limit; 10 | int passed; 11 | }; 12 | 13 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 14 | struct my_s *r=malloc(sizeof(struct my_s)); 15 | THIS_CODEC(ins)->priv=r; 16 | r->filter=NULL; 17 | r->limit=0; 18 | int i; 19 | 20 | char *filter=NULL; 21 | while(arg){ 22 | if(strcasecmp(arg->key, "FOR")==0){ 23 | filter=arg->ptr; 24 | }else if(strcasecmp(arg->key, "LIMIT")==0 && sscanf(arg->ptr, "%d", &i)==1){ 25 | r->limit=i; 26 | }else{ 27 | free(r); 28 | return EINVAL; 29 | } 30 | arg=arg->next; 31 | } 32 | if(filter!=NULL){ 33 | r->filter=load_filter(filter); 34 | if(r->filter==NULL){ 35 | free(r); 36 | return EOPNOTSUPP; 37 | } 38 | } 39 | return 0; 40 | } 41 | 42 | void cbinit(struct bsdconv_instance *ins){ 43 | struct my_s *r=THIS_CODEC(ins)->priv; 44 | r->passed=0; 45 | } 46 | 47 | void cbdestroy(struct bsdconv_instance *ins){ 48 | struct my_s *r=THIS_CODEC(ins)->priv; 49 | if(r->filter) 50 | unload_filter(r->filter); 51 | free(r); 52 | } 53 | 54 | void cbconv(struct bsdconv_instance *ins){ 55 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 56 | struct my_s *t=THIS_CODEC(ins)->priv; 57 | int pass=1; 58 | 59 | if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr)){ 60 | pass=0; 61 | } 62 | 63 | if(pass && t->limit!=0){ 64 | if(t->passed < t->limit){ 65 | t->passed += 1; 66 | }else{ 67 | pass=0; 68 | } 69 | } 70 | 71 | if(pass){ 72 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 73 | this_phase->data_tail=this_phase->data_tail->next; 74 | this_phase->data_tail->next=NULL; 75 | 76 | this_phase->i=this_phase->curr->len-1; 77 | this_phase->state.status=NEXTPHASE; 78 | }else{ 
79 | this_phase->state.status=DEADEND; 80 | } 81 | 82 | return; 83 | } 84 | -------------------------------------------------------------------------------- /modules/inter/PASS.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC Filtered according to arguments 7 | 8 | ARGUMENT 9 | FOR 10 | FORMAT ENUM{UNICODE,1,CNS11643,2,BYTE,3,ANSI,1B} 11 | DESC Input type whitelist 12 | LIMIT 13 | FORMAT INTEGER 14 | DESC Limit the number of packets to be passed 15 | -------------------------------------------------------------------------------- /modules/inter/PASS.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/REPLACE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_s{ 6 | struct data_rt *from; 7 | struct data_rt *to; 8 | struct data_rt *cursor; 9 | }; 10 | 11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 12 | struct my_s *r=malloc(sizeof(struct my_s)); 13 | int e; 14 | r->from=NULL; 15 | r->to=NULL; 16 | while(arg){ 17 | DATA_FREE(ins, r->from); 18 | DATA_FREE(ins, r->to); 19 | r->from=str2data(arg->key, &e, ins); 20 | if(e){ 21 | free(r); 22 | return e; 23 | } 24 | if(r->from==NULL){ 25 | free(r); 26 | return EINVAL; 27 | } 28 | if(arg->ptr){ 29 | r->to=str2data(arg->ptr, &e, ins); 30 | if(e){ 31 | DATA_FREE(ins, r->from); 32 | free(r); 33 | return e; 34 | } 35 | } 36 | arg=arg->next; 37 | } 38 | THIS_CODEC(ins)->priv=r; 39 | return 0; 40 | } 41 | 42 | void cbdestroy(struct bsdconv_instance *ins){ 43 | struct my_s *r=THIS_CODEC(ins)->priv; 44 | DATA_FREE(ins, r->from); 45 | DATA_FREE(ins, r->to); 46 | free(r); 47 | } 48 | 49 | void cbinit(struct bsdconv_instance *ins){ 50 | struct my_s 
*r=THIS_CODEC(ins)->priv; 51 | r->cursor=r->from; 52 | } 53 | 54 | void cbconv(struct bsdconv_instance *ins){ 55 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 56 | struct my_s *r=THIS_CODEC(ins)->priv; 57 | unsigned char *datai=this_phase->curr->data; 58 | unsigned char *datar=r->cursor->data; 59 | size_t l=this_phase->curr->len; 60 | size_t i; 61 | 62 | if(l != r->cursor->len){ 63 | r->cursor=r->from; 64 | this_phase->state.status=DEADEND; 65 | return; 66 | } 67 | 68 | for(i=0;icursor=r->from; 71 | this_phase->state.status=DEADEND; 72 | return; 73 | } 74 | } 75 | 76 | if(r->cursor->next != NULL){ 77 | r->cursor = r->cursor->next; 78 | this_phase->state.status=CONTINUE; 79 | return; 80 | }else{ 81 | r->cursor = r->from; 82 | LISTCPY(ins, this_phase->data_tail, r->to); 83 | this_phase->state.status=NEXTPHASE; 84 | return; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /modules/inter/REPLACE.man: -------------------------------------------------------------------------------- 1 | echo ABCD | bsdconv utf-8:replace#0142.0143=0132.0133:utf-8 2 | A23D 3 | 4 | echo ABCD | bsdconv utf-8:replace#0142.0143:utf-8 5 | AD 6 | -------------------------------------------------------------------------------- /modules/inter/REPLACE.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/SCORE-TRAIN.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../../src/bsdconv.h" 6 | 7 | struct my_s{ 8 | FILE *bak; 9 | FILE *score; 10 | FILE *list; 11 | }; 12 | 13 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 14 | struct my_s *r=malloc(sizeof(struct my_s)); 15 | char buf[256]={0}; 16 | char *p=getenv("BSDCONV_SCORE"); 17 | if(p==NULL){ 18 | strcpy(buf,getenv("HOME")); 19 | strcat(buf,"/.bsdconv.score"); 20 | p=buf; 21 | } 22 | r->bak=fopen(p,"a"); //ensure file existence 23 | fclose(r->bak); 24 | r->bak=r->score=fopen(p,"rb+"); 25 | r->list=NULL; 26 | THIS_CODEC(ins)->priv=r; 27 | return 0; 28 | } 29 | 30 | void cbdestroy(struct bsdconv_instance *ins){ 31 | struct my_s *r=THIS_CODEC(ins)->priv; 32 | fclose(r->bak); 33 | free(r); 34 | } 35 | 36 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){ 37 | struct my_s *r=THIS_CODEC(ins)->priv; 38 | switch(ctl){ 39 | case BSDCONV_CTL_ATTACH_SCORE: 40 | r->score=ptr; 41 | break; 42 | case BSDCONV_CTL_ATTACH_OUTPUT_FILE: 43 | r->list=ptr; 44 | break; 45 | } 46 | } 47 | 48 | void cbconv(struct bsdconv_instance *ins){ 49 | unsigned char *data; 50 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 51 | struct my_s *r=THIS_CODEC(ins)->priv; 52 | data=this_phase->curr->data; 53 | unsigned char v=0; 54 | int i; 55 | uint32_t ucs=0; 56 | uint32_t ucs4; 57 | 58 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 59 | this_phase->data_tail=this_phase->data_tail->next; 60 | this_phase->data_tail->next=NULL; 61 | 62 | if(data[0]==0x1){ 63 | for(i=1;icurr->len;++i){ 64 | ucs<<=8; 65 | ucs|=data[i]; 66 | } 67 | fseek(r->score, ucs*sizeof(unsigned char), SEEK_SET); 68 | fread(&v, sizeof(unsigned char), 1, r->score); 69 | if(v==0 && r->list){ 70 | ucs4=htobe32(ucs); 71 
| fwrite(&ucs4, sizeof(uint32_t), 1, r->list); 72 | } 73 | if(v<3){ 74 | v+=1; 75 | fseek(r->score, ucs*sizeof(unsigned char), SEEK_SET); 76 | fwrite(&v, sizeof(unsigned char), 1, r->score); 77 | } 78 | } 79 | 80 | this_phase->state.status=NEXTPHASE; 81 | return; 82 | } 83 | -------------------------------------------------------------------------------- /modules/inter/SCORE-TRAIN.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Generate score table according to input data. 3 | If environment variable BSDCONV_SCORE is presented, it will be used as score table path, else if ~/.bsdconv.score is presented, it will be used as score table. 4 | 5 | SEEALSO 6 | inter/SCORE#EXAMPLE 7 | -------------------------------------------------------------------------------- /modules/inter/SCORE-TRAIN.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/SCORE.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Increase SCORE counter according to score table. 3 | If environment variable BSDCONV_SCORE is presented, it will be used as score table path, else if ~/.bsdconv.score is presented, it will be used as score table. 4 | 5 | ARGUMENT 6 | WITH 7 | FORMAT Scorer 8 | DESC 9 | If TRAINED is specified, default score file will be used 10 | AS 11 | DESC 12 | Counter Name 13 | 14 | EXAMPLE 15 | > echo 123Б測試 | bsdconv utf-8:score#with=cjk:null 16 | Score: 25 17 | > echo 123Б測試 | bsdconv big5:score#with=cjk:null 18 | IERR: 2 19 | Score: 29 20 | #if score data exists, it will use it. 
21 | #default score data path is $HOME/.bsdconv.score 22 | > echo 123Б測試 | bsdconv utf-8:score-train:null 23 | > echo 123Б測試 | bsdconv utf-8:score#with=trained:null 24 | Score: 7 25 | > echo 123Б測試功能 | bsdconv utf-8:score#with=trained:null 26 | Score: 7 27 | > echo 123Б測試功能 | bsdconv utf-8:score-train:null 28 | > echo 123Б測試功能 | bsdconv utf-8:score#with=trained:null 29 | Score: 16 30 | #specify score data path in C 31 | bsdconv_ctl( 32 | struct bsdconv_instance *, /*conversion instance*/ 33 | BSDCONV_CTL_ATTACH_SCORE, /* Ctl constant*/ 34 | FILE *, /* File pointer to the score data*/ 35 | 0 /* unused argument */ 36 | ); 37 | -------------------------------------------------------------------------------- /modules/inter/SCORE.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/STRINGS.man: -------------------------------------------------------------------------------- 1 | DESC 2 | Similar to STRINGS(1), but targets characters matching the specified filter 3 | 4 | ARGUMENT 5 | FOR 6 | FORMAT FILTER 7 | MIN-LEN 8 | FORMAT INTEGER 9 | AFTER 10 | TYPE Hex Value List 11 | DEFAULT 010A 12 | BEFORE 13 | TYPE Hex Value List 14 | 15 | EXAMPLE 16 | > echo abd測試efg功能,hij | bsdconv utf-8:strings#for=cjk:utf-8 17 | 測試 18 | 功能 19 | 20 | > echo aㄎabㄎabcㄉabcd|bsdconv utf-8:strings#min-len=3:utf-8 21 | abc 22 | abcd 23 | 24 | > echo aㄎabㄎabcㄉabcd | bsdconv 'utf-8:strings#min-len=3&sep=013b.010a:utf-8' 25 | abc; 26 | abcd; 27 | -------------------------------------------------------------------------------- /modules/inter/STRINGS.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/SUB.txt: -------------------------------------------------------------------------------- 1 | 0103B2 011D66 2 | 0103B3 011D67 3 | 0103C1 011D68 4 | 0103C6 011D69 5 | 0103C7 011D6A 6 | 012212 01208B 7 | 0128 01208D 8 | 0129 01208E 9 | 012B 01208A 10 | 0130 012080 11 | 0131 012081 12 | 0132 012082 13 | 0133 012083 14 | 0134 012084 15 | 0135 012085 16 | 0136 012086 17 | 0137 012087 18 | 0138 012088 19 | 0139 012089 20 | 013D 01208C 21 | 0161 012090 22 | 0165 012091 23 | 0168 012095 24 | 0169 011D62 25 | 016A 012C7C 26 | 016B 012096 27 | 016C 012097 28 | 016D 012098 29 | 016E 012099 30 | 016F 012092 31 | 0170 01209A 32 | 0172 011D63 33 | 0173 01209B 34 | 0174 01209C 35 | 0175 011D64 36 | 0176 011D65 37 | 0178 012093 38 | -------------------------------------------------------------------------------- /modules/inter/SUPER.txt: -------------------------------------------------------------------------------- 1 | 010126 01A7F8 2 | 01014B 011D51 3 | 010153 01A7F9 4 | 01018E 011D32 5 | 010251 011D45 6 | 010254 011D53 7 | 01025B 011D4B 8 | 010266 0102B1 9 | 010275 011DB1 10 | 010292 011DBE 11 | 0103B2 011D5D 12 | 0103B3 011D5E 13 | 0103B4 011D5F 14 | 0103B8 011DBF 15 | 0103C6 011D60 16 | 0103C7 011D61 17 | 01043D 011D78 18 | 01044A 01A69C 19 | 01044C 01A69D 20 | 0110DC 0110FC 21 | 012212 01207B 22 | 0128 01207D 23 | 0129 01207E 24 | 012B 01207A 25 | 012D61 012D6F 26 | 0130 012070 27 | 0131 01B9 28 | 0132 01B2 29 | 0133 01B3 30 | 0134 012074 31 | 0135 012075 32 | 0136 012076 33 | 0137 012077 34 | 0138 012078 35 | 0139 012079 36 | 013D 01207C 37 | 0141 011D2C 38 | 0142 011D2E 39 | 0144 011D30 40 | 0145 011D31 41 | 0147 011D33 42 | 0148 011D34 43 | 0149 011D35 44 | 014A 011D36 45 | 014B 011D37 46 | 014C 011D38 47 | 014D 011D39 48 | 014E 011D3A 49 | 014F 011D3C 50 | 0150 011D3E 51 | 0152 011D3F 52 | 0154 011D40 53 | 0155 011D41 54 | 0156 012C7D 55 | 0157 011D42 56 | 0161 011D43 57 
| 0161 01AA 58 | 0162 011D47 59 | 0163 011D9C 60 | 0164 011D48 61 | 0165 011D49 62 | 0166 011DA0 63 | 0167 011D4D 64 | 0168 0102B0 65 | 0169 012071 66 | 016A 0102B2 67 | 016B 011D4F 68 | 016C 0102E1 69 | 016D 011D50 70 | 016E 01207F 71 | 016F 011D52 72 | 016F 01BA 73 | 0170 011D56 74 | 0172 0102B3 75 | 0173 0102E2 76 | 0174 011D57 77 | 0175 011D58 78 | 0176 011D5B 79 | 0177 0102B7 80 | 0178 0102E3 81 | 0179 0102B8 82 | 017A 011DBB 83 | 01A76F 01A770 84 | 01C6 011D2D 85 | 01F0 011D9E 86 | -------------------------------------------------------------------------------- /modules/inter/TRIM-WIDTH.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../../src/bsdconv.h" 7 | 8 | struct my_s{ 9 | struct bsdconv_instance *ins; 10 | char ambi_width; 11 | size_t width; 12 | long remain; 13 | bsdconv_counter_t *full; 14 | bsdconv_counter_t *half; 15 | bsdconv_counter_t *ambi; 16 | }; 17 | /* Parse the codec arguments: AMBI-AS-WIDE / AMBIGUOUS-AS-WIDE makes ambiguous-width characters count as 2 columns; a bare non-negative integer key is the mandatory target width. Returns 0 on success, ENOMEM if the state cannot be allocated, EINVAL for an unknown key, a negative width or a missing width. The state is freed on every failure path, as the sibling PASS and REPLACE modules do (assumes cbdestroy is not invoked after a failed cbcreate - TODO confirm against the loader). */ 18 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 19 | int i; 20 | struct my_s *r=THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 21 | 22 | char width_set=0; 23 | if(r==NULL) return ENOMEM; r->ambi_width=1; 24 | 25 | while(arg){ 26 | if(strcasecmp(arg->key, "AMBI-AS-WIDE")==0 || strcasecmp(arg->key, "AMBIGUOUS-AS-WIDE")==0){ 27 | r->ambi_width=2; 28 | }else if(sscanf(arg->key,"%d", &i)==1 && i>=0){ /* a negative value would wrap when stored into the size_t width */ 29 | r->width=i; 30 | width_set=1; 31 | }else{ 32 | free(r); return EINVAL; 33 | } 34 | arg=arg->next; 35 | } 36 | 37 | if(width_set==0){ free(r); 38 | return EINVAL; } 39 | 40 | r->ins=bsdconv_create("WIDTH"); 41 | r->full=bsdconv_counter(r->ins, "FULL"); 42 | r->half=bsdconv_counter(r->ins, "HALF"); 43 | r->ambi=bsdconv_counter(r->ins, "AMBI"); 44 | return 0; 45 | } 46 | 47 | void cbinit(struct bsdconv_instance *ins){ 48 | struct my_s *r=THIS_CODEC(ins)->priv; 49 | bsdconv_init(r->ins); 50 | r->remain=r->width; 51 | } 52 | 53 | void cbdestroy(struct bsdconv_instance *ins){ 54 | struct my_s 
*r=THIS_CODEC(ins)->priv; 55 | bsdconv_destroy(r->ins); 56 | free(r); 57 | } 58 | 59 | void cbconv(struct bsdconv_instance *ins){ 60 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 61 | struct my_s *r=THIS_CODEC(ins)->priv; 62 | 63 | bsdconv_counter_reset(r->ins, NULL); 64 | bsdconv_init(r->ins); 65 | r->ins->input=*(this_phase->curr); 66 | this_phase->curr->flags &= ~F_FREE; 67 | r->ins->input.next=NULL; 68 | r->ins->flush=1; 69 | bsdconv(r->ins); 70 | int w=*(r->full)*2 + *(r->half) + *(r->ambi) * r->ambi_width; 71 | if(r->remain >= w){ 72 | this_phase->data_tail->next=r->ins->phase[r->ins->phasen].data_head->next; 73 | while(this_phase->data_tail->next){ 74 | this_phase->data_tail=this_phase->data_tail->next; 75 | } 76 | r->ins->phase[r->ins->phasen].data_head->next=NULL; 77 | r->ins->phase[r->ins->phasen].data_tail=r->ins->phase[r->ins->phasen].data_head; 78 | r->remain -= w; 79 | }else{ 80 | r->remain=-1; 81 | } 82 | 83 | this_phase->state.status=NEXTPHASE; 84 | return; 85 | } 86 | -------------------------------------------------------------------------------- /modules/inter/TRIM-WIDTH.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE ANY 3 | 4 | OUTPUT 5 | TYPE ANY 6 | DESC Trimmed content 7 | 8 | ARGUMENT 9 | AMBIGUOUS-AS-WIDE 10 | AMBI-AS-WIDE 11 | Count ambiguous-width characters as width 2 12 | $Integer (mandatory) 13 | The width of the desired trim 14 | 15 | EXAMPLE 16 | > echo ˋˊ這是個很長的字串啊啊啊 | bsdconv "utf-8:trim-width#22&ambi-as-wide:utf-8" 17 | ˋˊ這是個很長的字串啊 18 | > echo ˋˊ這是個很長的字串啊啊啊 | bsdconv utf-8:trim-width#22:utf-8 19 | ˋˊ這是個很長的字串啊啊 20 | > echo 三長兩短ˊˋ3長2短 | bsdconv "utf-8:trim-width#10&ambiguous-as-wide:utf-8" 21 | -------------------------------------------------------------------------------- /modules/inter/TRIM-WIDTH.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/UNIX.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "a\r\nb"' | bsdconv utf-8:unix:utf-8 | hexdump -C 3 | 00000000 61 0a 62 |a.b| 4 | -------------------------------------------------------------------------------- /modules/inter/UNIX.txt: -------------------------------------------------------------------------------- 1 | #convert CRLF/CR/LF to LF (UNIX) 2 | 010D,010A 010A #CRLF 3 | 010A 010A #LF 4 | 010D 010A #CR 5 | -------------------------------------------------------------------------------- /modules/inter/UPPER.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo testTEST | bsdconv utf-8:upper:utf-8 3 | TESTTEST 4 | -------------------------------------------------------------------------------- /modules/inter/UPSIDEDOWN.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo FUNNY | bsdconv utf-8:upsidedown:utf-8 3 | Ⅎ∩ᴎᴎ⅄ 4 | -------------------------------------------------------------------------------- /modules/inter/UPSIDEDOWN.txt: -------------------------------------------------------------------------------- 1 | #source: http://www.fileformat.info/convert/text/upside-down-map.htm 2 | 01010412 0142 3 | 010131,010323 0169 4 | 01017F 014A 5 | 010183 0167 6 | 01018E 0145 7 | 010190 0133 8 | 0101DD 0165 9 | 010250 0161 10 | 010254 0163 11 | 01025F 0166 12 | 010265 0168 13 | 01026F 016D 14 | 010279 0172 15 | 01027E 016A 16 | 010283 016C 17 | 010287 0174 18 | 01028C 0176 19 | 01028D 0177 20 | 01028E 0179 21 | 01029E 016B 22 | 0102D9 012E 23 | 01038C 0151 24 | 010500 0150 25 | 01061B 013B 26 | 01152D 0134 27 | 011D0E 014E 28 | 011D1A 0152 29 | 011D27 0156 30 | 01201E 0122 31 | 01203E 015F 32 | 01203F 012040 33 | 012040 01203F 34 | 012045 012046 35 | 012046 012045 36 | 0121 01A1 37 
| 012132 0146 38 | 012141 0147 39 | 012142 014C 40 | 012144 0159 41 | 01214B 0126 42 | 012183 0143 43 | 0122 01201E 44 | 012200 0141 45 | 012229 0155 46 | 012234 012235 47 | 012235 012234 48 | 0122A5 0154 49 | 0122CA 014B 50 | 0125D6 0144 51 | 0126 01214B 52 | 0127 012C 53 | 0128 0129 54 | 0129 0128 55 | 012C 0127 56 | 012C62 0137 57 | 012E 0102D9 58 | 0133 010190 59 | 0134 01152D 60 | 0136 0139 61 | 0137 012C62 62 | 0139 0136 63 | 013B 01061B 64 | 013C 013E 65 | 013E 013C 66 | 013F 01BF 67 | 0141 012200 68 | 0142 01010412 69 | 0143 012183 70 | 0144 0125D6 71 | 0145 01018E 72 | 0146 012132 73 | 0147 012141 74 | 014A 01017F 75 | 014B 0122CA 76 | 014C 012142 77 | 014D 0157 78 | 014E 011D0E 79 | 0150 010500 80 | 0151 01038C 81 | 0152 011D1A 82 | 0154 0122A5 83 | 0155 012229 84 | 0156 011D27 85 | 0157 014D 86 | 0159 012144 87 | 015B 015D 88 | 015D 015B 89 | 015F 01203E 90 | 0161 010250 91 | 0162 0171 92 | 0163 010254 93 | 0164 0170 94 | 0165 0101DD 95 | 0166 01025F 96 | 0167 010183 97 | 0168 010265 98 | 0169 010131,010323 99 | 016A 01027E 100 | 016B 01029E 101 | 016C 010283 102 | 016D 01026F 103 | 016E 0175 104 | 0170 0164 105 | 0171 0162 106 | 0172 010279 107 | 0174 010287 108 | 0175 016E 109 | 0176 01028C 110 | 0177 01028D 111 | 0179 01028E 112 | 017B 017D 113 | 017D 017B 114 | 01A1 0121 115 | 01BF 013F 116 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE-DERAIL.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | #include "WHITESPACE.h" 3 | 4 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 5 | struct my_s *t; 6 | if(bsdconv_hash_has(ins, HASHKEY)){ 7 | t=bsdconv_hash_get(ins, HASHKEY); 8 | }else{ 9 | t=malloc(sizeof(struct my_s)); 10 | bsdconv_hash_set(ins, HASHKEY, t); 11 | } 12 | t->queue=NULL; 13 | t->rerail=NULL; 14 | THIS_CODEC(ins)->priv=t; 15 | return 0; 16 | } 17 | 18 | void cbinit(struct 
bsdconv_instance *ins){ 19 | struct my_s *t=THIS_CODEC(ins)->priv; 20 | t->offsetA=0; 21 | t->offsetB=0; 22 | t->last=&t->queue; 23 | struct data_rt *q; 24 | while(t->queue){ 25 | DATUM_FREE(ins, (struct data_rt *)t->queue->data); 26 | q=t->queue; 27 | t->queue=t->queue->next; 28 | DATUM_FREE(ins, q); 29 | } 30 | } 31 | 32 | void cbdestroy(struct bsdconv_instance *ins){ 33 | struct my_s *t=THIS_CODEC(ins)->priv; 34 | struct data_rt *q; 35 | if(bsdconv_hash_has(ins, HASHKEY)){ 36 | while(t->queue){ 37 | DATUM_FREE(ins, (struct data_rt *)t->queue->data); 38 | q=t->queue; 39 | t->queue=t->queue->next; 40 | DATUM_FREE(ins, q); 41 | } 42 | free(t); 43 | bsdconv_hash_del(ins, HASHKEY); 44 | } 45 | } 46 | 47 | void cbconv(struct bsdconv_instance *ins){ 48 | unsigned char *data; 49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 50 | struct my_s *t=THIS_CODEC(ins)->priv; 51 | struct data_rt *q; 52 | data=this_phase->curr->data; 53 | int i; 54 | uint32_t ucs=0; 55 | 56 | this_phase->state.status=NEXTPHASE; 57 | 58 | if(this_phase->curr->len>0 && data[0]==0x1){ 59 | for(i=1;icurr->len;++i){ 60 | ucs<<=8; 61 | ucs|=data[i]; 62 | } 63 | if(ucs==0x09||ucs==0x0A||ucs==0x0D||ucs==0x20){ 64 | DATA_MALLOC(ins, q); 65 | *(t->last)=q; 66 | q->next=NULL; 67 | q->flags=0; 68 | t->last=&q->next; 69 | q->data=(void *) dup_data_rt(ins, this_phase->curr); 70 | ((struct data_rt *)q->data)->next=NULL; 71 | q->len=t->offsetA; 72 | 73 | if(t->rerail){ 74 | t->rerail->flags |= (F_MATCH | F_PENDING); 75 | t->rerail->match_data = NULL; 76 | } 77 | 78 | return; 79 | } 80 | } 81 | t->offsetA+=1; 82 | 83 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 84 | this_phase->data_tail=this_phase->data_tail->next; 85 | this_phase->data_tail->next=NULL; 86 | 87 | return; 88 | } 89 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE-DERAIL.man: -------------------------------------------------------------------------------- 1 | 
EXAMPLE 2 | > echo 之后 | bsdconv utf-8:zhtw:zhtw-words:utf-8 3 | 之後 4 | > echo 之 后 | bsdconv utf-8:zhtw:zhtw-words:utf-8 5 | 之 后 6 | > echo 之 后 | bsdconv utf-8:whitespace-derail:zhtw:zhtw-words:whitespace-rerail:utf-8 7 | 之 後 8 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE-DERAIL.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE-RERAIL.man: -------------------------------------------------------------------------------- 1 | .redirect inter/WHITESPACE-DERAIL 2 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE-RERAIL.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/WHITESPACE.h: -------------------------------------------------------------------------------- 1 | #define HASHKEY "WHITESPACE" 2 | 3 | struct my_s{ 4 | struct data_rt *queue; 5 | struct data_rt **last; 6 | struct bsdconv_phase *rerail; 7 | size_t offsetA; 8 | size_t offsetB; 9 | }; 10 | -------------------------------------------------------------------------------- /modules/inter/WIDTH.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | #define HALF 1 4 | #define FULL 2 5 | #define AMBI -1 6 | 7 | #include "_WIDTH.h" 8 | 9 | struct my_s{ 10 | bsdconv_counter_t *full; 11 | bsdconv_counter_t *half; 12 | bsdconv_counter_t *ambi; 13 | }; 14 | 15 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 16 | struct my_s *r=THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s)); 17 | 18 | r->full=bsdconv_counter(ins, "FULL"); 19 | r->half=bsdconv_counter(ins, "HALF"); 20 | r->ambi=bsdconv_counter(ins, 
"AMBI"); 21 | return 0; 22 | } 23 | 24 | void cbconv(struct bsdconv_instance *ins){ 25 | struct my_s *r=THIS_CODEC(ins)->priv; 26 | unsigned char *data; 27 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 28 | data=this_phase->curr->data; 29 | int i; 30 | int max=sizeof(width_table) / sizeof(struct width_interval) - 1; 31 | int min = 0; 32 | int mid; 33 | uint32_t ucs=0; 34 | 35 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 36 | this_phase->data_tail=this_phase->data_tail->next; 37 | this_phase->data_tail->next=NULL; 38 | 39 | if(data[0]==0x1){ 40 | for(i=1;icurr->len;++i){ 41 | ucs<<=8; 42 | ucs|=data[i]; 43 | } 44 | if (ucs < width_table[0].beg || ucs > width_table[max].end){ 45 | //noop 46 | }else while (max >= min) { 47 | mid = (min + max) / 2; 48 | if (ucs > width_table[mid].end) 49 | min = mid + 1; 50 | else if (ucs < width_table[mid].beg) 51 | max = mid - 1; 52 | else{ 53 | switch(width_table[mid].width){ 54 | case FULL: 55 | *(r->full)+=1; 56 | break; 57 | case HALF: 58 | *(r->half)+=1; 59 | break; 60 | case AMBI: 61 | *(r->ambi)+=1; 62 | break; 63 | } 64 | break; 65 | } 66 | } 67 | } 68 | 69 | this_phase->state.status=NEXTPHASE; 70 | return; 71 | } 72 | 73 | 74 | void cbdestroy(struct bsdconv_instance *ins){ 75 | struct my_s *r=THIS_CODEC(ins)->priv; 76 | free(r); 77 | } 78 | -------------------------------------------------------------------------------- /modules/inter/WIDTH.man: -------------------------------------------------------------------------------- 1 | Increase counters {full,half,ambi} accordingly 2 | 3 | EXAMPLE 4 | > perl -e 'print "123Б測試"' | bsdconv utf-8:width:null 5 | Full width: 2 6 | Half width: 3 7 | Ambi width: 1 8 | -------------------------------------------------------------------------------- /modules/inter/WIDTH.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/WIN.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > perl -e 'print "a\nb"' | bsdconv utf-8:win:utf-8 | hexdump -C 3 | 00000000 61 0d 0a 62 |a..b| 4 | -------------------------------------------------------------------------------- /modules/inter/WIN.txt: -------------------------------------------------------------------------------- 1 | #convert CRLF/CR/LF to CRLF (WIN) 2 | 010D,010A 010D,010A #CRLF 3 | 010A 010D,010A #LF 4 | 010D 010D,010A #CR 5 | -------------------------------------------------------------------------------- /modules/inter/ZH-BONUS-PHRASE.c: -------------------------------------------------------------------------------- 1 | #include "BONUS.c" 2 | -------------------------------------------------------------------------------- /modules/inter/ZH-BONUS.c: -------------------------------------------------------------------------------- 1 | #include "BONUS.c" 2 | -------------------------------------------------------------------------------- /modules/inter/ZH-BONUS.txt: -------------------------------------------------------------------------------- 1 | # Source: http://www.w3.org/html/ig/zh/wiki/Big5-hkscs-vs-uao-in-hk 2 | 0123 ?01,0123 3 | 013639 ?02,013639 4 | 010233B4 ?04,010233B4 5 | 01C5 ?05,01C5 6 | 01FC ?05,01FC 7 | 014E24 ?01,014E24 8 | 015179 ?01,015179 9 | 0152B9 ?01,0152B9 10 | 0153F7 ?01,0153F7 11 | 015553 ?01,015553 12 | 0163F8 ?01,0163F8 13 | 016B74 ?01,016B74 14 | 0170DF ?01,0170DF 15 | 01714A ?01,01714A 16 | 0171DF ?01,0171DF 17 | 017371 ?01,017371 18 | 01743C ?01,01743C 19 | 017468 ?01,017468 20 | 017740 ?01,017740 21 | 017793 ?01,017793 22 | 01781C ?01,01781C 23 | 0178B1 ?01,0178B1 24 | 01793C ?01,01793C 25 | 017962 ?01,017962 26 | 017AEA ?02,017AEA 27 | 017DAB ?01,017DAB 28 | 017EDF ?01,017EDF 29 | 0183D3 ?01,0183D3 30 | 0184AD ?01,0184AD 31 | 01885E ?01,01885E 32 | 0189A7 
?01,0189A7 33 | 0189C6 ?01,0189C6 34 | 018D4E ?01,018D4E 35 | 018D77 ?01,018D77 36 | 018E2A ?01,018E2A 37 | 018E46 ?01,018E46 38 | 018EAD ?01,018EAD 39 | 018FF9 ?01,018FF9 40 | 0190A8 ?02,0190A8 41 | 0190D1 ?02,0190D1 42 | 0190FD ?01,0190FD 43 | 019176 ?03,019176 44 | 0194C3 ?01,0194C3 45 | 0194CA ?01,0194CA 46 | 01976D ?01,01976D 47 | 019938 ?02,019938 48 | 019A90 ?01,019A90 49 | 019DC4 ?01,019DC4 50 | 019E37 ?01,019E37 51 | 019FA5 ?01,019FA5 52 | -------------------------------------------------------------------------------- /modules/inter/ZH-FUZZY-CN.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 澀谷驛渋谷駅涩谷驿| bsdconv utf-8:zh-fuzzy-cn:utf-8 3 | 澀谷驿澀谷驿澀谷驿 4 | -------------------------------------------------------------------------------- /modules/inter/ZH-FUZZY-TW.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 澀谷驛渋谷駅涩谷驿| bsdconv utf-8:zh-fuzzy-tw:utf-8 3 | 澀谷驛澀谷驛澀谷驛 4 | -------------------------------------------------------------------------------- /modules/inter/ZHCN.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 測試 | bsdconv utf-8:zhcn:utf-8 3 | 测试 4 | -------------------------------------------------------------------------------- /modules/inter/ZHTW-WORDS.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 测试之后 | bsdconv utf-8:zhtw:zhtw-words:utf-8 3 | 測試之後 4 | -------------------------------------------------------------------------------- /modules/inter/ZHTW-WORDS.txt: -------------------------------------------------------------------------------- 1 | 014E00,01500B,01613F 014E00,01500B,019858 2 | 014E0D,018981,0191C7 014E0D,018981,0163A1 3 | 014E4B,01540E 014E4B,015F8C 4 | 014E91,016CB3 0196F2,016CB3 5 | 014EC0,014E48 014EC0,019EBC 6 | 014EE5,01540E 014EE5,015F8C 7 | 014F46,01613F 014F46,019858 8 | 015165,01591C 
015165,01591C 9 | 0151E0,01591A 015E7E,01591A 10 | 0151E0,015EA6 015E7E,015EA6 11 | 015206,01949F 015206,019418 12 | 0153D1,01653E 01767C,01653E 13 | 0153D1,0171D2 01767C,0171D2 14 | 0153D1,01734E,0191D1 01767C,01734E,0191D1 15 | 0153D1,0173FE 01767C,0173FE 16 | 0153D1,019001 01767C,019001 17 | 01540E,015929 015F8C,015929 18 | 01540E,016094 015F8C,016094 19 | 01540E,01679C 015F8C,01679C 20 | 015411,015C0E 0156AE,015C0E 21 | 015446,016703 015F85,016703 22 | 01591A,014E48 01591A,019EBC 23 | 01591C,014E4B,01540E 01591C,014E4B,01540E 24 | 015C6C,014E8E 015C6C,0165BC 25 | 015F69,014E91 015F69,0196F2 26 | 015F81,015A5A 015FB5,015A5A 27 | 015F81,016C42 015FB5,016C42 28 | 01600E,014E48 01600E,019EBC 29 | 01613F,01541B 019858,01541B 30 | 016642,01949F 016642,019418 31 | 016700,01540E 016700,015F8C 32 | 016709,0168F1,016709,0189D2 016709,017A1C,016709,0189D2 33 | 0168F1,0189D2 017A1C,0189D2 34 | 016A4B,016881 016A4B,016A11 35 | 016A6B,016881 016A6B,016A11 36 | 016D77,0191CC 016D77,0188E1 37 | 017528,014E8E 017528,0165BC 38 | 0179D2,01949F 0179D2,019418 39 | 01820D,015F97 016368,015F97 40 | 018868,015F81 018868,015FB5 41 | 018C61,015F81 018C61,015FB5 42 | 019019,014E48 019019,019EBC 43 | 019019,0191CC 019019,0188E1 44 | 0190A3,014E48 0190A3,019EBC 45 | 0190A3,0191CC 0190A3,0188E1 46 | 0191C7,016AB3,016994 0163A1,016AB3,016994 47 | 0191CC,019762 0188E1,019762 48 | 01949F,01611B 01937E,01611B 49 | 019632,018303 019632,017BC4 50 | 01982D,0153D1 01982D,019AEE 51 | -------------------------------------------------------------------------------- /modules/inter/ZHTW.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 测试之后|bsdconv utf-8:zhtw:utf-8 3 | 測試之后 4 | -------------------------------------------------------------------------------- /modules/inter/_NF-HANGUL-COMPOSITION.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/inter/_NF-HANGUL-DECOMPOSITION.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | static void decomposeHangul(uint32_t ucs, struct bsdconv_instance *ins); 4 | 5 | #define SBase 0xAC00 6 | #define LBase 0x1100 7 | #define VBase 0x1161 8 | #define TBase 0x11A7 9 | #define LCount 19 10 | #define VCount 21 11 | #define TCount 28 12 | #define NCount (VCount * TCount) 13 | #define SCount (LCount * NCount) 14 | 15 | void cbconv(struct bsdconv_instance *ins){ 16 | unsigned char *data; 17 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 18 | data=this_phase->curr->data; 19 | int i; 20 | uint32_t ucs=0; 21 | 22 | if(data[0]==0x1){ 23 | for(i=1;icurr->len;++i){ 24 | ucs<<=8; 25 | ucs|=data[i]; 26 | } 27 | int SIndex = ucs - SBase; 28 | if(SIndex >= 0 && SIndex < SCount){ 29 | decomposeHangul(ucs, ins); 30 | }else{ 31 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 32 | this_phase->data_tail=this_phase->data_tail->next; 33 | this_phase->data_tail->next=NULL; 34 | } 35 | } 36 | 37 | this_phase->state.status=NEXTPHASE; 38 | return; 39 | } 40 | 41 | static void decomposeHangul(uint32_t ucs, struct bsdconv_instance *ins){ 42 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 43 | int SIndex = ucs - SBase; 44 | if(SIndex >= 0 && SIndex < SCount){ 45 | int L = LBase + SIndex / NCount; 46 | int V = VBase + (SIndex % NCount) / TCount; 47 | int T = TBase + SIndex % TCount; 48 | 49 | decomposeHangul(L, ins); 50 | decomposeHangul(V, ins); 51 | if(T != TBase) 52 | decomposeHangul(T, ins); 53 | }else{ 54 | int i; 55 | unsigned char *p; 56 | unsigned char stack[8]; 57 | int stack_len=0; 58 | DATA_MALLOC(ins, this_phase->data_tail->next); 59 | this_phase->data_tail=this_phase->data_tail->next; 60 | while(ucs && stack_len>= 8; 63 | stack_len += 1; 64 | } 65 | this_phase->data_tail->len=stack_len+=1; 66 | 
this_phase->data_tail->data=malloc(this_phase->data_tail->len); 67 | p=this_phase->data_tail->data; 68 | *p=1; 69 | p+=1; 70 | stack_len-=1; 71 | for(i=0;idata_tail->flags=F_FREE; 76 | this_phase->data_tail->next=NULL; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /modules/inter/_NF-HANGUL-DECOMPOSITION.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/_NF-ORDER.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/_NFC.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/inter/alias: -------------------------------------------------------------------------------- 1 | NFD _NFD:_NF-HANGUL-DECOMPOSITION:_NF-ORDER 2 | NFKD _NFKD:_NF-HANGUL-DECOMPOSITION:_NF-ORDER 3 | NFC NFD:_NFC:_NF-HANGUL-COMPOSITION 4 | NFKC NFKD:_NFC:_NF-HANGUL-COMPOSITION 5 | NFKD-CASEFOLD NFD:CASEFOLD:NFKD:CASEFOLD:NFKD 6 | SPLIT INSERT#AFTER=002C 7 | NOBOM REPLACE#01FEFF 8 | #compatibility 9 | ZH-STRINGS STRINGS#FOR=CJK 10 | -------------------------------------------------------------------------------- /modules/scorer/CJK.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Reference: http://blog.oasisfeng.com/2006/10/19/full-cjk-unicode-range/ 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range_with_score ranges[] = { 8 | { 0x0, 0x7F, 4 }, //ASCII 9 | { 0x3000, 0x303F, 4 }, //CJK punctuation 10 | { 0x3040, 0x309F, 5 }, //Japanese hiragana 11 | { 0x30A0, 0x30FF, 5 }, //Japanese katakana 12 | { 0x3100, 0x312F, 4 }, //Chinese Bopomofo 13 | { 0x3400, 
0x4DB5, 3 }, //CJK Unified Ideographs Extension A ;Unicode3.0 14 | { 0x4E00, 0x6FFF, 5 }, //CJK Unified Ideographs ;Unicode 1.1 ;HF 15 | { 0x7000, 0x9FA5, 4 }, //CJK Unified Ideographs ;Unicode 1.1 ;LF 16 | { 0x9FA6, 0x9FBB, 3 }, //CJK Unified Ideographs ;Unicode 4.1 17 | { 0xAC00, 0xD7AF, 3 }, //Korean word 18 | { 0xF900, 0xFA2D, 4 }, //CJK Compatibility Ideographs ;Unicode 1.1 19 | { 0xFA30, 0xFA6A, 4 }, //CJK Compatibility Ideographs ;Unicode 3.2 20 | { 0xFA70, 0xFAD9, 2 }, //CJK Compatibility Ideographs ;Unicode 4.1 21 | { 0xFF00, 0xFFEF, 3}, //Fullwidth ASCII, punctuation, Japanese, Korean 22 | { 0x20000, 0x2A6D6, 1 },//CJK Unified Ideographs Extension B ;Unicode 3.1 23 | { 0x2F800, 0x2FA1D, 1 },//CJK Compatibility Supplement ;Unicode 3.1 24 | }; 25 | 26 | #include "unicode_range.c" 27 | -------------------------------------------------------------------------------- /modules/scorer/LATIN1.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Reference: http://en.wikipedia.org/wiki/Windows-1252 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | static const struct uint32_range_with_score ranges[] = { 8 | { 0x0, 0x80, 2 }, 9 | { 0x82, 0x8C, 2 }, 10 | { 0x8E, 0x8E, 2 }, 11 | { 0x91, 0x9C, 2 }, 12 | { 0x9E, 0xFF, 2 }, 13 | }; 14 | 15 | #include "unicode_range.c" 16 | -------------------------------------------------------------------------------- /modules/scorer/unicode_range.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Some code come from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 3 | */ 4 | 5 | #include "../../src/bsdconv.h" 6 | 7 | uint32_t cbscorer(struct data_rt *data){ 8 | uint32_t ucs=0; 9 | int i; 10 | int max=sizeof(ranges) / sizeof(struct uint32_range_with_score) - 1; 11 | int min = 0; 12 | int mid; 13 | 14 | if(data->len<1 || UCP(data->data)[0]!=1){ 15 | return 0; 16 | } 17 | 18 | for(i=1;ilen;++i){ 19 | ucs<<=8; 20 | ucs|=UCP(data->data)[i]; 21 | } 22 | 
23 | if (ucs < ranges[0].first || ucs > ranges[max].last){ 24 | //noop 25 | }else while (max >= min) { 26 | mid = (min + max) / 2; 27 | if (ucs > ranges[mid].last) 28 | min = mid + 1; 29 | else if (ucs < ranges[mid].first) 30 | max = mid - 1; 31 | else{ 32 | return ranges[mid].score; 33 | } 34 | } 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /modules/to/00.txt: -------------------------------------------------------------------------------- 1 | 0100 00 2 | -------------------------------------------------------------------------------- /modules/to/ANY.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_st { 6 | struct data_rt *data; 7 | bsdconv_counter_t *counter; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | struct my_st *r=malloc(sizeof(struct my_st)); 12 | struct data_rt *bak; 13 | int e; 14 | r->data=str2data("3F", &e, ins); 15 | r->counter=NULL; 16 | while(arg){ 17 | if(strcasecmp(arg->key, "ERROR")==0){ 18 | if(arg->ptr) 19 | r->counter=bsdconv_counter(ins, arg->ptr); 20 | else 21 | r->counter=bsdconv_counter(ins, "OERR"); 22 | }else if(strcasecmp(arg->key, "DROP")==0){ 23 | DATA_FREE(ins, r->data); 24 | r->data = NULL; 25 | }else{ 26 | bak=r->data; 27 | r->data=str2data(arg->key, &e, ins); 28 | DATA_FREE(ins, bak); 29 | if(e){ 30 | DATA_FREE(ins, r->data); 31 | free(r); 32 | return e; 33 | } 34 | } 35 | arg=arg->next; 36 | } 37 | THIS_CODEC(ins)->priv=r; 38 | return 0; 39 | } 40 | 41 | void cbdestroy(struct bsdconv_instance *ins){ 42 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 43 | struct my_st *r=this_phase->codec[this_phase->index].priv; 44 | DATA_FREE(ins, r->data); 45 | free(r); 46 | } 47 | 48 | void cbconv(struct bsdconv_instance *ins){ 49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 50 | struct my_st 
*r=this_phase->codec[this_phase->index].priv; 51 | 52 | LISTCPY(ins, this_phase->data_tail, r->data); 53 | 54 | this_phase->state.status=NEXTPHASE; 55 | 56 | if(r->counter) 57 | *(r->counter)+=1; 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /modules/to/ANY.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Any byte 3 | EXAMPLE * 4 | 5 | OUTPUT 6 | TYPE Specified by argument 7 | DESC Specified by argument 8 | 9 | ARGUMENT 10 | ERROR 11 | DESC Increase counter (OERR if no counter name specified) 12 | DROP 13 | DESC Don't output any data 14 | $DataList 15 | DESC Output value 16 | EXAMPLE 3F 17 | EXAMPLE 3F.2F 18 | 19 | EXAMPLE 20 | > echo -n test測試test |bsdconv utf-8:ascii,any#3f 21 | test??test 22 | -------------------------------------------------------------------------------- /modules/to/ANY.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/to/ASCII-HTML-INFO.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 測 | bsdconv utf-8:ascii-html-info 3 | 4 | -------------------------------------------------------------------------------- /modules/to/ASCII-HTML-INFO.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/ASCII-HTML-UNICODE-IMG.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | #define TAILIZE(p) while(*p){ p++ ;} 7 | 8 | void cbconv(struct bsdconv_instance *ins){ 9 | char *data, *p, buf[128]={0}; 10 | unsigned int len, i; 11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 12 | data=this_phase->curr->data; 13 | if(*data!=0x01){ 14 | this_phase->state.status=DEADEND; 15 | return; 16 | } 17 | this_phase->state.status=NEXTPHASE; 18 | p=buf; 19 | i=*data; 20 | data+=1; 21 | len=this_phase->curr->len-1; 22 | DATA_MALLOC(ins, this_phase->data_tail->next); 23 | this_phase->data_tail=this_phase->data_tail->next; 24 | this_phase->data_tail->next=NULL; 25 | 26 | sprintf(p,""); 37 | TAILIZE(p); 38 | len=p-buf; 39 | this_phase->data_tail->len=len; 40 | this_phase->data_tail->flags=F_FREE; 41 | this_phase->data_tail->data=malloc(len); 42 | memcpy(this_phase->data_tail->data, buf, len); 43 | 44 | return; 45 | } 46 | -------------------------------------------------------------------------------- /modules/to/ASCII-HTML-UNICODE-IMG.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 測 | bsdconv utf-8:ascii-html-unicode-img 3 | 4 | -------------------------------------------------------------------------------- /modules/to/ASCII-HTML-UNICODE-IMG.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/ASCII-NAMED-HTML-ENTITY.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo Ç | bsdconv utf-8:ascii-named-html-entity 3 | Ç 4 | -------------------------------------------------------------------------------- /modules/to/ASCII.man: -------------------------------------------------------------------------------- 1 | .redirect from/ASCII 2 | -------------------------------------------------------------------------------- /modules/to/ASCII.txt: -------------------------------------------------------------------------------- 1 | #transposed from from/ASCII 2 | # add 0102DC record 3 | 0101 01 4 | 0102 02 5 | 0103 03 6 | 0104 04 7 | 0105 05 8 | 0106 06 9 | 0107 07 10 | 0108 08 11 | 0109 09 12 | 010A 0A 13 | 010B 0B 14 | 010C 0C 15 | 010D 0D 16 | 010E 0E 17 | 010F 0F 18 | 0110 10 19 | 0111 11 20 | 0112 12 21 | 0113 13 22 | 0114 14 23 | 0115 15 24 | 0116 16 25 | 0117 17 26 | 0118 18 27 | 0119 19 28 | 011A 1A 29 | 011B 1B 30 | 011C 1C 31 | 011D 1D 32 | 011E 1E 33 | 011F 1F 34 | 0120 20 35 | 0121 21 36 | 0122 22 37 | 0123 23 38 | 0124 24 39 | 0125 25 40 | 0126 26 41 | 0127 27 42 | 0128 28 43 | 0129 29 44 | 012A 2A 45 | 012B 2B 46 | 012C 2C 47 | 012D 2D 48 | 012E 2E 49 | 012F 2F 50 | 0130 30 51 | 0131 31 52 | 0132 32 53 | 0133 33 54 | 0134 34 55 | 0135 35 56 | 0136 36 57 | 0137 37 58 | 0138 38 59 | 0139 39 60 | 013A 3A 61 | 013B 3B 62 | 013C 3C 63 | 013D 3D 64 | 013E 3E 65 | 013F 3F 66 | 0140 40 67 | 0141 41 68 | 0142 42 69 | 0143 43 70 | 0144 44 71 | 0145 45 72 | 0146 46 73 | 0147 47 74 | 0148 48 75 | 0149 49 76 | 014A 4A 77 | 014B 4B 78 | 014C 4C 79 | 014D 4D 80 | 014E 4E 81 | 014F 4F 82 | 0150 50 83 | 0151 51 84 | 0152 52 85 | 0153 53 86 | 0154 54 87 | 0155 55 88 | 0156 56 89 | 0157 57 90 | 0158 58 91 | 0159 59 92 | 015A 5A 93 | 015B 5B 94 | 015C 5C 95 | 015D 5D 96 | 015E 5E 97 | 015F 5F 98 | 0160 60 99 | 0161 61 100 | 
0162 62 101 | 0163 63 102 | 0164 64 103 | 0165 65 104 | 0166 66 105 | 0167 67 106 | 0168 68 107 | 0169 69 108 | 016A 6A 109 | 016B 6B 110 | 016C 6C 111 | 016D 6D 112 | 016E 6E 113 | 016F 6F 114 | 0170 70 115 | 0171 71 116 | 0172 72 117 | 0173 73 118 | 0174 74 119 | 0175 75 120 | 0176 76 121 | 0177 77 122 | 0178 78 123 | 0179 79 124 | 017A 7A 125 | 017B 7B 126 | 017C 7C 127 | 017D 7D 128 | 017E 7E 129 | 0102DC 7E 130 | 017F 7F 131 | -------------------------------------------------------------------------------- /modules/to/BIG5-5C.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 功能|bsdconv utf-8:big5-5c,big5|hexdump -C 3 | 00000000 a5 5c 5c af e0 0a |.\\...| 4 | -------------------------------------------------------------------------------- /modules/to/BIG5-5C.txt: -------------------------------------------------------------------------------- 1 | # transpose from from/big5-5C 2 | 01FE4F A15C5C 3 | 01515D A25C5C 4 | 0103B1 A35C5C 5 | 014E48 A45C5C 6 | 01529F A55C5C 7 | 015412 A65C5C 8 | 01542D A75C5C 9 | 016C94 A85C5C 10 | 01577C A95C5C 11 | 016B7F AA5C5C 12 | 014FDE AB5C5C 13 | 0167AF AC5C5C 14 | 0182D2 AD5C5C 15 | 015A09 AE5C5C 16 | 0173EE AF5C5C 17 | 018C79 B05C5C 18 | 015D24 B15C5C 19 | 016DDA B25C5C 20 | 018A31 B35C5C 21 | 015EC4 B45C5C 22 | 017435 B55C5C 23 | 018DDA B65C5C 24 | 016127 B75C5C 25 | 017A1E B85C5C 26 | 01923E B95C5C 27 | 01669D BA5C5C 28 | 0184CB BB5C5C 29 | 0158A6 BC5C5C 30 | 017A40 BD5C5C 31 | 0195B1 BE5C5C 32 | 01749E BF5C5C 33 | 019910 C05C5C 34 | 017E37 C15C5C 35 | 0164FA C25C5C 36 | 019EE0 C35C5C 37 | 015B40 C45C5C 38 | 019ACF C55C5C 39 | 018EA1 C65C5C 40 | 015C10 C95C5C 41 | 014F62 CA5C5C 42 | 016C7B CB5C5C 43 | 015CA4 CC5C5C 44 | 0172D6 CD5C5C 45 | 0157A5 CE5C5C 46 | 0167E6 CF5C5C 47 | 0180D0 D05C5C 48 | 015A16 D15C5C 49 | 016D82 D25C5C 50 | 017F61 D35C5C 51 | 015045 D45C5C 52 | 0160DD D55C5C 53 | 01727E D65C5C 54 | 01838D D75C5C 55 | 01509C D85C5C 56 | 0163CA D95C5C 57 | 
01712E DA5C5C 58 | 01833B DB5C5C 59 | 019103 DC5C5C 60 | 015E4B DD5C5C 61 | 016EDC DE5C5C 62 | 017D85 DF5C5C 63 | 018D68 E05C5C 64 | 01587F E15C5C 65 | 0169D9 E25C5C 66 | 017BA4 E35C5C 67 | 018E0A E45C5C 68 | 015AF9 E55C5C 69 | 016F7F E65C5C 70 | 01850C E75C5C 71 | 019186 E85C5C 72 | 015B1E E95C5C 73 | 017366 EA5C5C 74 | 01878F EB5C5C 75 | 019924 EC5C5C 76 | 0171E1 ED5C5C 77 | 0187B0 EE5C5C 78 | 0199F9 EF5C5C 79 | 017912 F05C5C 80 | 0193AA F15C5C 81 | 017019 F25C5C 82 | 019140 F35C5C 83 | 017035 F45C5C 84 | 019A31 F55C5C 85 | 019145 F65C5C 86 | 018D15 F75C5C 87 | 019C4B F85C5C 88 | 019C6D F95C5C 89 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-KEYWORD.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Products of from/BSDCONV-KEYWORD 3 | 4 | OUTPUT 5 | DESC Original form of data from from/BSDCONV-KEYWORD 6 | 7 | EXAMPLE 8 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv 9 | 016E2C,018A66 016D4B,018BD5 10 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv | bsdconv bsdconv-keyword,bsdconv:bsdconv-keyword,utf-8 11 | 測,試 测,试 12 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-KEYWORD.txt: -------------------------------------------------------------------------------- 1 | 002C 2C 2 | 0009 09 3 | 0020 20 4 | 003F 3F 5 | 000A 0A 6 | 000D 0D 7 | 005C20 5C20 8 | 005C2C 5C2C 9 | 005C5C 5C5C 10 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-LOG.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | #define TAILIZE(p) while(*p){ p++ ;} 6 | 7 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 8 | THIS_CODEC(ins)->priv=fopen(getenv("BSDCONV_TO_LOG"),"a"); 9 | return 0; 10 | } 11 
| 12 | void cbdestroy(struct bsdconv_instance *ins){ 13 | void *p=THIS_CODEC(ins)->priv; 14 | fclose(p); 15 | } 16 | 17 | void cbconv(struct bsdconv_instance *ins){ 18 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 19 | FILE *fp=THIS_CODEC(ins)->priv; 20 | int i; 21 | this_phase->state.status=NEXTPHASE; 22 | 23 | for(i=0;icurr->len;++i){ 24 | fprintf(fp,"%02X",UCP(this_phase->curr->data)[i]); 25 | } 26 | if(this_phase->curr->flags){ 27 | fprintf(fp," ("); 28 | if(this_phase->curr->flags & F_FREE) fprintf(fp, " FREE"); 29 | if(this_phase->curr->flags & F_MARK) fprintf(fp, " MARK"); 30 | fprintf(fp," )"); 31 | } 32 | fprintf(fp,"\n"); 33 | fflush(fp); 34 | } 35 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-LOG.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 測試 | env BSDCONV_TO_LOG=/tmp/bsdconv.log bsdconv utf-8:bsdconv-log 3 | > cat /tmp/bsdconv.log 4 | 016E2C ( FREE ) 5 | 018A66 ( FREE ) 6 | 010A 7 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-LOG.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-OUTPUT.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 7 | FILE *fp=stdout; 8 | while(arg){ 9 | if(strcasecmp(arg->key, "STDERR")==0){ 10 | fp=stderr; 11 | }else if(strcasecmp(arg->key, "STDOUT")==0){ 12 | fp=stdout; 13 | }else{ 14 | return EINVAL; 15 | } 16 | arg=arg->next; 17 | } 18 | THIS_CODEC(ins)->priv=fp; 19 | return 0; 20 | } 21 | 22 | void cbconv(struct bsdconv_instance *ins){ 23 | FILE *fp=THIS_CODEC(ins)->priv; 24 | int i; 25 | ins->phase[ins->phase_index].state.status=NEXTPHASE; 26 | 27 | for(i=0;iphase[ins->phase_index].curr->len;++i){ 28 | fprintf(fp, "%02X",UCP(ins->phase[ins->phase_index].curr->data)[i]); 29 | } 30 | if(ins->phase[ins->phase_index].curr->flags){ 31 | fprintf(fp, " ("); 32 | if(ins->phase[ins->phase_index].curr->flags & F_FREE) fprintf(fp, " FREE"); 33 | if(ins->phase[ins->phase_index].curr->flags & F_MARK) fprintf(fp, " MARK"); 34 | fprintf(fp, " )"); 35 | } 36 | fprintf(fp, "\n"); 37 | } 38 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-OUTPUT.man: -------------------------------------------------------------------------------- 1 | ARGUMENT 2 | STDERR 3 | Output to stderr 4 | STDOUT (default) 5 | Output to stdout 6 | 7 | EXAMPLE 8 | #foreground debug tool 9 | > echo 測試 | bsdconv utf-8:bsdconv-output 10 | 016E2C ( FREE ) 11 | 018A66 ( FREE ) 12 | 010A 13 | -------------------------------------------------------------------------------- /modules/to/BSDCONV-OUTPUT.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/BSDCONV.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | #define TAILIZE(p) while(*p){ p++ ;} 6 | 7 | void cbconv(struct bsdconv_instance *ins){ 8 | int i; 9 | char *p; 10 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 11 | 12 | this_phase->state.status=NEXTPHASE; 13 | 14 | DATA_MALLOC(ins, this_phase->data_tail->next); 15 | this_phase->data_tail=this_phase->data_tail->next; 16 | this_phase->data_tail->next=NULL; 17 | this_phase->data_tail->flags=F_FREE; 18 | 19 | this_phase->data_tail->len=this_phase->curr->len*2; 20 | p=this_phase->data_tail->data=malloc(this_phase->data_tail->len+1); 21 | for(i=0;icurr->len;++i){ 22 | sprintf(p,"%02X", UCP(this_phase->curr->data)[i]); 23 | TAILIZE(p); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /modules/to/BSDCONV.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | > echo 測 | bsdconv utf-8:bsdconv 3 | 016E2C010A 4 | -------------------------------------------------------------------------------- /modules/to/BSDCONV.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/BYTE.man: -------------------------------------------------------------------------------- 1 | .redirect from/BYTE 2 | -------------------------------------------------------------------------------- /modules/to/CCCII.man: -------------------------------------------------------------------------------- 1 | .redirect from/CCCII 2 | -------------------------------------------------------------------------------- /modules/to/CP936-TRANS.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/to/CP950-TRANS.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/to/ESCAPE.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | DESC Any byte 3 | EXAMPLE * 4 | 5 | OUTPUT 6 | TYPE Specified by argument 7 | DESC Specified by argument 8 | 9 | ARGUMENT 10 | PREFIX 11 | FORMAT Hex 12 | DESC Output prefix 13 | EXAMPLE 013F 14 | SUFFIX 15 | FORMAT Hex 16 | DESC Output suffix 17 | EXAMPLE 013F 18 | MODE 19 | FORMAT ENUM{hex,16,dec,10,oct,8} 20 | DESC Escape mode 21 | FOR 22 | FORMAT ENUM{UNICODE,1,BYTE,3} 23 | DESC Input type whitelist 24 | 25 | EXAMPLE 26 | > echo 測test試 | bsdconv 'utf-8:ascii,url' #URL === ESCAPE#MODE=16&PREFIX=2575,ESCAPE#MODE=16&PREFIX=25 27 | %u6E2Ctest%u8A66 28 | > echo 測test喆試 | bsdconv 'utf-8:big5,unicode|skip,ascii,byte:ascii,url' 29 | %B4%FAtest%u5586%B8%D5 30 | > echo -n test測試 | bsdconv 'utf-8:ascii,ESCAPE#FOR=UNICODE&MODE=16&PREFIX=5C75' 31 | test\u6e2c\u8a66 32 | -------------------------------------------------------------------------------- /modules/to/ESCAPE.txt: 
-------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/to/IBM-37.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/IBM-930.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/to/IBM-930.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/IBM-933.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/to/IBM-933.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/IBM-935.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/to/IBM-935.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/IBM-937.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/to/IBM-937.man: -------------------------------------------------------------------------------- 
1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/IBM-939.c: -------------------------------------------------------------------------------- 1 | #include "EBCDIC.c" 2 | -------------------------------------------------------------------------------- /modules/to/IBM-939.man: -------------------------------------------------------------------------------- 1 | .redirect from/IBM-37 2 | -------------------------------------------------------------------------------- /modules/to/NULL.c: -------------------------------------------------------------------------------- 1 | #include "../../src/bsdconv.h" 2 | 3 | void cbconv(struct bsdconv_instance *ins){ 4 | THIS_PHASE(ins)->state.status=NEXTPHASE; 5 | return; 6 | } 7 | -------------------------------------------------------------------------------- /modules/to/NULL.man: -------------------------------------------------------------------------------- 1 | EXAMPLE 2 | #used to test decoding 3 | > echo 測試blah blah blah | bsdconv utf-8:null 4 | #(no output) 5 | -------------------------------------------------------------------------------- /modules/to/NULL.txt: -------------------------------------------------------------------------------- 1 | * ? 
2 | -------------------------------------------------------------------------------- /modules/to/PASS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../../src/bsdconv.h" 4 | 5 | struct my_s{ 6 | struct bsdconv_filter *filter; 7 | int mark; 8 | }; 9 | 10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){ 11 | struct my_s *r=malloc(sizeof(struct my_s)); 12 | THIS_CODEC(ins)->priv=r; 13 | r->filter=NULL; 14 | r->mark=0; 15 | 16 | char *filter=NULL; 17 | while(arg){ 18 | if(strcasecmp(arg->key, "MARK")==0){ 19 | r->mark=1; 20 | }else if(strcasecmp(arg->key, "FOR")==0){ 21 | filter=arg->ptr; 22 | }else{ 23 | free(r); 24 | return EINVAL; 25 | } 26 | arg=arg->next; 27 | } 28 | if(filter!=NULL){ 29 | r->filter=load_filter(filter); 30 | if(r->filter==NULL){ 31 | free(r); 32 | return EOPNOTSUPP; 33 | } 34 | } 35 | return 0; 36 | } 37 | 38 | void cbdestroy(struct bsdconv_instance *ins){ 39 | struct my_s *r=THIS_CODEC(ins)->priv; 40 | if(r->filter) 41 | unload_filter(r->filter); 42 | free(r); 43 | } 44 | 45 | void cbconv(struct bsdconv_instance *ins){ 46 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 47 | struct my_s *t=THIS_CODEC(ins)->priv; 48 | int pass=1; 49 | 50 | if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr)) 51 | pass=0; 52 | 53 | if(pass){ 54 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr); 55 | this_phase->data_tail=this_phase->data_tail->next; 56 | this_phase->data_tail->next=NULL; 57 | 58 | if(t->mark) 59 | this_phase->data_tail->flags |= F_MARK; 60 | 61 | this_phase->state.status=NEXTPHASE; 62 | }else{ 63 | this_phase->state.status=DEADEND; 64 | } 65 | 66 | return; 67 | } 68 | -------------------------------------------------------------------------------- /modules/to/PASS.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC Filtered 
or decorated according to arguments 7 | 8 | ARGUMENT 9 | MARK 10 | DESC Add "MARK" flag 11 | FOR 12 | FORMAT Filter 13 | 14 | EXAMPLE 15 | #pass through packets queue 16 | > echo -n abc | bsdconv 'ascii:pass|pass:bsdconv-stdout' 17 | 0161 ( SKIP ) 18 | 0162 ( SKIP ) 19 | 0163 ( SKIP ) 20 | > echo -n 測試 | bsdconv 'utf-8:utf-16le|pass:bsdconv-stdout' 21 | 2C6E ( FREE ) 22 | 668A ( FREE ) 23 | -------------------------------------------------------------------------------- /modules/to/PASS.txt: -------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/to/RAW.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../../src/bsdconv.h" 3 | 4 | void cbconv(struct bsdconv_instance *ins){ 5 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 6 | int i; 7 | 8 | DATA_MALLOC(ins, this_phase->data_tail->next); 9 | this_phase->data_tail=this_phase->data_tail->next; 10 | this_phase->data_tail->next=NULL; 11 | this_phase->data_tail->len=ins->phase[ins->phase_index].curr->len-1; 12 | this_phase->data_tail->flags=F_FREE; 13 | this_phase->data_tail->data=malloc(this_phase->data_tail->len); 14 | for(i=0;idata_tail->len;++i){ 15 | CP(this_phase->data_tail->data)[i]=CP(this_phase->curr->data)[i+1]; 16 | } 17 | this_phase->state.status=NEXTPHASE; 18 | return; 19 | } 20 | -------------------------------------------------------------------------------- /modules/to/RAW.man: -------------------------------------------------------------------------------- 1 | INPUT 2 | TYPE Any 3 | 4 | OUTPUT 5 | TYPE Any 6 | DESC Input data with removal of type identifier byte 7 | 8 | EXAMPLE 9 | > printf 測試 | bsdconv utf-8:raw|hexdump -C 10 | 00000000 6e 2c 8a 66 |n,.f| 11 | -------------------------------------------------------------------------------- /modules/to/RAW.txt: 
-------------------------------------------------------------------------------- 1 | * ? 2 | -------------------------------------------------------------------------------- /modules/to/UCS-2BE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | void cbconv(struct bsdconv_instance *ins){ 7 | char *data; 8 | unsigned int len, i; 9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 10 | data=this_phase->curr->data; 11 | if(this_phase->curr->len > 3){ 12 | this_phase->state.status=DEADEND; 13 | return; 14 | } 15 | 16 | this_phase->state.status=NEXTPHASE; 17 | data+=1; 18 | len=this_phase->curr->len-1; 19 | 20 | DATA_MALLOC(ins, this_phase->data_tail->next); 21 | this_phase->data_tail=this_phase->data_tail->next; 22 | this_phase->data_tail->next=NULL; 23 | this_phase->data_tail->len=2; 24 | this_phase->data_tail->flags=F_FREE; 25 | this_phase->data_tail->data=malloc(2); 26 | for(i=0;i<2-len;++i){ 27 | CP(this_phase->data_tail->data)[i]=0x0; 28 | } 29 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 30 | } 31 | -------------------------------------------------------------------------------- /modules/to/UCS-2BE.man: -------------------------------------------------------------------------------- 1 | UCS-2, subset of UTF-16 2 | -------------------------------------------------------------------------------- /modules/to/UCS-2BE.txt: -------------------------------------------------------------------------------- 1 | 01* ? 
2 | -------------------------------------------------------------------------------- /modules/to/UCS-2LE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i)) 7 | 8 | void cbconv(struct bsdconv_instance *ins){ 9 | char *data; 10 | unsigned int len, i; 11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 12 | data=this_phase->curr->data; 13 | if(this_phase->curr->len > 3){ 14 | this_phase->state.status=DEADEND; 15 | return; 16 | } 17 | this_phase->state.status=NEXTPHASE; 18 | data+=1; 19 | len=this_phase->curr->len-1; 20 | 21 | DATA_MALLOC(ins, this_phase->data_tail->next); 22 | this_phase->data_tail=this_phase->data_tail->next; 23 | this_phase->data_tail->next=NULL; 24 | this_phase->data_tail->len=2; 25 | this_phase->data_tail->flags=F_FREE; 26 | this_phase->data_tail->data=malloc(2); 27 | for(i=0;i<2-len;++i){ 28 | CP(this_phase->data_tail->data)[i]=0x0; 29 | } 30 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 31 | data=this_phase->data_tail->data; 32 | 33 | SWAP(data[0],data[1],i); 34 | return; 35 | } 36 | -------------------------------------------------------------------------------- /modules/to/UCS-2LE.man: -------------------------------------------------------------------------------- 1 | .redirect to/UCS-2BE 2 | -------------------------------------------------------------------------------- /modules/to/UCS-2LE.txt: -------------------------------------------------------------------------------- 1 | 01* ? 
2 | -------------------------------------------------------------------------------- /modules/to/UTF-16BE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | void cbconv(struct bsdconv_instance *ins){ 7 | char *data, *p, c; 8 | unsigned int len, i; 9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 10 | data=this_phase->curr->data; 11 | 12 | data+=1; 13 | if(this_phase->curr->len > 3){ 14 | this_phase->state.status=NEXTPHASE; 15 | 16 | DATA_MALLOC(ins, this_phase->data_tail->next); 17 | this_phase->data_tail=this_phase->data_tail->next; 18 | this_phase->data_tail->next=NULL; 19 | this_phase->data_tail->len=4; 20 | this_phase->data_tail->flags=F_FREE; 21 | p=this_phase->data_tail->data=malloc(4); 22 | 23 | c=*data-1; 24 | *p=bb11011000; 25 | *p |= (c >> 2) & bb00000011; 26 | ++p; 27 | *p=(c << 6) & bb11000000; 28 | ++data; 29 | *p |= (*data >> 2) & bb00111111; 30 | ++p; 31 | *p=bb11011100; 32 | *p |= *data & bb00000011; 33 | ++p; 34 | ++data; 35 | *p=*data; 36 | }else{ 37 | this_phase->state.status=NEXTPHASE; 38 | len=this_phase->curr->len-1; 39 | 40 | DATA_MALLOC(ins, this_phase->data_tail->next); 41 | this_phase->data_tail=this_phase->data_tail->next; 42 | this_phase->data_tail->next=NULL; 43 | this_phase->data_tail->len=2; 44 | this_phase->data_tail->flags=F_FREE; 45 | this_phase->data_tail->data=malloc(2); 46 | for(i=0;i<2-len;++i){ 47 | CP(this_phase->data_tail->data)[i]=0x0; 48 | } 49 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 50 | } 51 | return; 52 | } 53 | -------------------------------------------------------------------------------- /modules/to/UTF-16BE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-16BE 2 | -------------------------------------------------------------------------------- /modules/to/UTF-16BE.txt: 
-------------------------------------------------------------------------------- 1 | 01* ? 2 | -------------------------------------------------------------------------------- /modules/to/UTF-16LE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i)) 7 | 8 | void cbconv(struct bsdconv_instance *ins){ 9 | char *data, *p, c; 10 | unsigned int len, i; 11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 12 | data=this_phase->curr->data; 13 | 14 | data+=1; 15 | if(this_phase->curr->len > 3){ 16 | this_phase->state.status=NEXTPHASE; 17 | 18 | DATA_MALLOC(ins, this_phase->data_tail->next); 19 | this_phase->data_tail=this_phase->data_tail->next; 20 | this_phase->data_tail->next=NULL; 21 | this_phase->data_tail->len=4; 22 | this_phase->data_tail->flags=F_FREE; 23 | p=this_phase->data_tail->data=malloc(4); 24 | 25 | c=*data-1; 26 | *p=bb11011000; 27 | *p |= (c >> 2) & bb00000011; 28 | ++p; 29 | *p=(c << 6) & bb11000000; 30 | ++data; 31 | *p |= (*data >> 2) & bb00111111; 32 | ++p; 33 | *p=bb11011100; 34 | *p |= *data & bb00000011; 35 | ++p; 36 | ++data; 37 | *p=*data; 38 | 39 | data=this_phase->data_tail->data; 40 | 41 | SWAP(data[0],data[1],i); 42 | SWAP(data[2],data[3],i); 43 | }else{ 44 | this_phase->state.status=NEXTPHASE; 45 | len=this_phase->curr->len-1; 46 | 47 | DATA_MALLOC(ins, this_phase->data_tail->next); 48 | this_phase->data_tail=this_phase->data_tail->next; 49 | this_phase->data_tail->next=NULL; 50 | this_phase->data_tail->len=2; 51 | this_phase->data_tail->flags=F_FREE; 52 | this_phase->data_tail->data=malloc(2); 53 | for(i=0;i<2-len;++i){ 54 | CP(this_phase->data_tail->data)[i]=0x0; 55 | } 56 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 57 | data=this_phase->data_tail->data; 58 | 59 | SWAP(data[0],data[1],i); 60 | } 61 | return; 62 | } 63 | 
-------------------------------------------------------------------------------- /modules/to/UTF-16LE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-16BE 2 | -------------------------------------------------------------------------------- /modules/to/UTF-16LE.txt: -------------------------------------------------------------------------------- 1 | 01* ? 2 | -------------------------------------------------------------------------------- /modules/to/UTF-32BE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | void cbconv(struct bsdconv_instance *ins){ 7 | char *data; 8 | unsigned int len, i; 9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 10 | data=this_phase->curr->data; 11 | 12 | this_phase->state.status=NEXTPHASE; 13 | data+=1; 14 | len=this_phase->curr->len-1; 15 | 16 | DATA_MALLOC(ins, this_phase->data_tail->next); 17 | this_phase->data_tail=this_phase->data_tail->next; 18 | this_phase->data_tail->next=NULL; 19 | this_phase->data_tail->len=4; 20 | this_phase->data_tail->flags=F_FREE; 21 | this_phase->data_tail->data=malloc(4); 22 | for(i=0;i<4-len;++i){ 23 | CP(this_phase->data_tail->data)[i]=0x0; 24 | } 25 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 26 | } 27 | -------------------------------------------------------------------------------- /modules/to/UTF-32BE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-32BE 2 | -------------------------------------------------------------------------------- /modules/to/UTF-32BE.txt: -------------------------------------------------------------------------------- 1 | 01* ? 
2 | -------------------------------------------------------------------------------- /modules/to/UTF-32LE.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../src/bsdconv.h" 5 | 6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i)) 7 | 8 | void cbconv(struct bsdconv_instance *ins){ 9 | char *data; 10 | unsigned int len, i; 11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins); 12 | data=this_phase->curr->data; 13 | 14 | this_phase->state.status=NEXTPHASE; 15 | data+=1; 16 | len=this_phase->curr->len-1; 17 | 18 | DATA_MALLOC(ins, this_phase->data_tail->next); 19 | this_phase->data_tail=this_phase->data_tail->next; 20 | this_phase->data_tail->next=NULL; 21 | this_phase->data_tail->len=4; 22 | this_phase->data_tail->flags=F_FREE; 23 | this_phase->data_tail->data=malloc(4); 24 | for(i=0;i<4-len;++i){ 25 | CP(this_phase->data_tail->data)[i]=0x0; 26 | } 27 | memcpy(CP(this_phase->data_tail->data)+i, data, len); 28 | data=this_phase->data_tail->data; 29 | SWAP(data[0],data[3],i); 30 | SWAP(data[1],data[2],i); 31 | return; 32 | } 33 | -------------------------------------------------------------------------------- /modules/to/UTF-32LE.man: -------------------------------------------------------------------------------- 1 | .redirect from/UTF-32BE 2 | -------------------------------------------------------------------------------- /modules/to/UTF-32LE.txt: -------------------------------------------------------------------------------- 1 | 01* ? 
2 | -------------------------------------------------------------------------------- /modules/to/_CP1251.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP1251 2 | -------------------------------------------------------------------------------- /modules/to/_CP1251.txt: -------------------------------------------------------------------------------- 1 | #transposed from from/_CP1251.txt 2 | 010402 80 3 | 010403 81 4 | 01201A 82 5 | 010453 83 6 | 01201E 84 7 | 012026 85 8 | 012020 86 9 | 012021 87 10 | 0120AC 88 11 | 012030 89 12 | 010409 8A 13 | 012039 8B 14 | 01040A 8C 15 | 01040C 8D 16 | 01040B 8E 17 | 01040F 8F 18 | 010452 90 19 | 012018 91 20 | 012019 92 21 | 01201C 93 22 | 01201D 94 23 | 012022 95 24 | 012013 96 25 | 012014 97 26 | 012122 99 27 | 010459 9A 28 | 01203A 9B 29 | 01045A 9C 30 | 01045C 9D 31 | 01045B 9E 32 | 01045F 9F 33 | 01A0 A0 34 | 01040E A1 35 | 01045E A2 36 | 010408 A3 37 | 01A4 A4 38 | 010490 A5 39 | 01A6 A6 40 | 01A7 A7 41 | 010401 A8 42 | 01A9 A9 43 | 010404 AA 44 | 01AB AB 45 | 01AC AC 46 | 01AD AD 47 | 01AE AE 48 | 010407 AF 49 | 01B0 B0 50 | 01B1 B1 51 | 010406 B2 52 | 010456 B3 53 | 010491 B4 54 | 01B5 B5 55 | 01B6 B6 56 | 01B7 B7 57 | 010451 B8 58 | 012116 B9 59 | 010454 BA 60 | 01BB BB 61 | 010458 BC 62 | 010405 BD 63 | 010455 BE 64 | 010457 BF 65 | 010410 C0 66 | 010411 C1 67 | 010412 C2 68 | 010413 C3 69 | 010414 C4 70 | 010415 C5 71 | 010416 C6 72 | 010417 C7 73 | 010418 C8 74 | 010419 C9 75 | 01041A CA 76 | 01041B CB 77 | 01041C CC 78 | 01041D CD 79 | 01041E CE 80 | 01041F CF 81 | 010420 D0 82 | 010421 D1 83 | 010422 D2 84 | 010423 D3 85 | 010424 D4 86 | 010425 D5 87 | 010426 D6 88 | 010427 D7 89 | 010428 D8 90 | 010429 D9 91 | 01042A DA 92 | 01042B DB 93 | 01042C DC 94 | 01042D DD 95 | 01042E DE 96 | 01042F DF 97 | 010430 E0 98 | 010431 E1 99 | 010432 E2 100 | 010433 E3 101 | 010434 E4 102 | 010435 E5 103 | 010436 E6 104 | 010437 E7 105 | 010438 E8 106 | 010439 E9 107 | 
01043A EA 108 | 01043B EB 109 | 01043C EC 110 | 01043D ED 111 | 01043E EE 112 | 01043F EF 113 | 010440 F0 114 | 010441 F1 115 | 010442 F2 116 | 010443 F3 117 | 010444 F4 118 | 010445 F5 119 | 010446 F6 120 | 010447 F7 121 | 010448 F8 122 | 010449 F9 123 | 01044A FA 124 | 01044B FB 125 | 01044C FC 126 | 01044D FD 127 | 01044E FE 128 | 01044F FF 129 | -------------------------------------------------------------------------------- /modules/to/_CP1252.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP1252 2 | -------------------------------------------------------------------------------- /modules/to/_CP1252.txt: -------------------------------------------------------------------------------- 1 | #transposed from from/_CP1252.txt 2 | 0120AC 80 3 | 01201A 82 4 | 010192 83 5 | 01201E 84 6 | 012026 85 7 | 012020 86 8 | 012021 87 9 | 0102C6 88 10 | 012030 89 11 | 010160 8A 12 | 012039 8B 13 | 010152 8C 14 | 01017D 8E 15 | 012018 91 16 | 012019 92 17 | 01201C 93 18 | 01201D 94 19 | 012022 95 20 | 012013 96 21 | 012014 97 22 | 0102DC 98 23 | 012122 99 24 | 010161 9A 25 | 01203A 9B 26 | 010153 9C 27 | 01017E 9E 28 | 010178 9F 29 | 01A0 A0 30 | 01A1 A1 31 | 01A2 A2 32 | 01A3 A3 33 | 01A4 A4 34 | 01A5 A5 35 | 01A6 A6 36 | 01A7 A7 37 | 01A8 A8 38 | 01A9 A9 39 | 01AA AA 40 | 01AB AB 41 | 01AC AC 42 | 01AD AD 43 | 01AE AE 44 | 01AF AF 45 | 01B0 B0 46 | 01B1 B1 47 | 01B2 B2 48 | 01B3 B3 49 | 01B4 B4 50 | 01B5 B5 51 | 01B6 B6 52 | 01B7 B7 53 | 01B8 B8 54 | 01B9 B9 55 | 01BA BA 56 | 01BB BB 57 | 01BC BC 58 | 01BD BD 59 | 01BE BE 60 | 01BF BF 61 | 01C0 C0 62 | 01C1 C1 63 | 01C2 C2 64 | 01C3 C3 65 | 01C4 C4 66 | 01C5 C5 67 | 01C6 C6 68 | 01C7 C7 69 | 01C8 C8 70 | 01C9 C9 71 | 01CA CA 72 | 01CB CB 73 | 01CC CC 74 | 01CD CD 75 | 01CE CE 76 | 01CF CF 77 | 01D0 D0 78 | 01D1 D1 79 | 01D2 D2 80 | 01D3 D3 81 | 01D4 D4 82 | 01D5 D5 83 | 01D6 D6 84 | 01D7 D7 85 | 01D8 D8 86 | 01D9 D9 87 | 01DA DA 88 | 01DB DB 89 | 01DC DC 90 | 01DD DD 
91 | 01DE DE 92 | 01DF DF 93 | 01E0 E0 94 | 01E1 E1 95 | 01E2 E2 96 | 01E3 E3 97 | 01E4 E4 98 | 01E5 E5 99 | 01E6 E6 100 | 01E7 E7 101 | 01E8 E8 102 | 01E9 E9 103 | 01EA EA 104 | 01EB EB 105 | 01EC EC 106 | 01ED ED 107 | 01EE EE 108 | 01EF EF 109 | 01F0 F0 110 | 01F1 F1 111 | 01F2 F2 112 | 01F3 F3 113 | 01F4 F4 114 | 01F5 F5 115 | 01F6 F6 116 | 01F7 F7 117 | 01F8 F8 118 | 01F9 F9 119 | 01FA FA 120 | 01FB FB 121 | 01FC FC 122 | 01FD FD 123 | 01FE FE 124 | 01FF FF 125 | -------------------------------------------------------------------------------- /modules/to/_CP1253.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP1253 2 | -------------------------------------------------------------------------------- /modules/to/_CP1253.txt: -------------------------------------------------------------------------------- 1 | # transposed from from/_CP1253 2 | 0120AC 80 3 | 01201A 82 4 | 010192 83 5 | 01201E 84 6 | 012026 85 7 | 012020 86 8 | 012021 87 9 | 012030 89 10 | 012039 8B 11 | 012018 91 12 | 012019 92 13 | 01201C 93 14 | 01201D 94 15 | 012022 95 16 | 012013 96 17 | 012014 97 18 | 012122 99 19 | 01203A 9B 20 | 0100A0 A0 21 | 010385 A1 22 | 010386 A2 23 | 0100A3 A3 24 | 0100A4 A4 25 | 0100A5 A5 26 | 0100A6 A6 27 | 0100A7 A7 28 | 0100A8 A8 29 | 0100A9 A9 30 | 0100AB AB 31 | 0100AC AC 32 | 0100AD AD 33 | 0100AE AE 34 | 012015 AF 35 | 0100B0 B0 36 | 0100B1 B1 37 | 0100B2 B2 38 | 0100B3 B3 39 | 010384 B4 40 | 0100B5 B5 41 | 0100B6 B6 42 | 0100B7 B7 43 | 010388 B8 44 | 010389 B9 45 | 01038A BA 46 | 0100BB BB 47 | 01038C BC 48 | 0100BD BD 49 | 01038E BE 50 | 01038F BF 51 | 010390 C0 52 | 010391 C1 53 | 010392 C2 54 | 010393 C3 55 | 010394 C4 56 | 010395 C5 57 | 010396 C6 58 | 010397 C7 59 | 010398 C8 60 | 010399 C9 61 | 01039A CA 62 | 01039B CB 63 | 01039C CC 64 | 01039D CD 65 | 01039E CE 66 | 01039F CF 67 | 0103A0 D0 68 | 0103A1 D1 69 | 0103A3 D3 70 | 0103A4 D4 71 | 0103A5 D5 72 | 0103A6 D6 73 | 0103A7 D7 74 | 0103A8 D8 
75 | 0103A9 D9 76 | 0103AA DA 77 | 0103AB DB 78 | 0103AC DC 79 | 0103AD DD 80 | 0103AE DE 81 | 0103AF DF 82 | 0103B0 E0 83 | 0103B1 E1 84 | 0103B2 E2 85 | 0103B3 E3 86 | 0103B4 E4 87 | 0103B5 E5 88 | 0103B6 E6 89 | 0103B7 E7 90 | 0103B8 E8 91 | 0103B9 E9 92 | 0103BA EA 93 | 0103BB EB 94 | 0103BC EC 95 | 0103BD ED 96 | 0103BE EE 97 | 0103BF EF 98 | 0103C0 F0 99 | 0103C1 F1 100 | 0103C2 F2 101 | 0103C3 F3 102 | 0103C4 F4 103 | 0103C5 F5 104 | 0103C6 F6 105 | 0103C7 F7 106 | 0103C8 F8 107 | 0103C9 F9 108 | 0103CA FA 109 | 0103CB FB 110 | 0103CC FC 111 | 0103CD FD 112 | 0103CE FE 113 | -------------------------------------------------------------------------------- /modules/to/_CP874.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP874 2 | -------------------------------------------------------------------------------- /modules/to/_CP874.txt: -------------------------------------------------------------------------------- 1 | #transposed from from/_CP874 2 | 0120AC 80 3 | 012026 85 4 | 012018 91 5 | 012019 92 6 | 01201C 93 7 | 01201D 94 8 | 012022 95 9 | 012013 96 10 | 012014 97 11 | 0100A0 A0 12 | 010E01 A1 13 | 010E02 A2 14 | 010E03 A3 15 | 010E04 A4 16 | 010E05 A5 17 | 010E06 A6 18 | 010E07 A7 19 | 010E08 A8 20 | 010E09 A9 21 | 010E0A AA 22 | 010E0B AB 23 | 010E0C AC 24 | 010E0D AD 25 | 010E0E AE 26 | 010E0F AF 27 | 010E10 B0 28 | 010E11 B1 29 | 010E12 B2 30 | 010E13 B3 31 | 010E14 B4 32 | 010E15 B5 33 | 010E16 B6 34 | 010E17 B7 35 | 010E18 B8 36 | 010E19 B9 37 | 010E1A BA 38 | 010E1B BB 39 | 010E1C BC 40 | 010E1D BD 41 | 010E1E BE 42 | 010E1F BF 43 | 010E20 C0 44 | 010E21 C1 45 | 010E22 C2 46 | 010E23 C3 47 | 010E24 C4 48 | 010E25 C5 49 | 010E26 C6 50 | 010E27 C7 51 | 010E28 C8 52 | 010E29 C9 53 | 010E2A CA 54 | 010E2B CB 55 | 010E2C CC 56 | 010E2D CD 57 | 010E2E CE 58 | 010E2F CF 59 | 010E30 D0 60 | 010E31 D1 61 | 010E32 D2 62 | 010E33 D3 63 | 010E34 D4 64 | 010E35 D5 65 | 010E36 D6 66 | 010E37 D7 67 | 
010E38 D8 68 | 010E39 D9 69 | 010E3A DA 70 | 010E3F DF 71 | 010E40 E0 72 | 010E41 E1 73 | 010E42 E2 74 | 010E43 E3 75 | 010E44 E4 76 | 010E45 E5 77 | 010E46 E6 78 | 010E47 E7 79 | 010E48 E8 80 | 010E49 E9 81 | 010E4A EA 82 | 010E4B EB 83 | 010E4C EC 84 | 010E4D ED 85 | 010E4E EE 86 | 010E4F EF 87 | 010E50 F0 88 | 010E51 F1 89 | 010E52 F2 90 | 010E53 F3 91 | 010E54 F4 92 | 010E55 F5 93 | 010E56 F6 94 | 010E57 F7 95 | 010E58 F8 96 | 010E59 F9 97 | 010E5A FA 98 | 010E5B FB 99 | -------------------------------------------------------------------------------- /modules/to/_CP936.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/to/_CP949.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP949 2 | -------------------------------------------------------------------------------- /modules/to/_CP950.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/to/_GB18030.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../../src/bsdconv.h" 3 | 4 | struct gb18030_data { 5 | uint32_t beg; 6 | uint32_t end; 7 | uint32_t off; 8 | }; 9 | 10 | static const struct gb18030_data gb18030_table[] = { 11 | {0x0452, 0x200F, 1688038}, 12 | {0x2643, 0x2E80, 1696437}, 13 | {0x361B, 0x3917, 1700191}, 14 | {0x3CE1, 0x4055, 1701916}, 15 | {0x4160, 0x4336, 1703065}, 16 | {0x44D7, 0x464B, 1703947}, 17 | {0x478E, 0x4946, 1704636}, 18 | {0x49B8, 0x4C76, 1705179}, 19 | {0x9FA6, 0xD7FF, 1706261}, 20 | {0xE865, 0xF92B, 1720768}, 21 | {0xFA2A, 0xFE2F, 1725296}, 22 | {0xFFE6, 0xFFFF, 1726612}, 23 | {0x10000, 0x10FFFF, 1876218}, 24 | }; 25 | 26 | void cbconv(struct bsdconv_instance *ins){ 27 | 
struct bsdconv_phase *this_phase=THIS_PHASE(ins); 28 | unsigned char *data, *p; 29 | unsigned int len; 30 | int max=sizeof(gb18030_table) / sizeof(struct gb18030_data) - 1; 31 | int min = 0; 32 | int mid; 33 | union { 34 | unsigned char byte[4]; 35 | uint32_t num; 36 | } codepoint; 37 | int i; 38 | uint32_t ucs; 39 | uint32_t gb; 40 | data=this_phase->curr->data; 41 | 42 | data+=1; 43 | len=this_phase->curr->len-1; 44 | 45 | codepoint.num=0; 46 | for(i=0;(len-i)>0;++i){ 47 | codepoint.byte[3-i]=data[len-i-1]; 48 | } 49 | ucs=be32toh(codepoint.num); 50 | 51 | if (ucs < gb18030_table[0].beg || ucs > gb18030_table[max].end){ 52 | this_phase->state.status=DEADEND; 53 | return; 54 | }else while (max >= min) { 55 | mid = (min + max) / 2; 56 | if (ucs > gb18030_table[mid].end) 57 | min = mid + 1; 58 | else if (ucs < gb18030_table[mid].beg) 59 | max = mid - 1; 60 | else{ 61 | break; 62 | } 63 | } 64 | if(gb18030_table[mid].beg<=ucs && ucs<=gb18030_table[mid].end){ 65 | this_phase->state.status=NEXTPHASE; 66 | DATA_MALLOC(ins, this_phase->data_tail->next); 67 | this_phase->data_tail=this_phase->data_tail->next; 68 | this_phase->data_tail->next=NULL; 69 | this_phase->data_tail->flags=F_FREE; 70 | 71 | gb=gb18030_table[mid].off + (ucs - gb18030_table[mid].beg); 72 | 73 | this_phase->data_tail->len=4; 74 | p=this_phase->data_tail->data=malloc(4); 75 | 76 | gb-=1687218; 77 | p[3]=0x30+gb%10; 78 | gb/=10; 79 | p[2]=0x81+gb%126; 80 | gb/=126; 81 | p[1]=0x30+gb%10; 82 | gb/=10; 83 | p[0]=0x81+gb; 84 | return; 85 | }else{ 86 | this_phase->state.status=DEADEND; 87 | return; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /modules/to/_GB18030.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/to/_GB2312.man: 
-------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/to/_GBK.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP936 2 | -------------------------------------------------------------------------------- /modules/to/_ISO-8859-1.man: -------------------------------------------------------------------------------- 1 | .redirect from/_ISO-8859-1 2 | -------------------------------------------------------------------------------- /modules/to/_ISO-8859-1.txt: -------------------------------------------------------------------------------- 1 | # transposed from from/iso-8859-1 2 | 01A0 A0 3 | 01A1 A1 4 | 01A2 A2 5 | 01A3 A3 6 | 01A4 A4 7 | 01A5 A5 8 | 01A6 A6 9 | 01A7 A7 10 | 01A8 A8 11 | 01A9 A9 12 | 01AA AA 13 | 01AB AB 14 | 01AC AC 15 | 01AD AD 16 | 01AE AE 17 | 01AF AF 18 | 01B0 B0 19 | 01B1 B1 20 | 01B2 B2 21 | 01B3 B3 22 | 01B4 B4 23 | 01B5 B5 24 | 01B6 B6 25 | 01B7 B7 26 | 01B8 B8 27 | 01B9 B9 28 | 01BA BA 29 | 01BB BB 30 | 01BC BC 31 | 01BD BD 32 | 01BE BE 33 | 01BF BF 34 | 01C0 C0 35 | 01C1 C1 36 | 01C2 C2 37 | 01C3 C3 38 | 01C4 C4 39 | 01C5 C5 40 | 01C6 C6 41 | 01C7 C7 42 | 01C8 C8 43 | 01C9 C9 44 | 01CA CA 45 | 01CB CB 46 | 01CC CC 47 | 01CD CD 48 | 01CE CE 49 | 01CF CF 50 | 01D0 D0 51 | 01D1 D1 52 | 01D2 D2 53 | 01D3 D3 54 | 01D4 D4 55 | 01D5 D5 56 | 01D6 D6 57 | 01D7 D7 58 | 01D8 D8 59 | 01D9 D9 60 | 01DA DA 61 | 01DB DB 62 | 01DC DC 63 | 01DD DD 64 | 01DE DE 65 | 01DF DF 66 | 01E0 E0 67 | 01E1 E1 68 | 01E2 E2 69 | 01E3 E3 70 | 01E4 E4 71 | 01E5 E5 72 | 01E6 E6 73 | 01E7 E7 74 | 01E8 E8 75 | 01E9 E9 76 | 01EA EA 77 | 01EB EB 78 | 01EC EC 79 | 01ED ED 80 | 01EE EE 81 | 01EF EF 82 | 01F0 F0 83 | 01F1 F1 84 | 01F2 F2 85 | 01F3 F3 86 | 01F4 F4 87 | 01F5 F5 88 | 01F6 F6 89 | 01F7 F7 90 | 01F8 F8 91 | 01F9 F9 92 | 01FA FA 93 | 01FB FB 94 | 01FC FC 95 | 01FD FD 96 | 
01FE FE 97 | 01FF FF 98 | -------------------------------------------------------------------------------- /modules/to/_SHIFT-JIS.man: -------------------------------------------------------------------------------- 1 | .redirect from/_SHIFT-JIS 2 | -------------------------------------------------------------------------------- /modules/to/_UAO241.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/to/_UAO250.man: -------------------------------------------------------------------------------- 1 | .redirect from/_CP950 2 | -------------------------------------------------------------------------------- /modules/to/_UTF-8.man: -------------------------------------------------------------------------------- 1 | .redirect FROM/_UTF-8 2 | -------------------------------------------------------------------------------- /modules/to/_UTF-8.txt: -------------------------------------------------------------------------------- 1 | 01* ? 
2 | -------------------------------------------------------------------------------- /modules/to/alias: -------------------------------------------------------------------------------- 1 | UAO UAO250 2 | SOURCE ASCII-ESCAPED-UNICODE 3 | JAVA ASCII-ESCAPED-UNICODE 4 | HTMLENTITY ASCII-HEX-NUMERIC-HTML-ENTITY 5 | NAMED-HTMLENTITY ASCII-NAMED-HTML-ENTITY 6 | HTML-IMG ASCII-HTML-UNICODE-IMG 7 | UCS-4 UTF-32LE 8 | UCS-4BE UTF-32BE 9 | UCS-4LE UTF-32LE 10 | UTF-32 UTF-32LE 11 | UTF-16 UTF-16LE 12 | UCS-2 UCS-2LE 13 | BIG5 CP950 14 | CNS11643 ASCII,_CNS11643 15 | CP1251 _CP1251,ASCII 16 | CP1252 _CP1252,ASCII 17 | CP1253 _CP1253,ASCII 18 | CP874 _CP874,ASCII 19 | CP936 _CP936,ASCII 20 | CP949 _CP949,ASCII 21 | CP950 _CP950,ASCII 22 | GB18030 _GB18030,ASCII 23 | GB2312 _GB2312,ASCII 24 | GBK _GBK,ASCII 25 | ISO-8859-1 _ISO-8859-1,ASCII 26 | JIS _JIS0212,ASCII 27 | SHIFT-JIS _SHIFT-JIS,ASCII 28 | UAO241 _UAO241,ASCII 29 | UAO250 _UAO250,ASCII 30 | UTF-8 _UTF-8,ASCII 31 | 3F ANY#3F&ERROR 32 | URL ESCAPE#FOR=UNICODE&MODE=16&PREFIX=2575,ESCAPE#FOR=BYTE&MODE=16&PREFIX=25 33 | ASCII-ESCAPED-UNICODE ESCAPE#FOR=UNICODE&MODE=16&PREFIX=5C75 34 | ASCII-HEX-NUMERIC-HTML-ENTITY ESCAPE#FOR=UNICODE&PREFIX=262378&MODE=16&SUFFIX=3B 35 | ASCII-DEC-NUMERIC-HTML-ENTITY ESCAPE#FOR=UNICODE&PREFIX=2623&MODE=10&SUFFIX=3B 36 | # backward compatibility 37 | UNICODE PASS#MARK&FOR=UNICODE 38 | ANSI-CONTROL PASS#MARK&FOR=ANSI 39 | BSDCONV-STDOUT BSDCONV-OUTPUT 40 | -------------------------------------------------------------------------------- /src/bsdconv-completion.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bsdconv.h" 4 | 5 | int item_in_array(char *item, char **array, int size){ 6 | int i; 7 | for(i=0;i1) 27 | arg=argv[1]; 28 | 29 | arg=arg_base=strdup(arg); 30 | 31 | if(arg[0]=='\'' || arg[0]=='"'){ 32 | arg+=1; 33 | } 34 | i=strlen(arg)-1; 35 | if(arg[i]=='\'' || arg[i]=='"'){ 36 | arg[i]=0; 37 | } 38 | 39 | 
part=arg; 40 | mode=FROM; 41 | strtoupper(arg); 42 | for(c=arg;*c;++c){ 43 | switch(*c){ 44 | case ':': 45 | mode=TO; 46 | part=c+1; 47 | break; 48 | case '|': 49 | mode=FROM; 50 | part=c+1; 51 | break; 52 | case ',': 53 | part=c+1; 54 | break; 55 | } 56 | } 57 | 58 | int size=0; 59 | int num=0; 60 | inter_list=bsdconv_modules_list(INTER); 61 | fromto_list=bsdconv_modules_list(mode); 62 | for(p=inter_list;*p;++p) 63 | num+=1; 64 | for(p=fromto_list;*p;++p) 65 | num+=1; 66 | 67 | codecs_list=malloc(sizeof(char *) * (num+1)); 68 | for(p=inter_list;*p;++p){ 69 | if(strstr(*p,part)==*p && !item_in_array(*p, codecs_list, size)){ 70 | codecs_list[size]=*p; 71 | size+=1; 72 | } 73 | codecs_list[size]=NULL; 74 | } 75 | for(p=fromto_list;*p;++p){ 76 | if(strstr(*p,part)==*p && !item_in_array(*p, codecs_list, size)){ 77 | codecs_list[size]=*p; 78 | size+=1; 79 | } 80 | codecs_list[size]=NULL; 81 | } 82 | 83 | for(i=0;icounter; 3 | struct bsdconv_counter_entry *t; 4 | char *key=strdup(_key); 5 | strtoupper(key); 6 | if(p==NULL){ 7 | ins->counter=malloc(sizeof(struct bsdconv_counter_entry)); 8 | ins->counter->key=key; 9 | ins->counter->val=0; 10 | ins->counter->next=0; 11 | return &ins->counter->val; 12 | }else{ 13 | do{ 14 | t=p; 15 | if(strcmp(p->key, key)==0){ 16 | free(key); 17 | return &p->val; 18 | } 19 | p=p->next; 20 | }while(p!=NULL); 21 | t->next=malloc(sizeof(struct bsdconv_counter_entry)); 22 | t=t->next; 23 | t->key=key; 24 | t->val=0; 25 | t->next=0; 26 | return &t->val; 27 | } 28 | } 29 | 30 | void bsdconv_counter_reset(struct bsdconv_instance *ins, const char *key){ 31 | struct bsdconv_counter_entry *p=ins->counter; 32 | bsdconv_counter_t *v; 33 | if(key==NULL){ 34 | while(p){ 35 | p->val=0; 36 | p=p->next; 37 | } 38 | }else{ 39 | v=bsdconv_counter(ins, key); 40 | *v=0; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/libbsdconv_filter.c: 
-------------------------------------------------------------------------------- 1 | struct bsdconv_filter *load_filter(const char *_name){ 2 | struct bsdconv_filter *filter; 3 | 4 | char *cwd; 5 | char *c; 6 | char path[PATH_MAX+1]; 7 | char *name=strdup(_name); 8 | strtoupper(name); 9 | 10 | while(!bsdconv_module_check(FILTER, name)){ 11 | c=bsdconv_solve_alias(FILTER, name); 12 | if(c==NULL || strcmp(c, name)==0){ 13 | free(name); 14 | free(c); 15 | return NULL; 16 | } 17 | free(name); 18 | name=c; 19 | } 20 | cwd=getcwd(NULL, 0); 21 | if((c=getenv("BSDCONV_PATH"))){ 22 | chdir(c); 23 | }else{ 24 | chdir(BSDCONV_PATH); 25 | } 26 | chdir(MODULES_SUBPATH); 27 | chdir("filter"); 28 | REALPATH(name, path); 29 | chdir(cwd); 30 | free(cwd); 31 | free(name); 32 | strcat(path, "." SHLIBEXT); 33 | 34 | filter=malloc(sizeof(struct bsdconv_filter)); 35 | filter->so=OPEN_SHAREOBJECT(path); 36 | if(!filter->so){ 37 | free(filter); 38 | return NULL; 39 | } 40 | 41 | filter->cbfilter=SHAREOBJECT_SYMBOL(filter->so, "cbfilter"); 42 | 43 | return filter; 44 | } 45 | 46 | void unload_filter(struct bsdconv_filter *filter){ 47 | CLOSE_SHAREOBJECT(filter->so); 48 | free(filter); 49 | } 50 | -------------------------------------------------------------------------------- /src/libbsdconv_hash.c: -------------------------------------------------------------------------------- 1 | void bsdconv_hash_set(struct bsdconv_instance *ins, const char *key, void *ptr){ 2 | char *tk; 3 | void *tp; 4 | struct bsdconv_hash_entry *p=ins->hash; 5 | while(p!=NULL){ 6 | if(strcmp(p->key, key)==0){ 7 | tp=ptr; 8 | tk=p->key; 9 | p->key=ins->hash->key; 10 | p->ptr=ins->hash->ptr; 11 | ins->hash->key=tk; 12 | ins->hash->ptr=tp; 13 | return; 14 | } 15 | p=p->next; 16 | } 17 | p=malloc(sizeof(struct bsdconv_hash_entry)); 18 | p->next=ins->hash; 19 | ins->hash=p; 20 | p->key=strdup(key); 21 | p->ptr=ptr; 22 | return; 23 | } 24 | 25 | void *bsdconv_hash_get(struct bsdconv_instance *ins, const char *key){ 26 | 
char *tk; 27 | void *tp; 28 | struct bsdconv_hash_entry *p=ins->hash; 29 | while(p!=NULL){ 30 | if(strcmp(p->key, key)==0){ 31 | tk=p->key; 32 | tp=p->ptr; 33 | p->key=ins->hash->key; 34 | p->ptr=ins->hash->ptr; 35 | ins->hash->key=tk; 36 | ins->hash->ptr=tp; 37 | return p->ptr; 38 | } 39 | p=p->next; 40 | } 41 | return NULL; 42 | } 43 | 44 | int bsdconv_hash_has(struct bsdconv_instance *ins, const char *key){ 45 | char *tk; 46 | void *tp; 47 | struct bsdconv_hash_entry *p=ins->hash; 48 | while(p!=NULL){ 49 | if(strcmp(p->key, key)==0){ 50 | tk=p->key; 51 | tp=p->ptr; 52 | p->key=ins->hash->key; 53 | p->ptr=ins->hash->ptr; 54 | ins->hash->key=tk; 55 | ins->hash->ptr=tp; 56 | return 1; 57 | } 58 | p=p->next; 59 | } 60 | return 0; 61 | } 62 | 63 | void bsdconv_hash_del(struct bsdconv_instance *ins, const char *key){ 64 | struct bsdconv_hash_entry *p=ins->hash; 65 | struct bsdconv_hash_entry **q=&ins->hash; 66 | while(p!=NULL){ 67 | if(strcmp(p->key, key)==0){ 68 | free(p->key); 69 | *q=p->next; 70 | free(p); 71 | return; 72 | } 73 | p=p->next; 74 | q=&p->next; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/libbsdconv_scorer.c: -------------------------------------------------------------------------------- 1 | struct bsdconv_scorer *load_scorer(const char *_name){ 2 | struct bsdconv_scorer *scorer; 3 | 4 | char *cwd; 5 | char *c; 6 | char path[PATH_MAX+1]; 7 | char *name=strdup(_name); 8 | strtoupper(name); 9 | 10 | while(!bsdconv_module_check(SCORER, name)){ 11 | c=bsdconv_solve_alias(SCORER, name); 12 | if(c==NULL || strcmp(c, name)==0){ 13 | free(name); 14 | free(c); 15 | return NULL; 16 | } 17 | free(name); 18 | name=c; 19 | } 20 | cwd=getcwd(NULL, 0); 21 | if((c=getenv("BSDCONV_PATH"))){ 22 | chdir(c); 23 | }else{ 24 | chdir(BSDCONV_PATH); 25 | } 26 | chdir(MODULES_SUBPATH); 27 | chdir("scorer"); 28 | REALPATH(name, path); 29 | chdir(cwd); 30 | free(cwd); 31 | free(name); 32 | strcat(path, "." 
SHLIBEXT); 33 | 34 | scorer=malloc(sizeof(struct bsdconv_scorer)); 35 | scorer->so=OPEN_SHAREOBJECT(path); 36 | if(!scorer->so){ 37 | free(scorer); 38 | return NULL; 39 | } 40 | 41 | scorer->cbscorer=SHAREOBJECT_SYMBOL(scorer->so, "cbscorer"); 42 | 43 | return scorer; 44 | } 45 | 46 | void unload_scorer(struct bsdconv_scorer *scorer){ 47 | CLOSE_SHAREOBJECT(scorer->so); 48 | free(scorer); 49 | } 50 | -------------------------------------------------------------------------------- /src/libfmalloc.c: -------------------------------------------------------------------------------- 1 | #ifdef USE_FMALLOC 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "fmalloc.h" 8 | 9 | const char *fmalloc_template="/tmp/.fmalloc.XXXXXX"; 10 | struct fmalloc_entry * fmalloc_pools=NULL; 11 | int fmalloc_num=0; 12 | 13 | void * fmalloc(size_t s){ 14 | void *m; 15 | char *tmpfile; 16 | int tmpfd; 17 | size_t o_offset; 18 | struct fmalloc_entry * last; 19 | if(fmalloc_pools==NULL || ((fmalloc_pools->offset+s) > FMALLOC_SIZE)){ 20 | if(fmalloc_num < FMALLOC_NUM){ 21 | tmpfile=strdup(fmalloc_template); 22 | if((tmpfd=mkstemp(tmpfile))==-1){ 23 | free(tmpfile); 24 | return malloc(s); 25 | } 26 | unlink(tmpfile); 27 | free(tmpfile); 28 | ftruncate(tmpfd, FMALLOC_SIZE); 29 | m=mmap(0, FMALLOC_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, tmpfd, 0); 30 | if(m==MAP_FAILED){ 31 | return malloc(s); 32 | } 33 | last=fmalloc_pools; 34 | fmalloc_pools=malloc(sizeof(struct fmalloc_entry)); 35 | fmalloc_pools->z=m; 36 | fmalloc_pools->offset=0; 37 | fmalloc_pools->fd=tmpfd; 38 | fmalloc_pools->next=last; 39 | fmalloc_num+=1; 40 | }else{ 41 | return malloc(s); 42 | } 43 | } 44 | o_offset=fmalloc_pools->offset; 45 | fmalloc_pools->offset+=s; 46 | return fmalloc_pools->z + o_offset; 47 | } 48 | 49 | void fmfree(void *p){ 50 | struct fmalloc_entry *entry=fmalloc_pools; 51 | while(entry){ 52 | if(p>=entry->z && pz+entry->offset){ 53 | return; 54 | } 55 | entry=entry->next; 56 | } 57 | 
free(p); 58 | } 59 | 60 | void fmsync(void){ 61 | struct fmalloc_entry *entry=fmalloc_pools; 62 | while(entry){ 63 | msync(entry->z, entry->offset, MS_SYNC); 64 | entry=entry->next; 65 | } 66 | } 67 | 68 | void fmcleanup(void){ 69 | struct fmalloc_entry *next=fmalloc_pools; 70 | while(fmalloc_pools){ 71 | next=fmalloc_pools->next; 72 | munmap(fmalloc_pools->z, FMALLOC_SIZE); 73 | close(fmalloc_pools->fd); 74 | free(fmalloc_pools); 75 | fmalloc_pools=next; 76 | } 77 | } 78 | #endif 79 | -------------------------------------------------------------------------------- /src/missing_func.c: -------------------------------------------------------------------------------- 1 | #ifdef WIN32 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char * strsep(char **stringp, const char *delim){ 9 | char *r=*stringp; 10 | if(!**stringp) return NULL; 11 | for(;**stringp && !strchr(delim, **stringp);++(*stringp)); 12 | if(**stringp){ 13 | **stringp=0x0; 14 | (*stringp)++; 15 | } 16 | return r; 17 | } 18 | 19 | char * strndup(const char *str, size_t len){ 20 | char *r; 21 | size_t l=strlen(str); 22 | if(len 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]){ 6 | char *in=strdup("utf-8:utf-8,ascii"); 7 | char *out; 8 | char *expect; 9 | struct bsdconv_instance *ins; 10 | 11 | expect="ASCII,_UTF-8,ANY#013F&ERROR:count#lowercase:AsCiI,any#3f"; 12 | ins=bsdconv_create("utf-8,3f:count#lowercase:AsCiI,any#3f"); 13 | out=bsdconv_pack(ins); 14 | if(strcmp(expect, out)){ 15 | printf("Test failed at bsdconv_pack\nexpect: %s\nresult: %s\n", expect, out); 16 | return 1; 17 | } 18 | free(out); 19 | bsdconv_destroy(ins); 20 | 21 | expect="utf-8:upper:utf-8,ascii"; 22 | out=bsdconv_insert_phase(in, "upper", INTER, 1); 23 | if(strcmp(expect, out)){ 24 | printf("Test failed at bsdconv_insert_phase\nexpect: %s\nresult: %s\n", expect, out); 25 | return 1; 26 | } 27 | free(in); 28 | 29 | in=out; 30 | expect="utf-8:full:utf-8,ascii"; 31 | out=bsdconv_replace_phase(in, 
"full", INTER, 1); 32 | if(strcmp(expect, out)){ 33 | printf("Test failed at bsdconv_replace_phase\nexpect: %s\nresult: %s\n", expect, out); 34 | return 1; 35 | } 36 | free(in); 37 | 38 | in=out; 39 | expect="utf-8:full:utf-8,big5"; 40 | out=bsdconv_replace_codec(in, "big5", 2, 1); 41 | if(strcmp(expect, out)){ 42 | printf("Test failed at bsdconv_replace_codec\nexpect: %s\nresult: %s\n", expect, out); 43 | return 1; 44 | } 45 | free(in); 46 | 47 | in=out; 48 | expect="utf-8,ascii:full:utf-8,big5"; 49 | out=bsdconv_insert_codec(in, "ascii", 0, 1); 50 | if(strcmp(expect, out)){ 51 | printf("Test failed at bsdconv_insert_codec\nexpect: %s\nresult: %s\n", expect, out); 52 | return 1; 53 | } 54 | free(in); 55 | 56 | printf("API tests passed\n"); 57 | free(out); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /tools/AexcludeBCD.py: -------------------------------------------------------------------------------- 1 | # AexcludeBCD.py A B [C...] 
2 | 3 | import sys 4 | import re 5 | 6 | sep = re.compile(r"\s+") 7 | 8 | excl={} 9 | 10 | for fn in sys.argv[2:]: 11 | f = open(fn) 12 | for l in f: 13 | l = l.strip() 14 | if l == "": 15 | continue 16 | if l.startswith("#"): 17 | continue 18 | a = sep.split(l) 19 | p = a[0].upper() 20 | excl[p] = 1 21 | f.close() 22 | 23 | f = open(sys.argv[1]) 24 | for l in f: 25 | l2 = l.strip() 26 | if l2 == "": 27 | sys.stdout.write(l) 28 | continue 29 | if l2.startswith("#"): 30 | sys.stdout.write(l) 31 | continue 32 | a = sep.split(l2) 33 | p = a[0].upper() 34 | if p not in excl: 35 | sys.stdout.write(l) 36 | f.close() 37 | -------------------------------------------------------------------------------- /tools/_bsdconv-completion.zsh: -------------------------------------------------------------------------------- 1 | #compdef bsdconv 2 | 3 | _bsdconv() { 4 | 5 | _arguments \ 6 | '1: :->first'\ 7 | '2: :->second'\ 8 | '*: :->files' 9 | 10 | case $state in 11 | first) 12 | if [ "$words[2]" = "-" ] 13 | then 14 | compadd -- "-l" 15 | else 16 | compadd `bsdconv-completion "$words[2]"` 17 | fi 18 | ;; 19 | second) 20 | if [ "$words[3]" = "-" ] 21 | then 22 | compadd -- "-i" 23 | else 24 | _files 25 | fi 26 | ;; 27 | files) 28 | _files 29 | ;; 30 | esac 31 | } 32 | 33 | _bsdconv "$@" 34 | -------------------------------------------------------------------------------- /tools/codepage.py: -------------------------------------------------------------------------------- 1 | # python codepage.py CPXXXX.TXT | sort > _CPXXXX.txt 2 | import os 3 | import sys 4 | import re 5 | 6 | def v(s): 7 | return int(s, 16) 8 | 9 | def f1(s): 10 | s = s.strip() 11 | s = re.sub("^0x", "", s) 12 | return s 13 | 14 | def f2(s): 15 | s = s.strip() 16 | s = re.sub("^0x", "", s) 17 | s = s.lstrip("0") 18 | if len(s) & 1: 19 | s = "0"+s 20 | return "01"+s 21 | 22 | f = open(sys.argv[1]) 23 | for l in f: 24 | l = l.strip() 25 | if l=="": 26 | continue 27 | if l.startswith("#"): 28 | continue 29 | l = 
l.split("#")[0].strip() 30 | a = l.split(" ") 31 | if len(a)<2: 32 | continue 33 | if v(a[0])<=0x7F and v(a[0])==v(a[1]): 34 | continue 35 | print f1(a[0])+"\t"+f2(a[1]) 36 | -------------------------------------------------------------------------------- /tools/findAinB.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | sep = re.compile(r"\s+") 5 | stp = re.compile(r"^0[xX]") 6 | 7 | fa = open(sys.argv[1]) 8 | fb = open(sys.argv[2]) 9 | 10 | la = {} 11 | lb = {} 12 | 13 | for f,l in ((fa, la), (fb, lb)): 14 | for ln in f: 15 | ln = ln.strip().upper() 16 | if ln == "": 17 | continue 18 | if ln.startswith("#"): 19 | continue 20 | a = sep.split(ln) 21 | p = stp.sub("", a[0]) 22 | l[p]=1 23 | 24 | allnotin = True 25 | allin = True 26 | 27 | total=0 28 | inc=0 29 | ninc=0 30 | 31 | for k in la: 32 | total+=1 33 | if k in lb: 34 | inc+=1 35 | print("IN\t%s " % k) 36 | allnotin = False 37 | else: 38 | ninc+=1 39 | print("NOTIN\t%s " % k) 40 | allin = False 41 | 42 | if allin: 43 | print("All In") 44 | elif allnotin: 45 | print("All Not In") 46 | else: 47 | print("Not All In") 48 | print("Total: ", total) 49 | print("In: ", inc) 50 | print("Not In: ", ninc) 51 | -------------------------------------------------------------------------------- /tools/gen_hex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | if len(sys.argv) != 3: 5 | print("Usage: {} fromfile tofile".format(sys.argv[0])) 6 | sys.exit(1) 7 | 8 | fromfile = sys.argv[1] 9 | tofile = sys.argv[2] 10 | 11 | m = { 12 | "0": "0", 13 | "1": "1", 14 | "2": "2", 15 | "3": "3", 16 | "4": "4", 17 | "5": "5", 18 | "6": "6", 19 | "7": "7", 20 | "8": "8", 21 | "9": "9", 22 | "A": "aA", 23 | "B": "bB", 24 | "C": "cC", 25 | "D": "dD", 26 | "E": "eE", 27 | "F": "fF", 28 | } 29 | 30 | with open(tofile, "w") as tof: 31 | for i in range(256): 32 | hh = "{:02X}".format(i) 33 | 
bb = "".join(["{:02X}".format(ord(c)) for c in hh]) 34 | tof.write("03{}\t{}\n".format(hh, bb)) 35 | 36 | with open(fromfile, "w") as fromf: 37 | for i in range(256): 38 | hh = "{:02X}".format(i) 39 | hhs = [""] 40 | for c in hh: 41 | nhh = [] 42 | for x in m[c]: 43 | for kk in hhs: 44 | nhh.append(kk+x) 45 | hhs = nhh 46 | 47 | for hh in hhs: 48 | bb = "".join(["{:02X}".format(ord(c)) for c in hh]) 49 | fromf.write("{}\t03{}\n".format(bb, hh)) 50 | -------------------------------------------------------------------------------- /tools/mkalias.py: -------------------------------------------------------------------------------- 1 | import os,sys,re; 2 | 3 | def f(c): 4 | s="%X" % ord(c) 5 | if len(s)%2: 6 | r='010%s' % s; 7 | else: 8 | r='01%s' % s; 9 | return r; 10 | 11 | fi=open(sys.argv[1],'rU'); 12 | fo=open(sys.argv[2],'w'); 13 | 14 | for l in fi: 15 | if l.find('\t')!=-1: 16 | a,b=re.split('\t+',l.strip()); 17 | a=','.join([f(c) for c in a]); 18 | b=','.join([f(c) for c in b]); 19 | fo.write("%s\t%s\n" % (a,b)); 20 | 21 | fi.close(); 22 | fo.close(); 23 | 24 | -------------------------------------------------------------------------------- /tools/mkbonus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #mkbonus.py src_list char_list phrase_list 4 | 5 | import sys 6 | import re 7 | from bsdconv import Bsdconv 8 | 9 | clist=open(sys.argv[2], "w") 10 | plist=open(sys.argv[3], "w") 11 | 12 | sc=Bsdconv("utf-8:score#with=cjk:null") 13 | bcv=Bsdconv("utf-8:insert#after=002c:bsdconv-keyword,bsdconv") 14 | bcv_zhtw=Bsdconv("utf-8:zhtw:insert#after=002c:bsdconv-keyword,bsdconv") 15 | 16 | sep=re.compile(r"\s+") 17 | 18 | f=open(sys.argv[1]) 19 | for l in f: 20 | l = l.strip() 21 | if l == "": 22 | continue 23 | if l.startswith("#"): 24 | clist.write(l+"\n") 25 | plist.write(l+"\n") 26 | a = sep.split(l) 27 | p = a[0] 28 | ln = len(p.decode("utf-8")) 29 | if ln > 1: 30 | bonus = 6 31 | p = 
bcv_zhtw.conv(p).rstrip(",") 32 | of = plist 33 | else: 34 | try: 35 | bonus = int(a[1]) 36 | except: 37 | bonus = 0 38 | sc.counter_reset() 39 | sc.conv(p) 40 | score = sc.counter("SCORE") 41 | if score < 5*ln: 42 | bonus += 5*ln - score 43 | if bonus == 0: 44 | continue 45 | p = bcv.conv(p).rstrip(",") 46 | of = clist 47 | of.write("%s\t?%02X,%s\n" % (p, bonus, p)) 48 | 49 | f.close() 50 | clist.close() 51 | plist.close() 52 | -------------------------------------------------------------------------------- /tools/nfkc_gen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # python nfkc_gen.py '⁰¹²³'|sort|uniq 3 | import sys 4 | from bsdconv import Bsdconv 5 | 6 | nfkc = Bsdconv("utf-8:nfkc:utf-8") 7 | i = sys.argv[1].decode("utf-8") 8 | for c in i: 9 | c = c.encode("utf-8") 10 | d = nfkc.conv(c) 11 | if c==d: 12 | continue 13 | print("{}\t{}".format(d, c)) 14 | -------------------------------------------------------------------------------- /tools/setEnvVar.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | reg add "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v BSDCONV_PATH /t REG_SZ /d %~dp0 /f 3 | reg add "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v Path /t REG_EXPAND_SZ /d %PATH%;%~dp0 /f 4 | pause -------------------------------------------------------------------------------- /tools/simple_gen.py: -------------------------------------------------------------------------------- 1 | # simple_gen.py phase_type from_column to_column file 2 | import sys 3 | import re 4 | 5 | def bsdconv01(dt): 6 | dt=dt.strip().lstrip("0").upper() 7 | if len(dt) & 1: 8 | return "010"+dt 9 | else: 10 | return "01"+dt 11 | 12 | def raw(dt): 13 | return dt 14 | 15 | pt = sys.argv[1].upper() 16 | if pt == "FROM": 17 | ff = raw 18 | tf = bsdconv01 19 | elif pt == "INTER": 20 | ff = bsdconv01 21 | tf = bsdconv01 22 | 
else: 23 | ff = bsdconv01 24 | tf = raw 25 | 26 | stp = re.compile(r"^(U\+|0X)") 27 | sep = re.compile(r"\s+") 28 | vld = re.compile(r"^[a-fA-F0-9,]+$") 29 | 30 | from_column = int(sys.argv[2]) 31 | to_column = int(sys.argv[3]) 32 | 33 | f=open(sys.argv[4]) 34 | for l in f: 35 | l = l.strip().upper() 36 | if l == "": 37 | continue 38 | if l.startswith("#"): 39 | continue 40 | a = sep.split(l) 41 | fr = stp.sub("", a[from_column]) 42 | to = stp.sub("", a[to_column]) 43 | if not vld.match(fr): 44 | continue 45 | if not vld.match(to): 46 | continue 47 | 48 | print("%s\t%s" % (ff(fr), tf(to))) 49 | -------------------------------------------------------------------------------- /tools/zh_component.py: -------------------------------------------------------------------------------- 1 | 2 | #argv[1] CNS_component_word_yyyymmdd.txt 3 | #argv[2] CNS_component_yyyymmdd.txt 4 | import os,sys 5 | 6 | def p(s): 7 | if(len(s)%2): 8 | return '0'+s; 9 | return s; 10 | 11 | def tw(s): 12 | try: 13 | r=w[s]; 14 | except: 15 | r=s; 16 | return r; 17 | 18 | w={}; 19 | fi=open(sys.argv[1],'rU') 20 | for l in fi: 21 | a,b,c=l.strip().split('\t'); 22 | w[int(a)]=int(b); 23 | fi.close() 24 | 25 | fi=open(sys.argv[2],'rU') 26 | for l in fi: 27 | a,b,c=l.strip().split('\t'); 28 | a=p(a) 29 | b=p(b) 30 | cns="02%s%s" % (a,b) 31 | comps=c.strip(';').split(';'); 32 | for comp in comps: 33 | com=','.join(["04"+p("%X" % tw(int(x.strip()))) for x in comp.strip(',').split(',')]); 34 | print "%s\t%s" % (cns,com) 35 | --------------------------------------------------------------------------------