├── modules
├── from
│ ├── ANY.txt
│ ├── 00.txt
│ ├── BSDCONV.txt
│ ├── ESCAPE.txt
│ ├── PASS.txt
│ ├── UTF-16BE.txt
│ ├── UTF-16LE.txt
│ ├── _JIS.txt
│ ├── _UTF-8.txt
│ ├── ANSI-CONTROL.txt
│ ├── BSDCONV-LOG.txt
│ ├── CP950-UDA.txt
│ ├── UTF-16BE.man
│ ├── UTF-32BE.man
│ ├── UTF-32BE.txt
│ ├── UTF-32LE.txt
│ ├── IBM-37.man
│ ├── IBM-930.c
│ ├── IBM-933.c
│ ├── IBM-935.c
│ ├── IBM-937.c
│ ├── IBM-939.c
│ ├── _GBK.man
│ ├── IBM-930.man
│ ├── IBM-933.man
│ ├── IBM-935.man
│ ├── IBM-937.man
│ ├── IBM-939.man
│ ├── UTF-16LE.man
│ ├── UTF-32LE.man
│ ├── _BIG5E.man
│ ├── _GB18030.man
│ ├── _GB2312.man
│ ├── _UAO241.man
│ ├── _UAO250.man
│ ├── ASCII-NUMERIC-HTML-ENTITY.txt
│ ├── ASCII.man
│ ├── _BIG5-ETEN.man
│ ├── CCCII.man
│ ├── CP950-UDA.man
│ ├── ASCII-NAMED-HTML-ENTITY.man
│ ├── _SHIFT-JIS.man
│ ├── ASCII-NUMERIC-HTML-ENTITY.man
│ ├── _CP1251.man
│ ├── _CP1252.man
│ ├── _CP1253.man
│ ├── _CP874.man
│ ├── _CP932.man
│ ├── _CP949.man
│ ├── _ISO-8859-1.man
│ ├── _BIG5-2003.man
│ ├── 00.man
│ ├── BYTE.man
│ ├── _CP936.man
│ ├── BIG5-5C.man
│ ├── BSDCONV-KEYWORD.txt
│ ├── BSDCONV.man
│ ├── BSDCONV-LOG.man
│ ├── _UTF-8.man
│ ├── ANSI-CONTROL.man
│ ├── FALLBACK-UNICODE.man
│ ├── PASS.man
│ ├── BSDCONV-KEYWORD.man
│ ├── ANY.man
│ ├── BSDCONV-LOG.c
│ ├── ESCAPE.man
│ ├── _CP950.man
│ ├── _JIS0201.txt
│ ├── _ISO-8859-1.txt
│ ├── alias
│ ├── EBCDIC.c
│ ├── _CP874.txt
│ ├── ANY.c
│ ├── BIG5-5C.txt
│ ├── _CP1255.txt
│ ├── ANSI-CONTROL.c
│ ├── ASCII.txt
│ ├── _CP1253.txt
│ ├── _CP1258.txt
│ ├── _CP1254.txt
│ ├── _CP1257.txt
│ ├── _CP1252.txt
│ ├── PASS.c
│ ├── UTF-32BE.c
│ ├── UTF-32LE.c
│ ├── _CP1256.txt
│ ├── _CP1251.txt
│ ├── ASCII-NUMERIC-HTML-ENTITY.c
│ └── CP950-UDA.c
├── to
│ ├── ANY.txt
│ ├── NULL.txt
│ ├── PASS.txt
│ ├── RAW.txt
│ ├── 00.txt
│ ├── BSDCONV.txt
│ ├── ESCAPE.txt
│ ├── UCS-2BE.txt
│ ├── UCS-2LE.txt
│ ├── UTF-16BE.txt
│ ├── UTF-16LE.txt
│ ├── UTF-32BE.txt
│ ├── UTF-32LE.txt
│ ├── _UTF-8.txt
│ ├── ASCII-HTML-INFO.txt
│ ├── BSDCONV-LOG.txt
│ ├── BSDCONV-OUTPUT.txt
│ ├── BYTE.man
│ ├── ASCII-HTML-UNICODE-IMG.txt
│ ├── ASCII.man
│ ├── CCCII.man
│ ├── IBM-37.man
│ ├── IBM-930.c
│ ├── IBM-930.man
│ ├── IBM-933.c
│ ├── IBM-933.man
│ ├── IBM-935.c
│ ├── IBM-935.man
│ ├── IBM-937.c
│ ├── IBM-937.man
│ ├── IBM-939.c
│ ├── IBM-939.man
│ ├── UCS-2LE.man
│ ├── _CP874.man
│ ├── _CP936.man
│ ├── _CP949.man
│ ├── _CP950.man
│ ├── _GB2312.man
│ ├── _GBK.man
│ ├── _UAO241.man
│ ├── _UAO250.man
│ ├── _UTF-8.man
│ ├── CP936-TRANS.man
│ ├── CP950-TRANS.man
│ ├── UCS-2BE.man
│ ├── UTF-16BE.man
│ ├── UTF-16LE.man
│ ├── UTF-32BE.man
│ ├── UTF-32LE.man
│ ├── _CP1251.man
│ ├── _CP1252.man
│ ├── _CP1253.man
│ ├── _GB18030.man
│ ├── _ISO-8859-1.man
│ ├── _SHIFT-JIS.man
│ ├── BSDCONV.man
│ ├── ASCII-NAMED-HTML-ENTITY.man
│ ├── NULL.man
│ ├── BIG5-5C.man
│ ├── BSDCONV-KEYWORD.txt
│ ├── NULL.c
│ ├── BSDCONV-LOG.man
│ ├── ASCII-HTML-UNICODE-IMG.man
│ ├── BSDCONV-OUTPUT.man
│ ├── RAW.man
│ ├── ASCII-HTML-INFO.man
│ ├── BSDCONV-KEYWORD.man
│ ├── ANY.man
│ ├── PASS.man
│ ├── RAW.c
│ ├── BSDCONV.c
│ ├── ESCAPE.man
│ ├── UTF-32BE.c
│ ├── UCS-2BE.c
│ ├── UTF-32LE.c
│ ├── BSDCONV-LOG.c
│ ├── UCS-2LE.c
│ ├── BSDCONV-OUTPUT.c
│ ├── alias
│ ├── _ISO-8859-1.txt
│ ├── ASCII-HTML-UNICODE-IMG.c
│ ├── _CP874.txt
│ ├── UTF-16BE.c
│ ├── ANY.c
│ ├── PASS.c
│ ├── BIG5-5C.txt
│ ├── UTF-16LE.c
│ ├── _CP1253.txt
│ ├── _CP1252.txt
│ ├── ASCII.txt
│ ├── _CP1251.txt
│ └── _GB18030.c
├── inter
│ ├── COUNT.txt
│ ├── INSERT.txt
│ ├── NULL.txt
│ ├── PASS.txt
│ ├── REPLACE.txt
│ ├── SCORE.txt
│ ├── STRINGS.txt
│ ├── WIDTH.txt
│ ├── _NFC.txt
│ ├── BIG5-DEFRAG.txt
│ ├── SCORE-TRAIN.txt
│ ├── TRIM-WIDTH.txt
│ ├── _NF-ORDER.txt
│ ├── AMBIGUOUS-PAD.txt
│ ├── AMBIGUOUS-UNPAD.txt
│ ├── WHITESPACE-DERAIL.txt
│ ├── WHITESPACE-RERAIL.txt
│ ├── ZH-BONUS.c
│ ├── _NF-HANGUL-COMPOSITION.txt
│ ├── ALIAS-TO.c
│ ├── _NF-HANGUL-DECOMPOSITION.txt
│ ├── ZH-BONUS-PHRASE.c
│ ├── WHITESPACE-RERAIL.man
│ ├── KANJI.man
│ ├── ZHCN.man
│ ├── ZHTW.man
│ ├── LOWER.man
│ ├── UPPER.man
│ ├── UPSIDEDOWN.man
│ ├── FULL.man
│ ├── HALF.man
│ ├── NL2BR.man
│ ├── ZHTW-WORDS.man
│ ├── KANA-PHONETIC.man
│ ├── ZH-FUZZY-CN.man
│ ├── ZH-FUZZY-TW.man
│ ├── MAC.txt
│ ├── UNIX.txt
│ ├── WIN.txt
│ ├── REPLACE.man
│ ├── MAC.man
│ ├── NULL.c
│ ├── UNIX.man
│ ├── WIN.man
│ ├── ALIAS-TO.man
│ ├── NL2BR.txt
│ ├── BIG5-DEFRAG.man
│ ├── COUNT.man
│ ├── WIDTH.man
│ ├── NULL.man
│ ├── WHITESPACE.h
│ ├── WHITESPACE-DERAIL.man
│ ├── SCORE-TRAIN.man
│ ├── ALIAS-FILTER.txt
│ ├── PASS.man
│ ├── ALIAS-FROM.man
│ ├── INSERT.man
│ ├── alias
│ ├── AMBIGUOUS-UNPAD.man
│ ├── TRIM-WIDTH.man
│ ├── STRINGS.man
│ ├── AMBIGUOUS-PAD.man
│ ├── SUB.txt
│ ├── BONUS.c
│ ├── ZH-BONUS.txt
│ ├── SCORE.man
│ ├── ALIAS-INTER.txt
│ ├── ALIAS-FROM.c
│ ├── COUNT.c
│ ├── SUPER.txt
│ ├── ZHTW-WORDS.txt
│ ├── INSERT.c
│ ├── ALIAS-INTER.c
│ ├── WIDTH.c
│ ├── PASS.c
│ ├── REPLACE.c
│ ├── UPSIDEDOWN.txt
│ ├── AMBIGUOUS-UNPAD.c
│ ├── AMBIGUOUS-PAD.c
│ ├── BIG5-DEFRAG.c
│ ├── SCORE-TRAIN.c
│ ├── _NF-HANGUL-DECOMPOSITION.c
│ ├── WHITESPACE-DERAIL.c
│ └── TRIM-WIDTH.c
├── filter
│ ├── BYTE.c
│ ├── ANSI.c
│ ├── CNS11643.c
│ ├── UNICODE.c
│ ├── alias
│ ├── PRINT.c
│ ├── type.c
│ ├── ROMAN.c
│ ├── LAO.c
│ ├── THAI.c
│ ├── HEBREW.c
│ ├── KANNADA.c
│ ├── MIAO.c
│ ├── SYRIAC.c
│ ├── TAGALOG.c
│ ├── TAMIL.c
│ ├── TELUGU.c
│ ├── TIBETAN.c
│ ├── ARMENIAN.c
│ ├── CHEROKEE.c
│ ├── EMOTICON.c
│ ├── HIRAGANA.c
│ ├── IPA.c
│ ├── JAVANESE.c
│ ├── MALAYALAM.c
│ ├── MONGOLIAN.c
│ ├── SAMARITAN.c
│ ├── TIFINAGH.c
│ ├── BRAILLE.c
│ ├── CURRENCY.c
│ ├── MAHJONG.c
│ ├── KHMER.c
│ ├── YI.c
│ ├── DEVANAGARI.c
│ ├── SINHALA.c
│ ├── SUNDANESE.c
│ ├── KATAKANA.c
│ ├── LATIN1.c
│ ├── CUNEIFORM.c
│ ├── EGYPTIAN.c
│ ├── GEORGIAN.c
│ ├── MYANMAR.c
│ ├── PHONETIC.c
│ ├── MUSIC.c
│ ├── PUA.c
│ ├── CJK.man
│ ├── ETHIOPIC.c
│ ├── GREEK.c
│ ├── CYRILLIC.c
│ ├── HANGUL.c
│ ├── ARROWS.c
│ ├── PUNCTUATION.c
│ ├── LATIN.c
│ ├── MATH.c
│ ├── ARABIC.c
│ ├── unicode_range.c
│ └── CJK.c
└── scorer
│ ├── LATIN1.c
│ ├── unicode_range.c
│ └── CJK.c
├── Contributors
├── Todo
├── .gitignore
├── tools
├── setEnvVar.bat
├── nfkc_gen.py
├── mkalias.py
├── _bsdconv-completion.zsh
├── AexcludeBCD.py
├── zh_component.py
├── codepage.py
├── findAinB.py
├── simple_gen.py
├── gen_hex.py
└── mkbonus.py
├── doc
└── index.rst
├── .gitmodules
├── src
├── fmalloc.h
├── missing_func.c
├── libbsdconv_counter.c
├── libbsdconv_filter.c
├── libbsdconv_scorer.c
├── libbsdconv_hash.c
├── libfmalloc.c
└── bsdconv-completion.c
├── LICENSE
├── Makefile.win
└── testsuite
└── api.c
/modules/from/ANY.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/ANY.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/NULL.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/PASS.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/RAW.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/00.txt:
--------------------------------------------------------------------------------
1 | 00 0100
2 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/ESCAPE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/PASS.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-16BE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-16LE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/_JIS.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/_UTF-8.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/COUNT.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/INSERT.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/NULL.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/PASS.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/REPLACE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/SCORE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/STRINGS.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/WIDTH.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/_NFC.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/00.txt:
--------------------------------------------------------------------------------
1 | 0100 00
2 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/ESCAPE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/UCS-2BE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/UCS-2LE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-16BE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-16LE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-32BE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-32LE.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/to/_UTF-8.txt:
--------------------------------------------------------------------------------
1 | 01* ?
2 |
--------------------------------------------------------------------------------
/modules/from/ANSI-CONTROL.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV-LOG.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/CP950-UDA.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-16BE.man:
--------------------------------------------------------------------------------
1 | UTF-16
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-32BE.man:
--------------------------------------------------------------------------------
1 | UTF-32
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-32BE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-32LE.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/BIG5-DEFRAG.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/SCORE-TRAIN.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/TRIM-WIDTH.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/_NF-ORDER.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/ASCII-HTML-INFO.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-LOG.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-OUTPUT.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-37.man:
--------------------------------------------------------------------------------
1 | #EBCDIC codecs
2 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-PAD.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-UNPAD.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/WHITESPACE-DERAIL.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/inter/WHITESPACE-RERAIL.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/BYTE.man:
--------------------------------------------------------------------------------
1 | .redirect from/BYTE
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-930.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-933.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-935.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-937.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-939.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/from/_GBK.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/inter/ZH-BONUS.c:
--------------------------------------------------------------------------------
1 | #include "BONUS.c"
2 |
--------------------------------------------------------------------------------
/modules/inter/_NF-HANGUL-COMPOSITION.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/ASCII-HTML-UNICODE-IMG.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/ASCII.man:
--------------------------------------------------------------------------------
1 | .redirect from/ASCII
2 |
--------------------------------------------------------------------------------
/modules/to/CCCII.man:
--------------------------------------------------------------------------------
1 | .redirect from/CCCII
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-37.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-930.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-930.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-933.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-933.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-935.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-935.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-937.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-937.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-939.c:
--------------------------------------------------------------------------------
1 | #include "EBCDIC.c"
2 |
--------------------------------------------------------------------------------
/modules/to/IBM-939.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/to/UCS-2LE.man:
--------------------------------------------------------------------------------
1 | .redirect to/UCS-2BE
2 |
--------------------------------------------------------------------------------
/modules/to/_CP874.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP874
2 |
--------------------------------------------------------------------------------
/modules/to/_CP936.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/to/_CP949.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP949
2 |
--------------------------------------------------------------------------------
/modules/to/_CP950.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/to/_GB2312.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/to/_GBK.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/to/_UAO241.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/to/_UAO250.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/to/_UTF-8.man:
--------------------------------------------------------------------------------
1 | .redirect from/_UTF-8
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-930.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-933.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-935.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-937.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/from/IBM-939.man:
--------------------------------------------------------------------------------
1 | .redirect from/IBM-37
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-16LE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-16BE
2 |
--------------------------------------------------------------------------------
/modules/from/UTF-32LE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-32BE
2 |
--------------------------------------------------------------------------------
/modules/from/_BIG5E.man:
--------------------------------------------------------------------------------
1 | .redirect from/_BIG5-2003
2 |
--------------------------------------------------------------------------------
/modules/from/_GB18030.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/from/_GB2312.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/from/_UAO241.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/from/_UAO250.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-TO.c:
--------------------------------------------------------------------------------
1 | #include "ALIAS-FROM.c"
2 |
--------------------------------------------------------------------------------
/modules/inter/_NF-HANGUL-DECOMPOSITION.txt:
--------------------------------------------------------------------------------
1 | * ?
2 |
--------------------------------------------------------------------------------
/modules/to/CP936-TRANS.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/to/CP950-TRANS.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP950
2 |
--------------------------------------------------------------------------------
/modules/to/UCS-2BE.man:
--------------------------------------------------------------------------------
1 | UCS-2, subset of UTF-16
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-16BE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-16BE
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-16LE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-16BE
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-32BE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-32BE
2 |
--------------------------------------------------------------------------------
/modules/to/UTF-32LE.man:
--------------------------------------------------------------------------------
1 | .redirect from/UTF-32BE
2 |
--------------------------------------------------------------------------------
/modules/to/_CP1251.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP1251
2 |
--------------------------------------------------------------------------------
/modules/to/_CP1252.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP1252
2 |
--------------------------------------------------------------------------------
/modules/to/_CP1253.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP1253
2 |
--------------------------------------------------------------------------------
/modules/to/_GB18030.man:
--------------------------------------------------------------------------------
1 | .redirect from/_CP936
2 |
--------------------------------------------------------------------------------
/modules/from/ASCII-NUMERIC-HTML-ENTITY.txt:
--------------------------------------------------------------------------------
1 | 2623* ?
2 |
--------------------------------------------------------------------------------
/modules/from/ASCII.man:
--------------------------------------------------------------------------------
1 | #just ascii (U+0001 ~ U+007F)
2 |
--------------------------------------------------------------------------------
/modules/from/_BIG5-ETEN.man:
--------------------------------------------------------------------------------
1 | .redirect from/_BIG5-2003
2 |
--------------------------------------------------------------------------------
/modules/inter/ZH-BONUS-PHRASE.c:
--------------------------------------------------------------------------------
1 | #include "BONUS.c"
2 |
--------------------------------------------------------------------------------
/modules/to/_ISO-8859-1.man:
--------------------------------------------------------------------------------
1 | .redirect from/_ISO-8859-1
2 |
--------------------------------------------------------------------------------
/modules/to/_SHIFT-JIS.man:
--------------------------------------------------------------------------------
1 | .redirect from/_SHIFT-JIS
2 |
--------------------------------------------------------------------------------
/modules/from/CCCII.man:
--------------------------------------------------------------------------------
1 | #just CCCII, it's incompatible with ASCII
2 |
--------------------------------------------------------------------------------
/modules/inter/WHITESPACE-RERAIL.man:
--------------------------------------------------------------------------------
1 | .redirect inter/WHITESPACE-DERAIL
2 |
--------------------------------------------------------------------------------
/modules/inter/KANJI.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 澀谷驛| bsdconv utf-8:kanji:utf-8
3 | 渋谷駅
4 |
--------------------------------------------------------------------------------
/modules/inter/ZHCN.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 測試 | bsdconv utf-8:zhcn:utf-8
3 | 测试
4 |
--------------------------------------------------------------------------------
/modules/inter/ZHTW.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 测试之后|bsdconv utf-8:zhtw:utf-8
3 | 測試之后
4 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 測 | bsdconv utf-8:bsdconv
3 | 016E2C010A
4 |
--------------------------------------------------------------------------------
/modules/inter/LOWER.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo testTEST | bsdconv utf-8:lower:utf-8
3 | testtest
4 |
--------------------------------------------------------------------------------
/modules/inter/UPPER.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo testTEST | bsdconv utf-8:upper:utf-8
3 | TESTTEST
4 |
--------------------------------------------------------------------------------
/modules/inter/UPSIDEDOWN.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo FUNNY | bsdconv utf-8:upsidedown:utf-8
3 | Ⅎ∩ᴎᴎ⅄
4 |
--------------------------------------------------------------------------------
/Contributors:
--------------------------------------------------------------------------------
1 | adamv
2 | Artoria2e5
3 | buganini
4 | godfat
5 | kcwu
6 | PkmX
7 | roytam1
8 | Thomas-Tsai
9 |
--------------------------------------------------------------------------------
/modules/filter/BYTE.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #define TYPE 3
4 |
5 | #include "type.c"
6 |
--------------------------------------------------------------------------------
/modules/inter/FULL.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo testTEST1234 | bsdconv utf-8:full:utf-8
3 | ｔｅｓｔＴＥＳＴ１２３４
4 |
--------------------------------------------------------------------------------
/modules/inter/HALF.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo ｔｅｓｔＴＥＳＴ１２３４ | bsdconv utf-8:half:utf-8
3 | testTEST1234
4 |
--------------------------------------------------------------------------------
/modules/inter/NL2BR.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "a\nb"'|bsdconv utf-8:nl2br:utf-8
3 | a<br />b
4 |
--------------------------------------------------------------------------------
/modules/inter/ZHTW-WORDS.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 测试之后 | bsdconv utf-8:zhtw:zhtw-words:utf-8
3 | 測試之後
4 |
--------------------------------------------------------------------------------
/Todo:
--------------------------------------------------------------------------------
1 | update score table
2 | to/ASCII-TRANS
3 | {}
4 | ()
5 | Auto Scorer
6 | Unpaired UTF-16/U+D800..U+DFFF
7 |
--------------------------------------------------------------------------------
/modules/filter/ANSI.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #define TYPE 0x1b
4 |
5 | #include "type.c"
6 |
--------------------------------------------------------------------------------
/modules/filter/CNS11643.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #define TYPE 2
4 |
5 | #include "type.c"
6 |
--------------------------------------------------------------------------------
/modules/filter/UNICODE.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #define TYPE 1
4 |
5 | #include "type.c"
6 |
--------------------------------------------------------------------------------
/modules/inter/KANA-PHONETIC.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo ドラえもん | bsdconv utf-8:kana_phonetic:utf-8
3 | doraemon
4 |
--------------------------------------------------------------------------------
/modules/inter/ZH-FUZZY-CN.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 澀谷驛渋谷駅涩谷驿| bsdconv utf-8:zh-fuzzy-cn:utf-8
3 | 澀谷驿澀谷驿澀谷驿
4 |
--------------------------------------------------------------------------------
/modules/inter/ZH-FUZZY-TW.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 澀谷驛渋谷駅涩谷驿| bsdconv utf-8:zh-fuzzy-tw:utf-8
3 | 澀谷驛澀谷驛澀谷驛
4 |
--------------------------------------------------------------------------------
/modules/filter/alias:
--------------------------------------------------------------------------------
1 | 01 UNICODE
2 | 1 UNICODE
3 | 02 CNS11643
4 | 2 CNS11643
5 | 03 BYTE
6 | 3 BYTE
7 | 1B ANSI
8 |
--------------------------------------------------------------------------------
/modules/from/CP950-UDA.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Mapping CP950 UDA (User-Defined Areas) to Unicode PUA (Private Use Areas)
3 |
--------------------------------------------------------------------------------
/modules/to/ASCII-NAMED-HTML-ENTITY.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo Ç | bsdconv utf-8:ascii-named-html-entity
3 | &Ccedil;
4 |
--------------------------------------------------------------------------------
/modules/from/ASCII-NAMED-HTML-ENTITY.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo '&uuml;' | bsdconv ascii-named-html-entity:utf-8
3 | ü
4 |
--------------------------------------------------------------------------------
/modules/inter/MAC.txt:
--------------------------------------------------------------------------------
1 | #convert CRLF/CR/LF to CR (MAC)
2 | 010D,010A 010D #CRLF
3 | 010A 010D #LF
4 | 010D 010D #CR
5 |
--------------------------------------------------------------------------------
/modules/inter/UNIX.txt:
--------------------------------------------------------------------------------
1 | #convert CRLF/CR/LF to LF (UNIX)
2 | 010D,010A 010A #CRLF
3 | 010A 010A #LF
4 | 010D 010A #CR
5 |
--------------------------------------------------------------------------------
/modules/to/NULL.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | #used to test decoding
3 | > echo 測試blah blah blah | bsdconv utf-8:null
4 | #(no output)
5 |
--------------------------------------------------------------------------------
/modules/from/_SHIFT-JIS.man:
--------------------------------------------------------------------------------
1 | DESC
2 | japanese
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/ASCII-NUMERIC-HTML-ENTITY.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo '測試' | bsdconv ascii-numeric-html-entity:utf-8
3 | 測試
4 |
--------------------------------------------------------------------------------
/modules/from/_CP1251.man:
--------------------------------------------------------------------------------
1 | DESC
2 | cyrillic alphabet
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_CP1252.man:
--------------------------------------------------------------------------------
1 | DESC
2 | latin alphabet
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_CP1253.man:
--------------------------------------------------------------------------------
1 | DESC
2 | greek alphabet
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_CP874.man:
--------------------------------------------------------------------------------
1 | DESC
2 | thai characters
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_CP932.man:
--------------------------------------------------------------------------------
1 | DESC
2 | japanese characters
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_CP949.man:
--------------------------------------------------------------------------------
1 | DESC
2 | korean characters
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/from/_ISO-8859-1.man:
--------------------------------------------------------------------------------
1 | DESC
2 | latin alphabet
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 |
--------------------------------------------------------------------------------
/modules/inter/WIN.txt:
--------------------------------------------------------------------------------
1 | #convert CRLF/CR/LF to CRLF (WIN)
2 | 010D,010A 010D,010A #CRLF
3 | 010A 010D,010A #LF
4 | 010D 010D,010A #CR
5 |
--------------------------------------------------------------------------------
/modules/from/_BIG5-2003.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Traditional Chinese
3 | ASCII is excluded, use same name without '_' prefixed to get ASCII involved
4 |
--------------------------------------------------------------------------------
/modules/from/00.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Null byte ('\0')
3 | EXAMPLE 00
4 |
5 | OUTPUT
6 | TYPE 01 (UNICODE)
7 | DESC U+0000
8 | EXAMPLE 0100
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | tmp
3 | testsuite/api
4 | bsdconv.aux
5 | bsdconv.log
6 | bsdconv.out
7 | bsdconv.pdf
8 | bsdconv.synctex.gz
9 | bsdconv.toc
10 |
--------------------------------------------------------------------------------
/modules/to/BIG5-5C.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 功能|bsdconv utf-8:big5-5c,big5|hexdump -C
3 | 00000000 a5 5c 5c af e0 0a |.\\...|
4 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-KEYWORD.txt:
--------------------------------------------------------------------------------
1 | 002C 2C
2 | 0009 09
3 | 0020 20
4 | 003F 3F
5 | 000A 0A
6 | 000D 0D
7 | 005C20 5C20
8 | 005C2C 5C2C
9 | 005C5C 5C5C
10 |
--------------------------------------------------------------------------------
/modules/inter/REPLACE.man:
--------------------------------------------------------------------------------
1 | echo ABCD | bsdconv utf-8:replace#0142.0143=0132.0133:utf-8
2 | A23D
3 |
4 | echo ABCD | bsdconv utf-8:replace#0142.0143:utf-8
5 | AD
6 |
--------------------------------------------------------------------------------
/modules/inter/MAC.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "a\nb"' | bsdconv utf-8:mac:utf-8|hexdump -C
3 | 00000000 61 0d 62 |a.b|
4 |
--------------------------------------------------------------------------------
/modules/inter/NULL.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | void cbconv(struct bsdconv_instance *ins){
4 | 	/* No data is produced here: the only effect is flagging this phase's state as NEXTPHASE. */
5 | 	THIS_PHASE(ins)->state.status=NEXTPHASE;
6 | }
7 |
--------------------------------------------------------------------------------
/modules/to/NULL.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | void cbconv(struct bsdconv_instance *ins){
4 | 	/* No data is produced here: the only effect is flagging this phase's state as NEXTPHASE. */
5 | 	THIS_PHASE(ins)->state.status=NEXTPHASE;
6 | }
7 |
--------------------------------------------------------------------------------
/modules/filter/PRINT.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | static const struct uint32_range ranges[] = { // range table consumed by unicode_range.c, included below
4 | 	{ 0x20, 0x7E }, // printable ASCII, space through tilde; was 0x1F, which is the US control character
5 | };
6 |
7 | #include "unicode_range.c"
8 |
--------------------------------------------------------------------------------
/modules/from/BYTE.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "\xaa\xbb\xcc\xdd"' | bsdconv byte:byte|hexdump -C
3 | 00000000 aa bb cc dd |....|
4 |
--------------------------------------------------------------------------------
/modules/from/_CP936.man:
--------------------------------------------------------------------------------
1 | DESC
2 | simplified chinese
3 | ascii is excluded, use same name without '_' prefixed to get ascii involved
4 | cp936_trans is for transliteration
5 |
--------------------------------------------------------------------------------
/modules/inter/UNIX.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "a\r\nb"' | bsdconv utf-8:unix:utf-8 | hexdump -C
3 | 00000000 61 0a 62 |a.b|
4 |
--------------------------------------------------------------------------------
/modules/inter/WIN.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "a\nb"' | bsdconv utf-8:win:utf-8 | hexdump -C
3 | 00000000 61 0d 0a 62 |a..b|
4 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-TO.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | #for internal use
3 | > echo BIG5 | bsdconv ascii:to_alias:ascii
4 | CP950
5 | > echo CP950 | bsdconv ascii:to_alias:ascii
6 | _CP950,ASCII
7 |
--------------------------------------------------------------------------------
/modules/filter/type.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | int cbfilter(struct data_rt *data){
4 | 	/* Returns 1 only for non-empty data whose first byte equals TYPE, otherwise 0. */
5 | 	if(data->len<=0)
6 | 		return 0;
7 | 	return UCP(data->data)[0]==TYPE;
8 | }
9 |
--------------------------------------------------------------------------------
/modules/from/BIG5-5C.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "\xa5\x5c\x5c\xaf\xe0"' | bsdconv big5:utf-8
3 | 功\能
4 | > perl -e 'print "\xa5\x5c\x5c\xaf\xe0"' | bsdconv big5-5c,big5:utf-8
5 | 功能
6 |
--------------------------------------------------------------------------------
/modules/inter/NL2BR.txt:
--------------------------------------------------------------------------------
1 | #convert CRLF/CR/LF to <br />
2 | 010D,010A 013C,0162,0172,0120,012F,013E #CRLF
3 | 010A 013C,0162,0172,0120,012F,013E #LF
4 | 010D 013C,0162,0172,0120,012F,013E #CR
5 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-LOG.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 測試 | env BSDCONV_TO_LOG=/tmp/bsdconv.log bsdconv utf-8:bsdconv-log
3 | > cat /tmp/bsdconv.log
4 | 016E2C ( FREE )
5 | 018A66 ( FREE )
6 | 010A
7 |
--------------------------------------------------------------------------------
/modules/inter/BIG5-DEFRAG.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > perl -e 'print "\xaf\033[1m\xe0"' | bsdconv 'ansi-control,byte:big5-defrag:byte,ansi-control|skip,big5:bsdconv-stdout'
3 | 0180FD
4 | 1B5B316D ( FREE )
5 |
--------------------------------------------------------------------------------
/modules/inter/COUNT.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC No content changed, counter increased
7 |
8 | ARGUMENT
9 | FOR
10 | FORMAT FILTER
11 | $CounterName
12 |
--------------------------------------------------------------------------------
/modules/inter/WIDTH.man:
--------------------------------------------------------------------------------
1 | Increase counters {full,half,ambi} accordingly
2 |
3 | EXAMPLE
4 | > perl -e 'print "123Б測試"' | bsdconv utf-8:width:null
5 | Full width: 2
6 | Half width: 3
7 | Ambi width: 1
8 |
--------------------------------------------------------------------------------
/modules/inter/NULL.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | #used to remove untranslated characters
3 | > echo ABCDEabcde|bsdconv utf-8:super:utf-8
4 | ᴬᴮCᴰᴱᵃᵇᶜᵈᵉ
5 | > echo ABCDEabcde|bsdconv utf-8:super,null:utf-8
6 | ᴬᴮᴰᴱᵃᵇᶜᵈᵉ
7 |
--------------------------------------------------------------------------------
/modules/filter/ROMAN.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | static const struct uint32_range ranges[] = {
4 | { 0x30, 0x39 }, // ASCII digits '0'-'9'
5 | { 0x41, 0x5A }, // ASCII uppercase 'A'-'Z'
6 | { 0x61, 0x7A }, // ASCII lowercase 'a'-'z'
7 | };
8 |
9 | #include "unicode_range.c"
10 |
--------------------------------------------------------------------------------
/modules/inter/WHITESPACE.h:
--------------------------------------------------------------------------------
1 | #define HASHKEY "WHITESPACE"
2 |
3 | struct my_s{ // NOTE(review): presumably the state looked up via HASHKEY by the WHITESPACE-* modules — confirm
4 | struct data_rt *queue; // NOTE(review): looks like the head of a list of held-back data — confirm
5 | struct data_rt **last; // NOTE(review): presumably the tail link used to append to queue — confirm
6 | struct bsdconv_phase *rerail; // NOTE(review): presumably the paired rerail phase — confirm
7 | size_t offsetA; // NOTE(review): meaning not visible from this header — check the users
8 | size_t offsetB; // NOTE(review): meaning not visible from this header — check the users
9 | };
10 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV-KEYWORD.txt:
--------------------------------------------------------------------------------
1 | 2C 002C
2 | 09 0009
3 | 20 0020
4 | 3F 003F
5 | 0A 000A
6 | 0D 000D
7 | 303132302C 005C20,002C
8 | 303132432C 005C2C,002C
9 | 303135432C 005C5C,002C
10 | 5C20 0120
11 | 5C2C 012C
12 | 5C5C 015C
13 |
--------------------------------------------------------------------------------
/modules/to/ASCII-HTML-UNICODE-IMG.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 測 | bsdconv utf-8:ascii-html-unicode-img
3 | 
4 |
--------------------------------------------------------------------------------
/modules/inter/WHITESPACE-DERAIL.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 之后 | bsdconv utf-8:zhtw:zhtw-words:utf-8
3 | 之後
4 | > echo 之 后 | bsdconv utf-8:zhtw:zhtw-words:utf-8
5 | 之 后
6 | > echo 之 后 | bsdconv utf-8:whitespace-derail:zhtw:zhtw-words:whitespace-rerail:utf-8
7 | 之 後
8 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-OUTPUT.man:
--------------------------------------------------------------------------------
1 | ARGUMENT
2 | STDERR
3 | Output to stderr
4 | STDOUT (default)
5 | Output to stdout
6 |
7 | EXAMPLE
8 | #foreground debug tool
9 | > echo 測試 | bsdconv utf-8:bsdconv-output
10 | 016E2C ( FREE )
11 | 018A66 ( FREE )
12 | 010A
13 |
--------------------------------------------------------------------------------
/modules/to/RAW.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC Input data with removal of type identifier byte
7 |
8 | EXAMPLE
9 | > printf 測試 | bsdconv utf-8:raw|hexdump -C
10 | 00000000 6e 2c 8a 66 |n,.f|
11 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC internal format in hexadecimal form
3 | EXAMPLE 019644
4 |
5 | OUTPUT
6 | DESC binary form of input
7 |
8 | EXAMPLE
9 | > echo 016e2c|bsdconv bsdconv:utf-8
10 | 測
11 | > echo 016e2c,018a66 | bsdconv bsdconv:utf-8
12 | 測試
13 |
--------------------------------------------------------------------------------
/tools/setEnvVar.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem Registers the directory of this script as BSDCONV_PATH and appends it to the system Path.
3 | rem The /d values are quoted so that directories containing spaces are passed as one argument.
4 | rem The script directory ends with a backslash; the extra backslash before the closing quote
5 | rem keeps that final backslash from escaping the quote when reg.exe parses its arguments.
6 | reg add "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v BSDCONV_PATH /t REG_SZ /d "%~dp0\" /f
7 | reg add "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /v Path /t REG_EXPAND_SZ /d "%PATH%;%~dp0\" /f
8 | pause
--------------------------------------------------------------------------------
/modules/filter/LAO.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0E80, 0x0EFF }, // Lao
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/THAI.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0E00, 0x0E7F }, // Thai
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/HEBREW.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0590, 0x05FF }, // Hebrew
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/KANNADA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0C80, 0x0CFF }, // Kannada
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/MIAO.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x16F00, 0x16F9F }, // Miao
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/SYRIAC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0700, 0x074F }, // Syriac
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/TAGALOG.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1700, 0x171F }, // Tagalog
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/TAMIL.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0B80, 0x0BFF }, // Tamil
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/TELUGU.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0C00, 0x0C7F }, // Telugu
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/TIBETAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0F00, 0x0FFF }, // Tibetan
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/ARMENIAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0530, 0x058F }, // Armenian
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/CHEROKEE.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x13A0, 0x13FF }, // Cherokee
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/EMOTICON.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1F600, 0x1F64F }, // Emoticons
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/HIRAGANA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x3040, 0x309F }, // Hiragana
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/IPA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0250, 0x02AF }, // IPA Extensions
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/JAVANESE.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0xA980, 0xA9DF }, // Javanese
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/MALAYALAM.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0D00, 0x0D7F }, // Malayalam
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/MONGOLIAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1800, 0x18AF }, // Mongolian
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/SAMARITAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0800, 0x083F }, // Samaritan
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/TIFINAGH.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x2D30, 0x2D7F }, // Tifinagh
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/inter/SCORE-TRAIN.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Generate score table according to input data.
3 | If the environment variable BSDCONV_SCORE is present, it is used as the score table path; otherwise, if ~/.bsdconv.score is present, it is used as the score table.
4 |
5 | SEEALSO
6 | inter/SCORE#EXAMPLE
7 |
--------------------------------------------------------------------------------
/modules/filter/BRAILLE.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x2800, 0x28FF }, // Braille Patterns
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/CURRENCY.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x20A0, 0x20CF }, // Currency Symbols
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/filter/MAHJONG.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1F000, 0x1F02F }, // Mahjong Tiles
9 | };
10 | #include "unicode_range.c"
11 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-FILTER.txt:
--------------------------------------------------------------------------------
1 | 0130,0131 0155,014E,0149,0143,014F,0144,0145
2 | 0131 0155,014E,0149,0143,014F,0144,0145
3 | 0130,0132 0143,014E,0153,0131,0131,0136,0134,0133
4 | 0132 0143,014E,0153,0131,0131,0136,0134,0133
5 | 0130,0133 0142,0159,0154,0145
6 | 0133 0142,0159,0154,0145
7 | 0131,0142 0141,014E,0153,0149
8 |
--------------------------------------------------------------------------------
/modules/inter/PASS.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC Filtered according to arguments
7 |
8 | ARGUMENT
9 | FOR
10 | FORMAT ENUM{UNICODE,1,CNS11643,2,BYTE,3,ANSI,1B}
11 | DESC Input type whitelist
12 | LIMIT
13 | FORMAT INTEGER
14 | DESC Limit the number of packets to be passed
15 |
--------------------------------------------------------------------------------
/modules/filter/KHMER.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1780, 0x17FF }, // Khmer
9 | { 0x19E0, 0x19FF }, // Khmer Symbols
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/filter/YI.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0xA000, 0xA48F }, // Yi Syllables
9 | { 0xA490, 0xA4CF }, // Yi Radicals
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-FROM.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | #for internal use
3 | > echo BIG5 | bsdconv ascii:from_alias:ascii
4 | UAO250
5 | > echo UAO250 | bsdconv ascii:from_alias:ascii
6 | ASCII,_UAO250
7 | > echo LOCALE | bsdconv ascii:from_alias:ascii
8 | UTF-8
9 | > echo UTF-8 | bsdconv ascii:from_alias:ascii
10 | ASCII,_UTF-8
11 |
--------------------------------------------------------------------------------
/modules/inter/INSERT.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC Append ',' (in term of bsdconv-keyword)
7 |
8 | ARGUMENT
9 | AFTER
10 | TYPE Hex Value List
11 | BEFORE
12 | TYPE Hex Value List
13 |
14 | EXAMPLE
15 | printf test|bsdconv utf-8:insert#after=002c:bsdconv-keyword,utf-8
16 | t,e,s,t,
17 |
--------------------------------------------------------------------------------
/modules/filter/DEVANAGARI.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0900, 0x097F }, // Devanagari
9 | { 0xA8E0, 0xA8FF }, // Devanagari Extended
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/filter/SINHALA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0D80, 0x0DFF }, // Sinhala
9 | { 0x111E0, 0x111FF }, // Sinhala Archaic Numbers
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/filter/SUNDANESE.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1B80, 0x1BBF }, // Sundanese
9 | { 0x1CC0, 0x1CCF }, // Sundanese Supplement
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/filter/KATAKANA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x30A0, 0x30FF }, // Katakana
9 | { 0x31F0, 0x31FF }, // Katakana Phonetic Extensions
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/filter/LATIN1.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Reference: http://en.wikipedia.org/wiki/Windows-1252
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0, 0x80 },
9 | { 0x82, 0x8C },
10 | { 0x8E, 0x8E },
11 | { 0x91, 0x9C },
12 | { 0x9E, 0xFF },
13 | };
14 |
15 | #include "unicode_range.c"
16 |
--------------------------------------------------------------------------------
/tools/nfkc_gen.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # python nfkc_gen.py '⁰¹²³'|sort|uniq
3 | import sys
4 | from bsdconv import Bsdconv
5 |
# For every character of argv[1] whose NFKC form differs from the character
# itself, emit "<nfkc form>\t<original>" (UTF-8 bytes) on stdout.
nfkc = Bsdconv("utf-8:nfkc:utf-8")

text = sys.argv[1]
if isinstance(text, bytes):
    # Python 2 hands argv over as bytes; Python 3 has already decoded it.
    text = text.decode("utf-8")

# Write raw bytes so the output is the same UTF-8 under both interpreters.
# NOTE(review): assumes Bsdconv.conv() returns bytes on Python 3 - confirm
# against the python-bsdconv binding.
out = getattr(sys.stdout, "buffer", sys.stdout)
for ch in text:
    c = ch.encode("utf-8")
    d = nfkc.conv(c)
    if c == d:
        continue
    out.write(d + b"\t" + c + b"\n")
14 |
--------------------------------------------------------------------------------
/modules/filter/CUNEIFORM.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x12000, 0x123FF }, // Cuneiform
9 | { 0x12400, 0x1247F }, // Cuneiform Numbers and Punctuation
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/inter/alias:
--------------------------------------------------------------------------------
1 | NFD _NFD:_NF-HANGUL-DECOMPOSITION:_NF-ORDER
2 | NFKD _NFKD:_NF-HANGUL-DECOMPOSITION:_NF-ORDER
3 | NFC NFD:_NFC:_NF-HANGUL-COMPOSITION
4 | NFKC NFKD:_NFC:_NF-HANGUL-COMPOSITION
5 | NFKD-CASEFOLD NFD:CASEFOLD:NFKD:CASEFOLD:NFKD
6 | SPLIT INSERT#AFTER=002C
7 | NOBOM REPLACE#01FEFF
8 | #compatibility
9 | ZH-STRINGS STRINGS#FOR=CJK
10 |
--------------------------------------------------------------------------------
/modules/filter/EGYPTIAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x13000, 0x1342F }, // Egyptian Hieroglyphs
9 | { 0x13430, 0x1343F }, // Egyptian Hieroglyph Format Controls
10 | };
11 | #include "unicode_range.c"
12 |
--------------------------------------------------------------------------------
/modules/scorer/LATIN1.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Reference: http://en.wikipedia.org/wiki/Windows-1252
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range_with_score ranges[] = {
8 | { 0x0, 0x80, 2 },
9 | { 0x82, 0x8C, 2 },
10 | { 0x8E, 0x8E, 2 },
11 | { 0x91, 0x9C, 2 },
12 | { 0x9E, 0xFF, 2 },
13 | };
14 |
15 | #include "unicode_range.c"
16 |
--------------------------------------------------------------------------------
/modules/filter/GEORGIAN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x10A0, 0x10FF }, // Georgian
9 | { 0x1C90, 0x1CBF }, // Georgian Extended
10 | { 0x2D00, 0x2D2F }, // Georgian Supplement
11 | };
12 | #include "unicode_range.c"
13 |
--------------------------------------------------------------------------------
/modules/filter/MYANMAR.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1000, 0x109F }, // Myanmar
9 | { 0xA9E0, 0xA9FF }, // Myanmar Extended-B
10 | { 0xAA60, 0xAA7F }, // Myanmar Extended-A
11 | };
12 | #include "unicode_range.c"
13 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV-LOG.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Any
3 |
4 | OUTPUT
5 | DESC None, data is written into log file
6 |
7 | ENV
8 | BSDCONV_FROM_LOG log file
9 |
10 | EXAMPLE
11 | #background debug tool
12 | > echo 測試 | env BSDCONV_FROM_LOG=/tmp/bsdconv.log bsdconv bsdconv-log:utf-8
13 | > cat /tmp/bsdconv.log
14 | E6
15 | B8
16 | AC
17 | E8
18 | A9
19 | A6
20 | 0A
21 |
--------------------------------------------------------------------------------
/modules/from/_UTF-8.man:
--------------------------------------------------------------------------------
1 | UTF-8, ASCII excluded
2 |
3 | ARGUMENT
4 | CESU
5 | DESC Decode surrogate pairs
6 | LOOSE
7 | DESC Accept undecoded surrogates
8 | NUL
9 | DESC Accept NUL character; must be used with OVERLONG: since ASCII is excluded, NUL can only appear as an overlong sequence
10 | OVERLONG
11 | DESC Accept overlong sequence
12 | SUPER
13 | DESC Accept code point over U+10FFFF
14 |
--------------------------------------------------------------------------------
/modules/to/ASCII-HTML-INFO.man:
--------------------------------------------------------------------------------
1 | EXAMPLE
2 | > echo 測 | bsdconv utf-8:ascii-html-info
3 | 
4 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to bsdconv's documentation!
2 | ===================================
3 |
4 | Contents:
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 |
9 | api
10 | go-bsdconv/doc/api
11 | php-bsdconv/doc/api
12 | python-bsdconv/doc/api
13 | ruby-bsdconv/doc/api
14 |
15 |
16 | Indices and tables
17 | ==================
18 |
19 | * :ref:`genindex`
20 | * :ref:`search`
21 |
22 |
--------------------------------------------------------------------------------
/modules/filter/PHONETIC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0250, 0x02AF }, // IPA Extensions
9 | { 0x1D00, 0x1D7F }, // Phonetic Extensions
10 | { 0x1D80, 0x1DBF }, // Phonetic Extensions Supplement
11 | };
12 | #include "unicode_range.c"
13 |
--------------------------------------------------------------------------------
/modules/filter/MUSIC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1D000, 0x1D0FF }, // Byzantine Musical Symbols
9 | { 0x1D100, 0x1D1FF }, // Musical Symbols
10 | { 0x1D200, 0x1D24F }, // Ancient Greek Musical Notation
11 | };
12 | #include "unicode_range.c"
13 |
--------------------------------------------------------------------------------
/modules/filter/PUA.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0xE000, 0xF8FF }, // Private Use Area
9 | { 0xF0000, 0xFFFFF }, // Supplementary Private Use Area-A
10 | { 0x100000, 0x10FFFF }, // Supplementary Private Use Area-B
11 | };
12 | #include "unicode_range.c"
13 |
--------------------------------------------------------------------------------
/modules/from/ANSI-CONTROL.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC ANSI control sequence
3 | EXAMPLE "\x1B[30m"
4 |
5 | OUTPUT
6 | TYPE 1B (ANSI CONTROL SEQUENCE)
7 | DESC Input as-is
8 | EXAMPLE 1B5B33306D
9 |
10 | EXAMPLE
11 | #used in https://github.com/buganini/bug5
12 | #used with inter/BIG5-DEFRAG
13 | > perl -e 'print "a\033[1mb"' | bsdconv ansi-control,utf-8:bsdconv-stdout
14 | 0161
15 | 1B5B316D ( FREE )
16 | 0162
17 |
--------------------------------------------------------------------------------
/modules/filter/CJK.man:
--------------------------------------------------------------------------------
1 | U+3400..U+4DB5 CJK Unified Ideographs Extension A 3.0
2 | U+4E00..U+9FA5 CJK Unified Ideographs 1.1
3 | U+9FA6..U+9FBB CJK Unified Ideographs 4.1
4 | U+F900..U+FA2D CJK Compatibility Ideographs 1.1
5 | U+FA30..U+FA6A CJK Compatibility Ideographs 3.2
6 | U+FA70..U+FAD9 CJK Compatibility Ideographs 4.1
7 | U+20000..U+2A6D6 CJK Unified Ideographs Extension B 3.1
8 | U+2F800..U+2FA1D CJK Compatibility Supplement 3.1
9 |
--------------------------------------------------------------------------------
/modules/filter/ETHIOPIC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1200, 0x137F }, // Ethiopic
9 | { 0x1380, 0x139F }, // Ethiopic Supplement
10 | { 0x2D80, 0x2DDF }, // Ethiopic Extended
11 | { 0xAB00, 0xAB2F }, // Ethiopic Extended-A
12 | };
13 | #include "unicode_range.c"
14 |
--------------------------------------------------------------------------------
/modules/from/FALLBACK-UNICODE.man:
--------------------------------------------------------------------------------
1 | DESC
2 | This could be used with windows codepages to achieve the behavior described in MICSFT/WindowsBestFit
3 |
4 | INPUT
5 | DESC Any
6 |
7 | OUTPUT
8 | TYPE Unicode
9 | DESC Same value of codepoint as input
10 |
11 | EXAMPLE
12 | > printf "\x8E\x81" | bsdconv cp1252:bsdconv-stdout
13 | 01017D
14 | > printf "\x8E\x81" | bsdconv cp1252,fallback-unicode:bsdconv-stdout
15 | 01017D
16 | 0181
17 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-KEYWORD.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Products of from/BSDCONV-KEYWORD
3 |
4 | OUTPUT
5 | DESC Original form of data from from/BSDCONV-KEYWORD
6 |
7 | EXAMPLE
8 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv
9 | 016E2C,018A66 016D4B,018BD5
10 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv | bsdconv bsdconv-keyword,bsdconv:bsdconv-keyword,utf-8
11 | 測,試 测,试
12 |
--------------------------------------------------------------------------------
/modules/filter/GREEK.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0370, 0x03FF }, // Greek and Coptic
9 | { 0x1F00, 0x1FFF }, // Greek Extended
10 | { 0x10140, 0x1018F }, // Ancient Greek Numbers
11 | { 0x1D200, 0x1D24F }, // Ancient Greek Musical Notation
12 | };
13 | #include "unicode_range.c"
14 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-UNPAD.man:
--------------------------------------------------------------------------------
1 | DESC Unpad padded ambiguous width characters
2 |
3 | INPUT
4 | TYPE Unicode
5 |
6 | OUTPUT
7 | TYPE Unicode
8 | DESC Remove U+00A0 after ambiguous characters
9 |
10 | CTL
11 | BSDCONV_AMBIGUOUS_PAD
12 | PTR
13 | don't care
14 | INT
15 | 1 Enable
16 | 2 Disable
17 |
18 | EXAMPLE
19 | #used in https://github.com/buganini/bug5
20 | > echo Б И 2|bsdconv utf-8:ambiguous-unpad:utf-8
21 | БИ 2
22 |
--------------------------------------------------------------------------------
/modules/to/ANY.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Any byte
3 | EXAMPLE *
4 |
5 | OUTPUT
6 | TYPE Specified by argument
7 | DESC Specified by argument
8 |
9 | ARGUMENT
10 | ERROR
11 | DESC Increase counter (OERR if no counter name specified)
12 | DROP
13 | DESC Don't output any data
14 | $DataList
15 | DESC Output value
16 | EXAMPLE 3F
17 | EXAMPLE 3F.2F
18 |
19 | EXAMPLE
20 | > echo -n test測試test |bsdconv utf-8:ascii,any#3f
21 | test??test
22 |
--------------------------------------------------------------------------------
/modules/filter/CYRILLIC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0400, 0x04FF }, // Cyrillic
9 | { 0x0500, 0x052F }, // Cyrillic Supplement
10 | { 0x1C80, 0x1C8F }, // Cyrillic Extended-C
11 | { 0x2DE0, 0x2DFF }, // Cyrillic Extended-A
12 | { 0xA640, 0xA69F }, // Cyrillic Extended-B
13 | };
14 | #include "unicode_range.c"
15 |
--------------------------------------------------------------------------------
/tools/mkalias.py:
--------------------------------------------------------------------------------
1 | import os,sys,re;
2 |
def f(c):
    """Return the ``01``-prefixed hex token for character ``c``.

    The token is ``01`` followed by the uppercase hexadecimal value of
    ``ord(c)``, left-padded with one ``0`` when it has an odd number of digits.
    """
    digits = "%X" % ord(c)
    if len(digits) % 2:
        digits = "0" + digits
    return "01" + digits
10 |
# Convert every "<alias>\t<target>" line of argv[1] into two comma-separated
# lists of hex tokens (one token per character, produced by f) and write them,
# tab-separated, to argv[2]. Lines that contain no tab are skipped.
# Plain 'r' is used because the 'U' mode flag was removed in Python 3.11;
# strip() below already discards any trailing "\r".
with open(sys.argv[1], 'r') as fi, open(sys.argv[2], 'w') as fo:
    for line in fi:
        if '\t' not in line:
            continue
        # One or more tabs separate the two fields.
        a, b = re.split('\t+', line.strip())
        a = ','.join(f(c) for c in a)
        b = ','.join(f(c) for c in b)
        fo.write("%s\t%s\n" % (a, b))
23 |
24 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "doc/python-bsdconv"]
2 | path = doc/python-bsdconv
3 | url = https://github.com/buganini/python-bsdconv.git
4 | [submodule "doc/php-bsdconv"]
5 | path = doc/php-bsdconv
6 | url = https://github.com/buganini/php-bsdconv.git
7 | [submodule "doc/ruby-bsdconv"]
8 | path = doc/ruby-bsdconv
9 | url = https://github.com/buganini/ruby-bsdconv.git
10 | [submodule "doc/go-bsdconv"]
11 | path = doc/go-bsdconv
12 | url = https://github.com/buganini/go-bsdconv.git
13 |
--------------------------------------------------------------------------------
/modules/filter/HANGUL.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x1100, 0x11FF }, // Hangul Jamo
9 | { 0x3130, 0x318F }, // Hangul Compatibility Jamo
10 | { 0xA960, 0xA97F }, // Hangul Jamo Extended-A
11 | { 0xAC00, 0xD7AF }, // Hangul Syllables
12 | { 0xD7B0, 0xD7FF }, // Hangul Jamo Extended-B
13 | };
14 | #include "unicode_range.c"
15 |
--------------------------------------------------------------------------------
/modules/filter/ARROWS.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x2190, 0x21FF }, // Arrows
9 | { 0x27F0, 0x27FF }, // Supplemental Arrows-A
10 | { 0x2900, 0x297F }, // Supplemental Arrows-B
11 | { 0x2B00, 0x2BFF }, // Miscellaneous Symbols and Arrows
12 | { 0x1F800, 0x1F8FF }, // Supplemental Arrows-C
13 | };
14 | #include "unicode_range.c"
15 |
--------------------------------------------------------------------------------
/modules/to/PASS.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC Filtered or decorated according to arguments
7 |
8 | ARGUMENT
9 | MARK
10 | DESC Add "MARK" flag
11 | FOR
12 | FORMAT Filter
13 |
14 | EXAMPLE
15 | #pass through packets queue
16 | > echo -n abc | bsdconv 'ascii:pass|pass:bsdconv-stdout'
17 | 0161 ( SKIP )
18 | 0162 ( SKIP )
19 | 0163 ( SKIP )
20 | > echo -n 測試 | bsdconv 'utf-8:utf-16le|pass:bsdconv-stdout'
21 | 2C6E ( FREE )
22 | 668A ( FREE )
23 |
--------------------------------------------------------------------------------
/modules/from/PASS.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE Any
3 |
4 | OUTPUT
5 | TYPE Any
6 | DESC Filtered or decorated according to arguments
7 |
8 | ARGUMENT
9 | UNMARK
10 | DESC Remove "MARK" flag
11 | FOR
12 | FORMAT Filter
13 |
14 | EXAMPLE
15 | #pass through packets queue
16 | > echo -n abc | bsdconv 'ascii:pass|pass:bsdconv-stdout'
17 | 0161 ( SKIP )
18 | 0162 ( SKIP )
19 | 0163 ( SKIP )
20 | > echo -n 測試 | bsdconv 'utf-8:utf-16le|pass:bsdconv-stdout'
21 | 2C6E ( FREE )
22 | 668A ( FREE )
23 |
--------------------------------------------------------------------------------
/modules/filter/PUNCTUATION.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x2000, 0x206F }, // General Punctuation
9 | { 0x2E00, 0x2E7F }, // Supplemental Punctuation
10 | { 0x3000, 0x303F }, // CJK Symbols and Punctuation
11 | { 0x12400, 0x1247F }, // Cuneiform Numbers and Punctuation
12 | { 0x16FE0, 0x16FFF }, // Ideographic Symbols and Punctuation
13 | };
14 | #include "unicode_range.c"
15 |
--------------------------------------------------------------------------------
/tools/_bsdconv-completion.zsh:
--------------------------------------------------------------------------------
1 | #compdef bsdconv
2 |
# Completion function for bsdconv: the first word is completed through the
# bsdconv-completion helper (or "-l"), the second is "-i" or a file, and every
# later word is a file.
_bsdconv() {
	# Positions 1 and 2 get their own states; anything after falls into "files".
	_arguments \
		'1: :->first' \
		'2: :->second' \
		'*: :->files'

	case $state in
		first)
			# A lone "-" is offered "-l"; anything else is handed to the helper.
			if [[ "$words[2]" == "-" ]]; then
				compadd -- "-l"
			else
				compadd $(bsdconv-completion "$words[2]")
			fi
			;;
		second)
			if [[ "$words[3]" == "-" ]]; then
				compadd -- "-i"
			else
				_files
			fi
			;;
		files)
			_files
			;;
	esac
}
32 |
33 | _bsdconv "$@"
34 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV-KEYWORD.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Usually used for maintaining bsdconv inter-mapping tables
3 |
4 | INPUT
5 | DESC Control characters for bsdconv table format
6 | EXAMPLE \n\t,?\\
7 |
8 | OUTPUT
9 | DESC Properly escaped form for to/BSDCONV-KEYWORD
10 |
11 | EXAMPLE
12 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv
13 | 016E2C,018A66 016D4B,018BD5
14 | > printf "測,試\t测,试\n" | bsdconv bsdconv-keyword,utf-8:bsdconv-keyword,bsdconv | bsdconv bsdconv-keyword,bsdconv:bsdconv-keyword,utf-8
15 | 測,試 测,试
16 |
--------------------------------------------------------------------------------
/modules/inter/TRIM-WIDTH.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | TYPE ANY
3 |
4 | OUTPUT
5 | TYPE ANY
6 | DESC Trimmed content
7 |
8 | ARGUMENT
9 | AMBIGUOUS-AS-WIDE
10 | AMBI-AS-WIDE
11 | Count ambiguous-width characters as width 2
12 | $Integer (mandatory)
13 | The width of the desired trim
14 |
15 | EXAMPLE
16 | > echo ˋˊ這是個很長的字串啊啊啊 | bsdconv "utf-8:trim-width#22&ambi-as-wide:utf-8"
17 | ˋˊ這是個很長的字串啊
18 | > echo ˋˊ這是個很長的字串啊啊啊 | bsdconv utf-8:trim-width#22:utf-8
19 | ˋˊ這是個很長的字串啊啊
20 | > echo 三長兩短ˊˋ3長2短 | bsdconv utf-8:trim-width#10&ambiguous-as-wide:utf-8
21 |
--------------------------------------------------------------------------------
/src/fmalloc.h:
--------------------------------------------------------------------------------
#ifndef FMALLOC_H
#define FMALLOC_H

/*
 * Allocation front end. With USE_FMALLOC defined, FMALLOC/FFREE expand to the
 * fmalloc()/fmfree() functions declared below; otherwise they expand to the
 * standard malloc()/free().
 */

#ifdef USE_FMALLOC

#include <stddef.h> /* size_t */

void * fmalloc(size_t s);
void fmfree(void *p);
void fmsync(void);
void fmcleanup(void);

struct fmalloc_entry {
	void *z;
	size_t offset;
	int fd;
	struct fmalloc_entry *next;
};

/* Parenthesized so the product stays intact inside larger expressions
 * (e.g. `x / FMALLOC_SIZE`). */
#define FMALLOC_SIZE (256*1024*1024)
#define FMALLOC_NUM 6
#define FMALLOC(X) fmalloc(X)
#define FFREE(X) fmfree(X)

#else

#include <stdlib.h> /* malloc, free */

#define FMALLOC(X) malloc(X)
#define FFREE(X) free(X)

#endif

#endif
31 |
--------------------------------------------------------------------------------
/modules/inter/STRINGS.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Similar to STRINGS(1), but targets characters matching the specified filter
3 |
4 | ARGUMENT
5 | FOR
6 | FORMAT FILTER
7 | MIN-LEN
8 | FORMAT INTEGER
9 | AFTER
10 | TYPE Hex Value List
11 | DEFAULT 010A
12 | BEFORE
13 | TYPE Hex Value List
14 |
15 | EXAMPLE
16 | > echo abd測試efg功能,hij | bsdconv utf-8:strings#for=cjk:utf-8
17 | 測試
18 | 功能
19 |
20 | > echo aㄎabㄎabcㄉabcd|bsdconv utf-8:strings#min-len=3:utf-8
21 | abc
22 | abcd
23 |
24 | > echo aㄎabㄎabcㄉabcd | bsdconv 'utf-8:strings#min-len=3&sep=013b.010a:utf-8'
25 | abc;
26 | abcd;
27 |
--------------------------------------------------------------------------------
/modules/filter/LATIN.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0000, 0x007F }, // Basic Latin
9 | { 0x0080, 0x00FF }, // Latin-1 Supplement
10 | { 0x0100, 0x017F }, // Latin Extended-A
11 | { 0x0180, 0x024F }, // Latin Extended-B
12 | { 0x1E00, 0x1EFF }, // Latin Extended Additional
13 | { 0x2C60, 0x2C7F }, // Latin Extended-C
14 | { 0xA720, 0xA7FF }, // Latin Extended-D
15 | { 0xAB30, 0xAB6F }, // Latin Extended-E
16 | };
17 | #include "unicode_range.c"
18 |
--------------------------------------------------------------------------------
/modules/filter/MATH.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x2200, 0x22FF }, // Mathematical Operators
9 | { 0x27C0, 0x27EF }, // Miscellaneous Mathematical Symbols-A
10 | { 0x2980, 0x29FF }, // Miscellaneous Mathematical Symbols-B
11 | { 0x2A00, 0x2AFF }, // Supplemental Mathematical Operators
12 | { 0x1D400, 0x1D7FF }, // Mathematical Alphanumeric Symbols
13 | { 0x1EE00, 0x1EEFF }, // Arabic Mathematical Alphabetic Symbols
14 | };
15 | #include "unicode_range.c"
16 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-PAD.man:
--------------------------------------------------------------------------------
1 | DESC Pad ambiguous width characters
2 |
3 | INPUT
4 | TYPE Unicode
5 |
6 | OUTPUT
7 | TYPE Unicode
8 | DESC Append U+00A0 after ambiguous characters
9 |
10 | CTL
11 | BSDCONV_AMBIGUOUS_PAD
12 | PTR
13 | don't care
14 | INT
15 | 1 Enable
16 | 2 Disable
17 |
18 | EXAMPLE
19 | #used in https://github.com/buganini/bug5
20 | > echo БИ 2 | bsdconv utf-8:ambiguous-pad:utf-8
21 | Б И 2
22 | > echo БИ 2 | bsdconv utf-8:ambiguous-pad:utf-8 | hexdump -C
23 | 00000000 d0 91 c2 a0 d0 98 c2 a0 20 32 0a |........ 2.|
24 | #(the padded character is U+00A0)
25 |
--------------------------------------------------------------------------------
/modules/inter/SUB.txt:
--------------------------------------------------------------------------------
1 | 0103B2 011D66
2 | 0103B3 011D67
3 | 0103C1 011D68
4 | 0103C6 011D69
5 | 0103C7 011D6A
6 | 012212 01208B
7 | 0128 01208D
8 | 0129 01208E
9 | 012B 01208A
10 | 0130 012080
11 | 0131 012081
12 | 0132 012082
13 | 0133 012083
14 | 0134 012084
15 | 0135 012085
16 | 0136 012086
17 | 0137 012087
18 | 0138 012088
19 | 0139 012089
20 | 013D 01208C
21 | 0161 012090
22 | 0165 012091
23 | 0168 012095
24 | 0169 011D62
25 | 016A 012C7C
26 | 016B 012096
27 | 016C 012097
28 | 016D 012098
29 | 016E 012099
30 | 016F 012092
31 | 0170 01209A
32 | 0172 011D63
33 | 0173 01209B
34 | 0174 01209C
35 | 0175 011D64
36 | 0176 011D65
37 | 0178 012093
38 |
--------------------------------------------------------------------------------
/modules/filter/ARABIC.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = {
8 | { 0x0600, 0x06FF }, // Arabic
9 | { 0x0750, 0x077F }, // Arabic Supplement
10 | { 0x08A0, 0x08FF }, // Arabic Extended-A
11 | { 0xFB50, 0xFDFF }, // Arabic Presentation Forms-A
12 | { 0xFE70, 0xFEFF }, // Arabic Presentation Forms-B
13 | { 0x10A60, 0x10A7F }, // Old South Arabian
14 | { 0x10A80, 0x10A9F }, // Old North Arabian
15 | { 0x1EE00, 0x1EEFF }, // Arabic Mathematical Alphabetic Symbols
16 | };
17 | #include "unicode_range.c"
18 |
--------------------------------------------------------------------------------
/tools/AexcludeBCD.py:
--------------------------------------------------------------------------------
1 | # AexcludeBCD.py A B [C...]
2 |
3 | import sys
4 | import re
5 |
sep = re.compile(r"\s+")

# Keys to drop: the upper-cased first whitespace-separated field of every
# non-blank, non-comment line in files B, C, ... (argv[2:]).
excl = set()

for fn in sys.argv[2:]:
    with open(fn) as f:
        for l in f:
            l = l.strip()
            if l == "" or l.startswith("#"):
                continue
            excl.add(sep.split(l)[0].upper())

# Echo file A (argv[1]) to stdout, omitting lines whose key is excluded.
with open(sys.argv[1]) as f:
    for l in f:
        l2 = l.strip()
        # Blank lines and comments of A are always passed through unchanged.
        if l2 == "" or l2.startswith("#"):
            sys.stdout.write(l)
            continue
        if sep.split(l2)[0].upper() not in excl:
            sys.stdout.write(l)
37 |
--------------------------------------------------------------------------------
/modules/from/ANY.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Any byte
3 | EXAMPLE *
4 |
5 | OUTPUT
6 | TYPE Specified by argument
7 | DESC Specified by argument
8 |
9 | ARGUMENT
10 | ERROR
11 | DESC Increase counter (IERR if no counter name specified)
12 | DROP
13 | DESC Don't output any data
14 | $DataList
15 | DESC Output value
16 | EXAMPLE 013F
17 | EXAMPLE 013F.012F
18 |
19 | EXAMPLE
20 | > echo test測試test | bsdconv ascii,any#013f&error:ascii
21 | test??????test
22 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,3f:utf-8 #alias
23 | 陶?穘?試
24 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,any#01fffd:utf-8
25 | 陶�穘�試
26 | > echo 陶喆測試|bsdconv utf-8:uao250|bsdconv cp950,sub:utf-8 #alias
27 | 陶�穘�試
28 |
--------------------------------------------------------------------------------
/modules/to/RAW.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../../src/bsdconv.h"
3 |
4 | void cbconv(struct bsdconv_instance *ins){
5 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
6 | int i;
7 |
8 | DATA_MALLOC(ins, this_phase->data_tail->next);
9 | this_phase->data_tail=this_phase->data_tail->next;
10 | this_phase->data_tail->next=NULL;
11 | this_phase->data_tail->len=ins->phase[ins->phase_index].curr->len-1;
12 | this_phase->data_tail->flags=F_FREE;
13 | this_phase->data_tail->data=malloc(this_phase->data_tail->len);
14 | for(i=0;idata_tail->len;++i){
15 | CP(this_phase->data_tail->data)[i]=CP(this_phase->curr->data)[i+1];
16 | }
17 | this_phase->state.status=NEXTPHASE;
18 | return;
19 | }
20 |
--------------------------------------------------------------------------------
/modules/from/BSDCONV-LOG.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | #define TAILIZE(p) while(*p){ p++ ;}
6 |
7 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
8 | THIS_CODEC(ins)->priv=fopen(getenv("BSDCONV_FROM_LOG"),"a");
9 | return 0;
10 | }
11 |
12 | void cbdestroy(struct bsdconv_instance *ins){
13 | void *fp=THIS_CODEC(ins)->priv;
14 | fclose(fp);
15 | }
16 |
17 | void cbconv(struct bsdconv_instance *ins){
18 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
19 | FILE *fp=this_phase->codec[this_phase->index].priv;
20 | fprintf(fp,"%02X\n", (int)UCP(this_phase->curr->data)[this_phase->i]);
21 | this_phase->state.status=NEXTPHASE;
22 | fflush(fp);
23 | }
24 |
--------------------------------------------------------------------------------
/tools/zh_component.py:
--------------------------------------------------------------------------------
1 |
2 | #argv[1] CNS_component_word_yyyymmdd.txt
3 | #argv[2] CNS_component_yyyymmdd.txt
4 | import os,sys
5 |
def p(s):
    """Return s left-padded with one '0' when its length is odd."""
    return s if len(s) % 2 == 0 else '0' + s
10 |
def tw(s):
    """Translate s through the module-level table w; return s unchanged when it has no entry."""
    # dict.get replaces the former bare `except:`, which also swallowed
    # unrelated errors such as KeyboardInterrupt.
    return w.get(s, s)
17 |
# Table read from argv[1]: integer in column 1 -> integer in column 2.
w = {}
# Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11, and
# strip() below already discards any trailing '\r'.
with open(sys.argv[1], 'r') as fi:
    for l in fi:
        a, b, c = l.strip().split('\t')
        w[int(a)] = int(b)

# Each argv[2] row has three tab-separated columns; the third is a
# ';'-separated list of ','-separated decimal component numbers.
with open(sys.argv[2], 'r') as fi:
    for l in fi:
        a, b, c = l.strip().split('\t')
        cns = "02%s%s" % (p(a), p(b))
        for comp in c.strip(';').split(';'):
            com = ','.join(["04" + p("%X" % tw(int(x.strip()))) for x in comp.strip(',').split(',')])
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%s\t%s" % (cns, com))
35 |
--------------------------------------------------------------------------------
/src/missing_func.c:
--------------------------------------------------------------------------------
1 | #ifdef WIN32
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 |
/*
 * Replacement for BSD strsep(3) on platforms that lack it.
 *
 * Returns the token starting at *stringp and overwrites the first character
 * found in delim with '\0'. *stringp is advanced past that delimiter, or set
 * to NULL when the token was the last one. Returns NULL once *stringp is
 * NULL. Empty fields, including a trailing one, are returned as "".
 */
char * strsep(char **stringp, const char *delim){
	char *tok=*stringp;
	char *end;

	if(tok==NULL){
		return NULL;
	}
	end=tok+strcspn(tok, delim);
	if(*end){
		*end='\0';
		*stringp=end+1;
	}else{
		/* No delimiter left: this was the final token. */
		*stringp=NULL;
	}
	return tok;
}
18 |
19 | char * strndup(const char *str, size_t len){
20 | char *r;
21 | size_t l=strlen(str);
22 | if(len
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | #define TAILIZE(p) while(*p){ p++ ;}
6 |
7 | void cbconv(struct bsdconv_instance *ins){
8 | int i;
9 | char *p;
10 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
11 |
12 | this_phase->state.status=NEXTPHASE;
13 |
14 | DATA_MALLOC(ins, this_phase->data_tail->next);
15 | this_phase->data_tail=this_phase->data_tail->next;
16 | this_phase->data_tail->next=NULL;
17 | this_phase->data_tail->flags=F_FREE;
18 |
19 | this_phase->data_tail->len=this_phase->curr->len*2;
20 | p=this_phase->data_tail->data=malloc(this_phase->data_tail->len+1);
21 | for(i=0;icurr->len;++i){
22 | sprintf(p,"%02X", UCP(this_phase->curr->data)[i]);
23 | TAILIZE(p);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/modules/from/ESCAPE.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC URL escaped or C-style escaped sequence
3 | EXAMPLE %XX %uUUUU \uUUUU \xXX \OOO
4 |
5 | OUTPUT
6 | TYPE Unicode (for %uUUUU and \uUUUU) or Byte (others)
7 |
8 | EXAMPLE
9 | > echo %u6e2c | bsdconv escape:utf-8
10 | 測
11 | > echo %u6e2c%e8%a9%a6 | bsdconv escape:bsdconv-stdout
12 | 016E2C ( FREE )
13 | 03E8 ( FREE )
14 | 03A9 ( FREE )
15 | 03A6 ( FREE )
16 | > echo %u6e2c%e8%a9%a6 | bsdconv 'escape:unicode,byte|skip,utf-8:utf-8'
17 | 測試
18 | > echo %u6e2c%b8%d5功能 | bsdconv 'escape,utf-8:unicode,byte|skip,big5:utf-8'
19 | 測試功能
20 | > echo '%u6e2c%b8%d5功能' | bsdconv 'escape,ascii-numeric-html-entity,utf-8:unicode,byte|skip,big5:utf-8'
21 | 測試功能
22 | > echo '\346\270\254\350\251\246' | bsdconv 'escape:unicode,byte|skip,utf-8:utf-8'
23 | 測試
24 |
--------------------------------------------------------------------------------
/tools/codepage.py:
--------------------------------------------------------------------------------
1 | # python codepage.py CPXXXX.TXT | sort > _CPXXXX.txt
2 | import os
3 | import sys
4 | import re
5 |
def v(s):
    """Parse s as a base-16 integer (an optional 0x prefix is accepted by int())."""
    value = int(s, 16)
    return value
8 |
def f1(s):
    """Strip surrounding whitespace and a leading lower-case '0x' from s."""
    return re.sub("^0x", "", s.strip())
13 |
def f2(s):
    """Turn a hex value such as '0x00A5' into '01' + its digits.

    Leading zeros are dropped and a single '0' is put back when the remaining
    digit count is odd, so the result always has an even number of digits.
    """
    digits = re.sub("^0x", "", s.strip()).lstrip("0")
    padding = "0" if len(digits) % 2 else ""
    return "01" + padding + digits
21 |
# Read the vendor mapping table named by argv[1] and print one
# "<code>\t01<unicode>" line per mapping.
with open(sys.argv[1]) as f:
    for l in f:
        l = l.strip()
        if l == "" or l.startswith("#"):
            continue
        # Drop a trailing "# comment", then split on any run of whitespace so
        # tab-separated and space-separated tables are both accepted.
        a = l.split("#")[0].split()
        if len(a) < 2:
            # No target column: nothing to emit for this line.
            continue
        if v(a[0]) <= 0x7F and v(a[0]) == v(a[1]):
            # Identity mappings at or below 0x7F are omitted.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(f1(a[0]) + "\t" + f2(a[1]))
36 |
--------------------------------------------------------------------------------
/modules/from/_CP950.man:
--------------------------------------------------------------------------------
1 | #traditional chinese
2 |
3 | EXAMPLE
4 | #ASCII is excluded; use the same name without the '_' prefix to include ASCII
5 | > perl -e 'print "\xa5\x5c\xaf\xe0"' | bsdconv _cp950:utf-8
6 | 功能
7 | > echo 喆 | bsdconv utf-8:_cp950,ascii | hexdump -C
8 | 00000000 0a |.|
9 | > echo 喆 | bsdconv utf-8:_uao241,ascii | hexdump -C
10 | 00000000 95 ed 0a |...|
11 | > echo 测试 | bsdconv utf-8:big5 | hexdump -C
12 | 00000000 0a |.|
13 | #cp950_trans is for transliteration
14 | > echo 测试 | bsdconv utf-8:big5,cp950_trans | hexdump -C
15 | 00000000 b4 fa b8 d5 0a |.....|
16 |
17 | SEEALSO
18 | from/CP950-UDA for UDA to PUA
19 |
--------------------------------------------------------------------------------
/modules/to/ESCAPE.man:
--------------------------------------------------------------------------------
1 | INPUT
2 | DESC Any byte
3 | EXAMPLE *
4 |
5 | OUTPUT
6 | TYPE Specified by argument
7 | DESC Specified by argument
8 |
9 | ARGUMENT
10 | PREFIX
11 | FORMAT Hex
12 | DESC Output prefix
13 | EXAMPLE 013F
14 | SUFFIX
15 | FORMAT Hex
16 | DESC Output suffix
17 | EXAMPLE 013F
18 | MODE
19 | FORMAT ENUM{hex,16,dec,10,oct,8}
20 | DESC Escape mode
21 | FOR
22 | FORMAT ENUM{UNICODE,1,BYTE,3}
23 | DESC Input type whitelist
24 |
25 | EXAMPLE
26 | > echo 測test試 | bsdconv 'utf-8:ascii,url' #URL === ESCAPE#MODE=16&PREFIX=2575,ESCAPE#MODE=16&PREFIX=25
27 | %u6E2Ctest%u8A66
28 | > echo 測test喆試 | bsdconv 'utf-8:big5,unicode|skip,ascii,byte:ascii,url'
29 | %B4%FAtest%u5586%B8%D5
30 | > echo -n test測試 | bsdconv 'utf-8:ascii,ESCAPE#FOR=UNICODE&MODE=16&PREFIX=5C75'
31 | test\u6e2c\u8a66
32 |
--------------------------------------------------------------------------------
/modules/to/UTF-32BE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | void cbconv(struct bsdconv_instance *ins){
7 | char *data;
8 | unsigned int len, i;
9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
10 | data=this_phase->curr->data;
11 |
12 | this_phase->state.status=NEXTPHASE;
13 | data+=1;
14 | len=this_phase->curr->len-1;
15 |
16 | DATA_MALLOC(ins, this_phase->data_tail->next);
17 | this_phase->data_tail=this_phase->data_tail->next;
18 | this_phase->data_tail->next=NULL;
19 | this_phase->data_tail->len=4;
20 | this_phase->data_tail->flags=F_FREE;
21 | this_phase->data_tail->data=malloc(4);
22 | for(i=0;i<4-len;++i){
23 | CP(this_phase->data_tail->data)[i]=0x0;
24 | }
25 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
26 | }
27 |
--------------------------------------------------------------------------------
/modules/filter/unicode_range.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Some code comes from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | int cbfilter(struct data_rt *data){
8 | uint32_t ucs=0;
9 | int i;
10 | int max=sizeof(ranges) / sizeof(struct uint32_range) - 1;
11 | int min = 0;
12 | int mid;
13 |
14 | if(data->len<1 || UCP(data->data)[0]!=1){
15 | return 0;
16 | }
17 |
18 | for(i=1;ilen;++i){
19 | ucs<<=8;
20 | ucs|=UCP(data->data)[i];
21 | }
22 |
23 | if (ucs < ranges[0].first || ucs > ranges[max].last){
24 | //noop
25 | }else while (max >= min) {
26 | mid = (min + max) / 2;
27 | if (ucs > ranges[mid].last)
28 | min = mid + 1;
29 | else if (ucs < ranges[mid].first)
30 | max = mid - 1;
31 | else{
32 | return 1;
33 | }
34 | }
35 |
36 | return 0;
37 | }
38 |
--------------------------------------------------------------------------------
/modules/scorer/unicode_range.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Some code comes from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | uint32_t cbscorer(struct data_rt *data){
8 | uint32_t ucs=0;
9 | int i;
10 | int max=sizeof(ranges) / sizeof(struct uint32_range_with_score) - 1;
11 | int min = 0;
12 | int mid;
13 |
14 | if(data->len<1 || UCP(data->data)[0]!=1){
15 | return 0;
16 | }
17 |
18 | for(i=1;ilen;++i){
19 | ucs<<=8;
20 | ucs|=UCP(data->data)[i];
21 | }
22 |
23 | if (ucs < ranges[0].first || ucs > ranges[max].last){
24 | //noop
25 | }else while (max >= min) {
26 | mid = (min + max) / 2;
27 | if (ucs > ranges[mid].last)
28 | min = mid + 1;
29 | else if (ucs < ranges[mid].first)
30 | max = mid - 1;
31 | else{
32 | return ranges[mid].score;
33 | }
34 | }
35 |
36 | return 0;
37 | }
38 |
--------------------------------------------------------------------------------
/modules/inter/BONUS.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../../src/bsdconv.h"
3 |
4 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
5 | THIS_CODEC(ins)->priv=bsdconv_counter(ins, "SCORE");
6 | return 0;
7 | }
8 |
9 | void cbconv(struct bsdconv_instance *ins){
10 | struct data_rt *data_ptr;
11 | bsdconv_counter_t *counter=THIS_CODEC(ins)->priv;
12 | unsigned char *data;
13 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
14 | struct data_st data_st;
15 | memcpy(&data_st, (char *)((this_phase->codec[this_phase->index].data_z)+(uintptr_t)this_phase->state.data), sizeof(struct data_st));
16 | data=UCP((THIS_CODEC(ins)->data_z)+(uintptr_t)de_offset(data_st.data));
17 |
18 | *counter += *data;
19 |
20 | LISTCPY_ST(ins, this_phase->data_tail, (void *)(uintptr_t)de_offset(data_st.next), THIS_CODEC(ins)->data_z);
21 |
22 | this_phase->state.status=NEXTPHASE;
23 | return;
24 | }
25 |
--------------------------------------------------------------------------------
/modules/to/UCS-2BE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | void cbconv(struct bsdconv_instance *ins){
7 | char *data;
8 | unsigned int len, i;
9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
10 | data=this_phase->curr->data;
11 | if(this_phase->curr->len > 3){
12 | this_phase->state.status=DEADEND;
13 | return;
14 | }
15 |
16 | this_phase->state.status=NEXTPHASE;
17 | data+=1;
18 | len=this_phase->curr->len-1;
19 |
20 | DATA_MALLOC(ins, this_phase->data_tail->next);
21 | this_phase->data_tail=this_phase->data_tail->next;
22 | this_phase->data_tail->next=NULL;
23 | this_phase->data_tail->len=2;
24 | this_phase->data_tail->flags=F_FREE;
25 | this_phase->data_tail->data=malloc(2);
26 | for(i=0;i<2-len;++i){
27 | CP(this_phase->data_tail->data)[i]=0x0;
28 | }
29 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
30 | }
31 |
--------------------------------------------------------------------------------
/tools/findAinB.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import re
3 |
# Usage: findAinB.py A B
# Reports, for every key in the first column of file A, whether the same key
# also occurs in the first column of file B, then prints a summary.
sep = re.compile(r"\s+")
stp = re.compile(r"^0[xX]")

la = {}
lb = {}

# Collect the upper-cased first column of each file, minus any 0x prefix.
# `with` closes each input as soon as it has been read.
for path, l in ((sys.argv[1], la), (sys.argv[2], lb)):
    with open(path) as f:
        for ln in f:
            ln = ln.strip().upper()
            if ln == "":
                continue
            if ln.startswith("#"):
                continue
            a = sep.split(ln)
            p = stp.sub("", a[0])
            l[p] = 1

allnotin = True
allin = True

total = 0
inc = 0
ninc = 0

for k in la:
    total += 1
    if k in lb:
        inc += 1
        print("IN\t%s " % k)
        allnotin = False
    else:
        ninc += 1
        print("NOTIN\t%s " % k)
        allin = False

if allin:
    print("All In")
elif allnotin:
    print("All Not In")
else:
    print("Not All In")
    print("Total: ", total)
    print("In: ", inc)
    print("Not In: ", ninc)
51 |
--------------------------------------------------------------------------------
/modules/to/UTF-32LE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i))
7 |
8 | void cbconv(struct bsdconv_instance *ins){
9 | char *data;
10 | unsigned int len, i;
11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
12 | data=this_phase->curr->data;
13 |
14 | this_phase->state.status=NEXTPHASE;
15 | data+=1;
16 | len=this_phase->curr->len-1;
17 |
18 | DATA_MALLOC(ins, this_phase->data_tail->next);
19 | this_phase->data_tail=this_phase->data_tail->next;
20 | this_phase->data_tail->next=NULL;
21 | this_phase->data_tail->len=4;
22 | this_phase->data_tail->flags=F_FREE;
23 | this_phase->data_tail->data=malloc(4);
24 | for(i=0;i<4-len;++i){
25 | CP(this_phase->data_tail->data)[i]=0x0;
26 | }
27 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
28 | data=this_phase->data_tail->data;
29 | SWAP(data[0],data[3],i);
30 | SWAP(data[1],data[2],i);
31 | return;
32 | }
33 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-LOG.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | #define TAILIZE(p) while(*p){ p++ ;}
6 |
7 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
8 | THIS_CODEC(ins)->priv=fopen(getenv("BSDCONV_TO_LOG"),"a");
9 | return 0;
10 | }
11 |
12 | void cbdestroy(struct bsdconv_instance *ins){
13 | void *p=THIS_CODEC(ins)->priv;
14 | fclose(p);
15 | }
16 |
17 | void cbconv(struct bsdconv_instance *ins){
18 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
19 | FILE *fp=THIS_CODEC(ins)->priv;
20 | int i;
21 | this_phase->state.status=NEXTPHASE;
22 |
23 | for(i=0;icurr->len;++i){
24 | fprintf(fp,"%02X",UCP(this_phase->curr->data)[i]);
25 | }
26 | if(this_phase->curr->flags){
27 | fprintf(fp," (");
28 | if(this_phase->curr->flags & F_FREE) fprintf(fp, " FREE");
29 | if(this_phase->curr->flags & F_MARK) fprintf(fp, " MARK");
30 | fprintf(fp," )");
31 | }
32 | fprintf(fp,"\n");
33 | fflush(fp);
34 | }
35 |
--------------------------------------------------------------------------------
/modules/from/_JIS0201.txt:
--------------------------------------------------------------------------------
1 | # source: ftp://unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0201.TXT
2 | 5C 01A5
3 | 7E 01203E
4 | A1 01FF61
5 | A2 01FF62
6 | A3 01FF63
7 | A4 01FF64
8 | A5 01FF65
9 | A6 01FF66
10 | A7 01FF67
11 | A8 01FF68
12 | A9 01FF69
13 | AA 01FF6A
14 | AB 01FF6B
15 | AC 01FF6C
16 | AD 01FF6D
17 | AE 01FF6E
18 | AF 01FF6F
19 | B0 01FF70
20 | B1 01FF71
21 | B2 01FF72
22 | B3 01FF73
23 | B4 01FF74
24 | B5 01FF75
25 | B6 01FF76
26 | B7 01FF77
27 | B8 01FF78
28 | B9 01FF79
29 | BA 01FF7A
30 | BB 01FF7B
31 | BC 01FF7C
32 | BD 01FF7D
33 | BE 01FF7E
34 | BF 01FF7F
35 | C0 01FF80
36 | C1 01FF81
37 | C2 01FF82
38 | C3 01FF83
39 | C4 01FF84
40 | C5 01FF85
41 | C6 01FF86
42 | C7 01FF87
43 | C8 01FF88
44 | C9 01FF89
45 | CA 01FF8A
46 | CB 01FF8B
47 | CC 01FF8C
48 | CD 01FF8D
49 | CE 01FF8E
50 | CF 01FF8F
51 | D0 01FF90
52 | D1 01FF91
53 | D2 01FF92
54 | D3 01FF93
55 | D4 01FF94
56 | D5 01FF95
57 | D6 01FF96
58 | D7 01FF97
59 | D8 01FF98
60 | D9 01FF99
61 | DA 01FF9A
62 | DB 01FF9B
63 | DC 01FF9C
64 | DD 01FF9D
65 | DE 01FF9E
66 | DF 01FF9F
67 |
--------------------------------------------------------------------------------
/tools/simple_gen.py:
--------------------------------------------------------------------------------
1 | # simple_gen.py phase_type from_column to_column file
2 | import sys
3 | import re
4 |
def bsdconv01(dt):
    """Format a hex string as '01' + upper-case digits with an even digit count.

    Surrounding whitespace and leading zeros are removed first; a single '0'
    is put back when the remaining digit count is odd.
    """
    digits = dt.strip().lstrip("0").upper()
    prefix = "010" if len(digits) % 2 else "01"
    return prefix + digits
11 |
def raw(dt):
    """Identity formatter: hand the column value back untouched."""
    return dt
14 |
# Pick the (from, to) column formatters for the requested phase type;
# anything other than FROM or INTER is handled as a "to" table.
pt = sys.argv[1].upper()
if pt == "FROM":
    ff = raw
    tf = bsdconv01
elif pt == "INTER":
    ff = bsdconv01
    tf = bsdconv01
else:
    ff = bsdconv01
    tf = raw

stp = re.compile(r"^(U\+|0X)")
sep = re.compile(r"\s+")
vld = re.compile(r"^[a-fA-F0-9,]+$")

from_column = int(sys.argv[2])
to_column = int(sys.argv[3])

# `with` closes the input even if a line raises part-way through.
with open(sys.argv[4]) as f:
    for l in f:
        l = l.strip().upper()
        if l == "":
            continue
        if l.startswith("#"):
            continue
        a = sep.split(l)
        try:
            fr = stp.sub("", a[from_column])
            to = stp.sub("", a[to_column])
        except IndexError:
            # Row has fewer columns than requested: skip it like any other
            # row that fails validation below.
            continue
        if not vld.match(fr):
            continue
        if not vld.match(to):
            continue

        print("%s\t%s" % (ff(fr), tf(to)))
49 |
--------------------------------------------------------------------------------
/modules/to/UCS-2LE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i))
7 |
8 | void cbconv(struct bsdconv_instance *ins){
9 | char *data;
10 | unsigned int len, i;
11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
12 | data=this_phase->curr->data;
13 | if(this_phase->curr->len > 3){
14 | this_phase->state.status=DEADEND;
15 | return;
16 | }
17 | this_phase->state.status=NEXTPHASE;
18 | data+=1;
19 | len=this_phase->curr->len-1;
20 |
21 | DATA_MALLOC(ins, this_phase->data_tail->next);
22 | this_phase->data_tail=this_phase->data_tail->next;
23 | this_phase->data_tail->next=NULL;
24 | this_phase->data_tail->len=2;
25 | this_phase->data_tail->flags=F_FREE;
26 | this_phase->data_tail->data=malloc(2);
27 | for(i=0;i<2-len;++i){
28 | CP(this_phase->data_tail->data)[i]=0x0;
29 | }
30 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
31 | data=this_phase->data_tail->data;
32 |
33 | SWAP(data[0],data[1],i);
34 | return;
35 | }
36 |
--------------------------------------------------------------------------------
/src/libbsdconv_counter.c:
--------------------------------------------------------------------------------
1 | bsdconv_counter_t * bsdconv_counter(struct bsdconv_instance *ins, const char *_key){
2 | struct bsdconv_counter_entry *p=ins->counter;
3 | struct bsdconv_counter_entry *t;
4 | char *key=strdup(_key);
5 | strtoupper(key);
6 | if(p==NULL){
7 | ins->counter=malloc(sizeof(struct bsdconv_counter_entry));
8 | ins->counter->key=key;
9 | ins->counter->val=0;
10 | ins->counter->next=0;
11 | return &ins->counter->val;
12 | }else{
13 | do{
14 | t=p;
15 | if(strcmp(p->key, key)==0){
16 | free(key);
17 | return &p->val;
18 | }
19 | p=p->next;
20 | }while(p!=NULL);
21 | t->next=malloc(sizeof(struct bsdconv_counter_entry));
22 | t=t->next;
23 | t->key=key;
24 | t->val=0;
25 | t->next=0;
26 | return &t->val;
27 | }
28 | }
29 |
30 | void bsdconv_counter_reset(struct bsdconv_instance *ins, const char *key){
31 | struct bsdconv_counter_entry *p=ins->counter;
32 | bsdconv_counter_t *v;
33 | if(key==NULL){
34 | while(p){
35 | p->val=0;
36 | p=p->next;
37 | }
38 | }else{
39 | v=bsdconv_counter(ins, key);
40 | *v=0;
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/modules/to/BSDCONV-OUTPUT.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
7 | FILE *fp=stdout;
8 | while(arg){
9 | if(strcasecmp(arg->key, "STDERR")==0){
10 | fp=stderr;
11 | }else if(strcasecmp(arg->key, "STDOUT")==0){
12 | fp=stdout;
13 | }else{
14 | return EINVAL;
15 | }
16 | arg=arg->next;
17 | }
18 | THIS_CODEC(ins)->priv=fp;
19 | return 0;
20 | }
21 |
22 | void cbconv(struct bsdconv_instance *ins){
23 | FILE *fp=THIS_CODEC(ins)->priv;
24 | int i;
25 | ins->phase[ins->phase_index].state.status=NEXTPHASE;
26 |
27 | for(i=0;iphase[ins->phase_index].curr->len;++i){
28 | fprintf(fp, "%02X",UCP(ins->phase[ins->phase_index].curr->data)[i]);
29 | }
30 | if(ins->phase[ins->phase_index].curr->flags){
31 | fprintf(fp, " (");
32 | if(ins->phase[ins->phase_index].curr->flags & F_FREE) fprintf(fp, " FREE");
33 | if(ins->phase[ins->phase_index].curr->flags & F_MARK) fprintf(fp, " MARK");
34 | fprintf(fp, " )");
35 | }
36 | fprintf(fp, "\n");
37 | }
38 |
--------------------------------------------------------------------------------
/src/libbsdconv_filter.c:
--------------------------------------------------------------------------------
1 | struct bsdconv_filter *load_filter(const char *_name){
2 | struct bsdconv_filter *filter;
3 |
4 | char *cwd;
5 | char *c;
6 | char path[PATH_MAX+1];
7 | char *name=strdup(_name);
8 | strtoupper(name);
9 |
10 | while(!bsdconv_module_check(FILTER, name)){
11 | c=bsdconv_solve_alias(FILTER, name);
12 | if(c==NULL || strcmp(c, name)==0){
13 | free(name);
14 | free(c);
15 | return NULL;
16 | }
17 | free(name);
18 | name=c;
19 | }
20 | cwd=getcwd(NULL, 0);
21 | if((c=getenv("BSDCONV_PATH"))){
22 | chdir(c);
23 | }else{
24 | chdir(BSDCONV_PATH);
25 | }
26 | chdir(MODULES_SUBPATH);
27 | chdir("filter");
28 | REALPATH(name, path);
29 | chdir(cwd);
30 | free(cwd);
31 | free(name);
32 | strcat(path, "." SHLIBEXT);
33 |
34 | filter=malloc(sizeof(struct bsdconv_filter));
35 | filter->so=OPEN_SHAREOBJECT(path);
36 | if(!filter->so){
37 | free(filter);
38 | return NULL;
39 | }
40 |
41 | filter->cbfilter=SHAREOBJECT_SYMBOL(filter->so, "cbfilter");
42 |
43 | return filter;
44 | }
45 |
46 | void unload_filter(struct bsdconv_filter *filter){
47 | CLOSE_SHAREOBJECT(filter->so);
48 | free(filter);
49 | }
50 |
--------------------------------------------------------------------------------
/src/libbsdconv_scorer.c:
--------------------------------------------------------------------------------
1 | struct bsdconv_scorer *load_scorer(const char *_name){
2 | struct bsdconv_scorer *scorer;
3 |
4 | char *cwd;
5 | char *c;
6 | char path[PATH_MAX+1];
7 | char *name=strdup(_name);
8 | strtoupper(name);
9 |
10 | while(!bsdconv_module_check(SCORER, name)){
11 | c=bsdconv_solve_alias(SCORER, name);
12 | if(c==NULL || strcmp(c, name)==0){
13 | free(name);
14 | free(c);
15 | return NULL;
16 | }
17 | free(name);
18 | name=c;
19 | }
20 | cwd=getcwd(NULL, 0);
21 | if((c=getenv("BSDCONV_PATH"))){
22 | chdir(c);
23 | }else{
24 | chdir(BSDCONV_PATH);
25 | }
26 | chdir(MODULES_SUBPATH);
27 | chdir("scorer");
28 | REALPATH(name, path);
29 | chdir(cwd);
30 | free(cwd);
31 | free(name);
32 | strcat(path, "." SHLIBEXT);
33 |
34 | scorer=malloc(sizeof(struct bsdconv_scorer));
35 | scorer->so=OPEN_SHAREOBJECT(path);
36 | if(!scorer->so){
37 | free(scorer);
38 | return NULL;
39 | }
40 |
41 | scorer->cbscorer=SHAREOBJECT_SYMBOL(scorer->so, "cbscorer");
42 |
43 | return scorer;
44 | }
45 |
46 | void unload_scorer(struct bsdconv_scorer *scorer){
47 | CLOSE_SHAREOBJECT(scorer->so);
48 | free(scorer);
49 | }
50 |
--------------------------------------------------------------------------------
/tools/gen_hex.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import sys
3 |
if len(sys.argv) != 3:
    print("Usage: {} fromfile tofile".format(sys.argv[0]))
    sys.exit(1)

fromfile, tofile = sys.argv[1:3]

# Accepted spellings for each upper-case hex digit: decimal digits map to
# themselves, letters to their lower-case then upper-case form.
m = {d: d for d in "0123456789"}
m.update({d: d.lower() + d for d in "ABCDEF"})


def _hex_of_ascii(text):
    """Return the two-digit upper-case hex code of every character in text."""
    return "".join(["{:02X}".format(ord(ch)) for ch in text])


# "to" table: byte value -> ASCII codes of its upper-case hex spelling.
with open(tofile, "w") as tof:
    for value in range(256):
        upper = "{:02X}".format(value)
        tof.write("03{}\t{}\n".format(upper, _hex_of_ascii(upper)))

# "from" table: one line per upper/lower-case spelling of each byte value.
with open(fromfile, "w") as fromf:
    for value in range(256):
        spellings = [""]
        for digit in "{:02X}".format(value):
            # The outer loop runs over the alternatives, so the order matches
            # a nested "for alternative: for prefix:" expansion.
            spellings = [prefix + alt for alt in m[digit] for prefix in spellings]

        for spelling in spellings:
            fromf.write("{}\t03{}\n".format(_hex_of_ascii(spelling), spelling))
50 |
--------------------------------------------------------------------------------
/tools/mkbonus.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | #mkbonus.py src_list char_list phrase_list
4 |
5 | import sys
6 | import re
7 | from bsdconv import Bsdconv
8 |
# Outputs: single-character entries go to clist, multi-character phrases to plist.
clist = open(sys.argv[2], "w")
plist = open(sys.argv[3], "w")

sc = Bsdconv("utf-8:score#with=cjk:null")
bcv = Bsdconv("utf-8:insert#after=002c:bsdconv-keyword,bsdconv")
bcv_zhtw = Bsdconv("utf-8:zhtw:insert#after=002c:bsdconv-keyword,bsdconv")

sep = re.compile(r"\s+")

f = open(sys.argv[1])
for l in f:
    l = l.strip()
    if l == "":
        continue
    if l.startswith("#"):
        # Comment lines are copied to both outputs and must not be treated
        # as an entry as well.
        clist.write(l+"\n")
        plist.write(l+"\n")
        continue
    a = sep.split(l)
    p = a[0]
    ln = len(p.decode("utf-8"))
    if ln > 1:
        # Phrases always get a fixed bonus.
        bonus = 6
        p = bcv_zhtw.conv(p).rstrip(",")
        of = plist
    else:
        # Optional second column is the base bonus; absent or non-numeric -> 0.
        try:
            bonus = int(a[1])
        except (IndexError, ValueError):
            bonus = 0
        sc.counter_reset()
        sc.conv(p)
        score = sc.counter("SCORE")
        # Top the bonus up so score + bonus reaches at least 5 per character.
        if score < 5*ln:
            bonus += 5*ln - score
        if bonus == 0:
            continue
        p = bcv.conv(p).rstrip(",")
        of = clist
    of.write("%s\t?%02X,%s\n" % (p, bonus, p))

f.close()
clist.close()
plist.close()
52 |
--------------------------------------------------------------------------------
/modules/inter/ZH-BONUS.txt:
--------------------------------------------------------------------------------
1 | # Source: http://www.w3.org/html/ig/zh/wiki/Big5-hkscs-vs-uao-in-hk
2 | 0123 ?01,0123
3 | 013639 ?02,013639
4 | 010233B4 ?04,010233B4
5 | 01C5 ?05,01C5
6 | 01FC ?05,01FC
7 | 014E24 ?01,014E24
8 | 015179 ?01,015179
9 | 0152B9 ?01,0152B9
10 | 0153F7 ?01,0153F7
11 | 015553 ?01,015553
12 | 0163F8 ?01,0163F8
13 | 016B74 ?01,016B74
14 | 0170DF ?01,0170DF
15 | 01714A ?01,01714A
16 | 0171DF ?01,0171DF
17 | 017371 ?01,017371
18 | 01743C ?01,01743C
19 | 017468 ?01,017468
20 | 017740 ?01,017740
21 | 017793 ?01,017793
22 | 01781C ?01,01781C
23 | 0178B1 ?01,0178B1
24 | 01793C ?01,01793C
25 | 017962 ?01,017962
26 | 017AEA ?02,017AEA
27 | 017DAB ?01,017DAB
28 | 017EDF ?01,017EDF
29 | 0183D3 ?01,0183D3
30 | 0184AD ?01,0184AD
31 | 01885E ?01,01885E
32 | 0189A7 ?01,0189A7
33 | 0189C6 ?01,0189C6
34 | 018D4E ?01,018D4E
35 | 018D77 ?01,018D77
36 | 018E2A ?01,018E2A
37 | 018E46 ?01,018E46
38 | 018EAD ?01,018EAD
39 | 018FF9 ?01,018FF9
40 | 0190A8 ?02,0190A8
41 | 0190D1 ?02,0190D1
42 | 0190FD ?01,0190FD
43 | 019176 ?03,019176
44 | 0194C3 ?01,0194C3
45 | 0194CA ?01,0194CA
46 | 01976D ?01,01976D
47 | 019938 ?02,019938
48 | 019A90 ?01,019A90
49 | 019DC4 ?01,019DC4
50 | 019E37 ?01,019E37
51 | 019FA5 ?01,019FA5
52 |
--------------------------------------------------------------------------------
/modules/from/_ISO-8859-1.txt:
--------------------------------------------------------------------------------
1 | A0 01A0
2 | A1 01A1
3 | A2 01A2
4 | A3 01A3
5 | A4 01A4
6 | A5 01A5
7 | A6 01A6
8 | A7 01A7
9 | A8 01A8
10 | A9 01A9
11 | AA 01AA
12 | AB 01AB
13 | AC 01AC
14 | AD 01AD
15 | AE 01AE
16 | AF 01AF
17 | B0 01B0
18 | B1 01B1
19 | B2 01B2
20 | B3 01B3
21 | B4 01B4
22 | B5 01B5
23 | B6 01B6
24 | B7 01B7
25 | B8 01B8
26 | B9 01B9
27 | BA 01BA
28 | BB 01BB
29 | BC 01BC
30 | BD 01BD
31 | BE 01BE
32 | BF 01BF
33 | C0 01C0
34 | C1 01C1
35 | C2 01C2
36 | C3 01C3
37 | C4 01C4
38 | C5 01C5
39 | C6 01C6
40 | C7 01C7
41 | C8 01C8
42 | C9 01C9
43 | CA 01CA
44 | CB 01CB
45 | CC 01CC
46 | CD 01CD
47 | CE 01CE
48 | CF 01CF
49 | D0 01D0
50 | D1 01D1
51 | D2 01D2
52 | D3 01D3
53 | D4 01D4
54 | D5 01D5
55 | D6 01D6
56 | D7 01D7
57 | D8 01D8
58 | D9 01D9
59 | DA 01DA
60 | DB 01DB
61 | DC 01DC
62 | DD 01DD
63 | DE 01DE
64 | DF 01DF
65 | E0 01E0
66 | E1 01E1
67 | E2 01E2
68 | E3 01E3
69 | E4 01E4
70 | E5 01E5
71 | E6 01E6
72 | E7 01E7
73 | E8 01E8
74 | E9 01E9
75 | EA 01EA
76 | EB 01EB
77 | EC 01EC
78 | ED 01ED
79 | EE 01EE
80 | EF 01EF
81 | F0 01F0
82 | F1 01F1
83 | F2 01F2
84 | F3 01F3
85 | F4 01F4
86 | F5 01F5
87 | F6 01F6
88 | F7 01F7
89 | F8 01F8
90 | F9 01F9
91 | FA 01FA
92 | FB 01FB
93 | FC 01FC
94 | FD 01FD
95 | FE 01FE
96 | FF 01FF
97 |
--------------------------------------------------------------------------------
/modules/scorer/CJK.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Reference: http://blog.oasisfeng.com/2006/10/19/full-cjk-unicode-range/
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range_with_score ranges[] = {
8 | { 0x0, 0x7F, 4 }, //ASCII
9 | { 0x3000, 0x303F, 4 }, //CJK punctuation
10 | { 0x3040, 0x309F, 5 }, //Japanese hiragana
11 | { 0x30A0, 0x30FF, 5 }, //Japanese katakana
12 | { 0x3100, 0x312F, 4 }, //Chinese Bopomofo
13 | { 0x3400, 0x4DB5, 3 }, //CJK Unified Ideographs Extension A ;Unicode3.0
14 | { 0x4E00, 0x6FFF, 5 }, //CJK Unified Ideographs ;Unicode 1.1 ;HF
15 | { 0x7000, 0x9FA5, 4 }, //CJK Unified Ideographs ;Unicode 1.1 ;LF
16 | { 0x9FA6, 0x9FBB, 3 }, //CJK Unified Ideographs ;Unicode 4.1
17 | { 0xAC00, 0xD7AF, 3 }, //Korean word
18 | { 0xF900, 0xFA2D, 4 }, //CJK Compatibility Ideographs ;Unicode 1.1
19 | { 0xFA30, 0xFA6A, 4 }, //CJK Compatibility Ideographs ;Unicode 3.2
20 | { 0xFA70, 0xFAD9, 2 }, //CJK Compatibility Ideographs ;Unicode 4.1
21 | { 0xFF00, 0xFFEF, 3}, //Fullwidth ASCII, punctuation, Japanese, Korean
22 | { 0x20000, 0x2A6D6, 1 },//CJK Unified Ideographs Extension B ;Unicode 3.1
23 | { 0x2F800, 0x2FA1D, 1 },//CJK Compatibility Supplement ;Unicode 3.1
24 | };
25 |
26 | #include "unicode_range.c"
27 |
--------------------------------------------------------------------------------
/modules/inter/SCORE.man:
--------------------------------------------------------------------------------
1 | DESC
2 | Increase SCORE counter according to score table.
3 | If the environment variable BSDCONV_SCORE is set, it is used as the score table path; otherwise, if ~/.bsdconv.score exists, it is used as the score table.
4 |
5 | ARGUMENT
6 | WITH
7 | FORMAT Scorer
8 | DESC
9 | If TRAINED is specified, default score file will be used
10 | AS
11 | DESC
12 | Counter Name
13 |
14 | EXAMPLE
15 | > echo 123Б測試 | bsdconv utf-8:score#with=cjk:null
16 | Score: 25
17 | > echo 123Б測試 | bsdconv big5:score#with=cjk:null
18 | IERR: 2
19 | Score: 29
20 | #if score data exists, it will use it.
21 | #default score data path is $HOME/.bsdconv.score
22 | > echo 123Б測試 | bsdconv utf-8:score-train:null
23 | > echo 123Б測試 | bsdconv utf-8:score#with=trained:null
24 | Score: 7
25 | > echo 123Б測試功能 | bsdconv utf-8:score#with=trained:null
26 | Score: 7
27 | > echo 123Б測試功能 | bsdconv utf-8:score-train:null
28 | > echo 123Б測試功能 | bsdconv utf-8:score#with=trained:null
29 | Score: 16
30 | #specify score data path in C
31 | bsdconv_ctl(
32 | struct bsdconv_instance *, /*conversion instance*/
33 | BSDCONV_CTL_ATTACH_SCORE, /* Ctl constant*/
34 | FILE *, /* File pointer to the score data*/
35 | 0 /* unused argument */
36 | );
37 |
--------------------------------------------------------------------------------
/modules/to/alias:
--------------------------------------------------------------------------------
1 | UAO UAO250
2 | SOURCE ASCII-ESCAPED-UNICODE
3 | JAVA ASCII-ESCAPED-UNICODE
4 | HTMLENTITY ASCII-HEX-NUMERIC-HTML-ENTITY
5 | NAMED-HTMLENTITY ASCII-NAMED-HTML-ENTITY
6 | HTML-IMG ASCII-HTML-UNICODE-IMG
7 | UCS-4 UTF-32LE
8 | UCS-4BE UTF-32BE
9 | UCS-4LE UTF-32LE
10 | UTF-32 UTF-32LE
11 | UTF-16 UTF-16LE
12 | UCS-2 UCS-2LE
13 | BIG5 CP950
14 | CNS11643 ASCII,_CNS11643
15 | CP1251 _CP1251,ASCII
16 | CP1252 _CP1252,ASCII
17 | CP1253 _CP1253,ASCII
18 | CP874 _CP874,ASCII
19 | CP936 _CP936,ASCII
20 | CP949 _CP949,ASCII
21 | CP950 _CP950,ASCII
22 | GB18030 _GB18030,ASCII
23 | GB2312 _GB2312,ASCII
24 | GBK _GBK,ASCII
25 | ISO-8859-1 _ISO-8859-1,ASCII
26 | JIS _JIS0212,ASCII
27 | SHIFT-JIS _SHIFT-JIS,ASCII
28 | UAO241 _UAO241,ASCII
29 | UAO250 _UAO250,ASCII
30 | UTF-8 _UTF-8,ASCII
31 | 3F ANY#3F&ERROR
32 | URL ESCAPE#FOR=UNICODE&MODE=16&PREFIX=2575,ESCAPE#FOR=BYTE&MODE=16&PREFIX=25
33 | ASCII-ESCAPED-UNICODE ESCAPE#FOR=UNICODE&MODE=16&PREFIX=5C75
34 | ASCII-HEX-NUMERIC-HTML-ENTITY ESCAPE#FOR=UNICODE&PREFIX=262378&MODE=16&SUFFIX=3B
35 | ASCII-DEC-NUMERIC-HTML-ENTITY ESCAPE#FOR=UNICODE&PREFIX=2623&MODE=10&SUFFIX=3B
36 | # backward compatibility
37 | UNICODE PASS#MARK&FOR=UNICODE
38 | ANSI-CONTROL PASS#MARK&FOR=ANSI
39 | BSDCONV-STDOUT BSDCONV-OUTPUT
40 |
--------------------------------------------------------------------------------
/modules/to/_ISO-8859-1.txt:
--------------------------------------------------------------------------------
1 | # transposed from from/iso-8859-1
2 | 01A0 A0
3 | 01A1 A1
4 | 01A2 A2
5 | 01A3 A3
6 | 01A4 A4
7 | 01A5 A5
8 | 01A6 A6
9 | 01A7 A7
10 | 01A8 A8
11 | 01A9 A9
12 | 01AA AA
13 | 01AB AB
14 | 01AC AC
15 | 01AD AD
16 | 01AE AE
17 | 01AF AF
18 | 01B0 B0
19 | 01B1 B1
20 | 01B2 B2
21 | 01B3 B3
22 | 01B4 B4
23 | 01B5 B5
24 | 01B6 B6
25 | 01B7 B7
26 | 01B8 B8
27 | 01B9 B9
28 | 01BA BA
29 | 01BB BB
30 | 01BC BC
31 | 01BD BD
32 | 01BE BE
33 | 01BF BF
34 | 01C0 C0
35 | 01C1 C1
36 | 01C2 C2
37 | 01C3 C3
38 | 01C4 C4
39 | 01C5 C5
40 | 01C6 C6
41 | 01C7 C7
42 | 01C8 C8
43 | 01C9 C9
44 | 01CA CA
45 | 01CB CB
46 | 01CC CC
47 | 01CD CD
48 | 01CE CE
49 | 01CF CF
50 | 01D0 D0
51 | 01D1 D1
52 | 01D2 D2
53 | 01D3 D3
54 | 01D4 D4
55 | 01D5 D5
56 | 01D6 D6
57 | 01D7 D7
58 | 01D8 D8
59 | 01D9 D9
60 | 01DA DA
61 | 01DB DB
62 | 01DC DC
63 | 01DD DD
64 | 01DE DE
65 | 01DF DF
66 | 01E0 E0
67 | 01E1 E1
68 | 01E2 E2
69 | 01E3 E3
70 | 01E4 E4
71 | 01E5 E5
72 | 01E6 E6
73 | 01E7 E7
74 | 01E8 E8
75 | 01E9 E9
76 | 01EA EA
77 | 01EB EB
78 | 01EC EC
79 | 01ED ED
80 | 01EE EE
81 | 01EF EF
82 | 01F0 F0
83 | 01F1 F1
84 | 01F2 F2
85 | 01F3 F3
86 | 01F4 F4
87 | 01F5 F5
88 | 01F6 F6
89 | 01F7 F7
90 | 01F8 F8
91 | 01F9 F9
92 | 01FA FA
93 | 01FB FB
94 | 01FC FC
95 | 01FD FD
96 | 01FE FE
97 | 01FF FF
98 |
--------------------------------------------------------------------------------
/modules/from/alias:
--------------------------------------------------------------------------------
1 | BIG5 UAO250
2 | BIG5E ASCII,_BIG5E
3 | BIG5-2003 ASCII,_BIG5-2003
4 | BIG5-ETEN ASCII,_BIG5-ETEN
5 | MOZ18 UAO241
6 | HTMLENTITY ASCII-NUMERIC-HTML-ENTITY
7 | NCR ASCII-NUMERIC-HTML-ENTITY
8 | NAMED-HTMLENTITY ASCII-NAMED-HTML-ENTITY
9 | UCS-2 UTF-16LE
10 | UCS-2BE UTF-16BE
11 | UCS-2LE UTF-16LE
12 | CNS11643 ASCII,_CNS11643
13 | CP1251 _CP1251,ASCII,FALLBACK-UNICODE
14 | CP1252 _CP1252,ASCII,FALLBACK-UNICODE
15 | CP1253 _CP1253,ASCII,FALLBACK-UNICODE
16 | CP1254 _CP1254,ASCII,FALLBACK-UNICODE
17 | CP1255 _CP1255,ASCII,FALLBACK-UNICODE
18 | CP1256 _CP1256,ASCII,FALLBACK-UNICODE
19 | CP1257 _CP1257,ASCII,FALLBACK-UNICODE
20 | CP1258 _CP1258,ASCII,FALLBACK-UNICODE
21 | CP874 ASCII,_CP874,FALLBACK-UNICODE
22 | CP932 ASCII,_CP932
23 | CP936 ASCII,_CP936
24 | CP949 ASCII,_CP949
25 | CP950 ASCII,_CP950,CP950-UDA
26 | GB18030 ASCII,_GB18030
27 | GB2312 ASCII,_GB2312
28 | GBK ASCII,_GBK
29 | HKSCS1999 ASCII,_CP950,_HKSCS1999
30 | HKSCS2001 ASCII,_CP950,_HKSCS2001
31 | HKSCS2004 ASCII,_CP950,_HKSCS2004
32 | ISO-8859-1 ASCII,_ISO-8859-1
33 | JIS _JIS,ASCII
34 | LATIN1 ISO-8859-1
35 | SHIFT-JIS ASCII,_SHIFT-JIS
36 | UAO241 ASCII,_UAO241
37 | UAO250 ASCII,_UAO250
38 | UTF-8 ASCII,_UTF-8
39 | 3F ANY#013F&ERROR
40 | SUB ANY#01FFFD
41 | # backward compatibility
42 | SKIP PASS#UNMARK
43 |
--------------------------------------------------------------------------------
/modules/to/ASCII-HTML-UNICODE-IMG.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | #define TAILIZE(p) while(*p){ p++ ;}
7 |
8 | void cbconv(struct bsdconv_instance *ins){
9 | char *data, *p, buf[128]={0};
10 | unsigned int len, i;
11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
12 | data=this_phase->curr->data;
13 | if(*data!=0x01){
14 | this_phase->state.status=DEADEND;
15 | return;
16 | }
17 | this_phase->state.status=NEXTPHASE;
18 | p=buf;
19 | i=*data;
20 | data+=1;
21 | len=this_phase->curr->len-1;
22 | DATA_MALLOC(ins, this_phase->data_tail->next);
23 | this_phase->data_tail=this_phase->data_tail->next;
24 | this_phase->data_tail->next=NULL;
25 |
26 | sprintf(p,"
");
37 | TAILIZE(p);
38 | len=p-buf;
39 | this_phase->data_tail->len=len;
40 | this_phase->data_tail->flags=F_FREE;
41 | this_phase->data_tail->data=malloc(len);
42 | memcpy(this_phase->data_tail->data, buf, len);
43 |
44 | return;
45 | }
46 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2009-2016 Kuan-Chung Chiu
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions
5 | are met:
6 | 1. Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | 2. Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 |
12 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
16 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
18 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
19 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
20 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
21 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
22 | SUCH DAMAGE.
23 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-INTER.txt:
--------------------------------------------------------------------------------
1 | 014E,0146,0144 015F,014E,0146,0144,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0144,0145,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E,013A,015F,014E,0146,012D,014F,0152,0144,0145,0152
2 | 014E,0146,014B,0144 015F,014E,0146,014B,0144,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0144,0145,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E,013A,015F,014E,0146,012D,014F,0152,0144,0145,0152
3 | 014E,0146,0143 014E,0146,0144,013A,015F,014E,0146,0143,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E
4 | 014E,0146,014B,0143 014E,0146,014B,0144,013A,015F,014E,0146,0143,013A,015F,014E,0146,012D,0148,0141,014E,0147,0155,014C,012D,0143,014F,014D,0150,014F,0153,0149,0154,0149,014F,014E
5 | 014E,0146,014B,0144,012D,0143,0141,0153,0145,0146,014F,014C,0144 014E,0146,0144,013A,0143,0141,0153,0145,0146,014F,014C,0144,013A,014E,0146,014B,0144,013A,0143,0141,0153,0145,0146,014F,014C,0144,013A,014E,0146,014B,0144
6 | 0153,0150,014C,0149,0154 0149,014E,0153,0145,0152,0154,0123,0141,0146,0154,0145,0152,013D,0130,0130,0132,0143
7 | 014E,014F,0142,014F,014D 0152,0145,0150,014C,0141,0143,0145,0123,0130,0131,0146,0145,0146,0146
8 | 015A,0148,012D,0153,0154,0152,0149,014E,0147,0153 0153,0154,0152,0149,014E,0147,0153,0123,0146,014F,0152,013D,0143,014A,014B
9 | 0155,014E,0149,0143,014F,0144,0145 ?
10 |
--------------------------------------------------------------------------------
/modules/from/EBCDIC.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Private state kept by this codec between cbconv() calls. */
struct my_s{
	int status;	/* 0 after cbinit() and after the 01,0F entry; 1 after the 01,0E entry */
};
8 |
9 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
10 | struct my_s *r=malloc(sizeof(struct my_s));
11 | THIS_CODEC(ins)->priv=r;
12 | return 0;
13 | }
14 |
15 | void cbinit(struct bsdconv_instance *ins){
16 | struct my_s *r=THIS_CODEC(ins)->priv;
17 | r->status=0;
18 | }
19 |
20 | void cbdestroy(struct bsdconv_instance *ins){
21 | struct my_s *r=THIS_CODEC(ins)->priv;
22 | free(r);
23 | }
24 |
25 | void cbconv(struct bsdconv_instance *ins){
26 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
27 | struct my_s *t=THIS_CODEC(ins)->priv;
28 | struct data_st data;
29 | unsigned char *c;
30 |
31 | memcpy(&data, (char *)(this_phase->codec[this_phase->index].data_z+(uintptr_t)this_phase->state.data), sizeof(struct data_st));
32 | c=UCP(this_phase->codec[this_phase->index].data_z+de_offset(data.data));
33 |
34 | if(data.len==2 && c[0]=='\x01'){
35 | if(c[1]=='\x0E'){
36 | t->status=1;
37 | this_phase->state.status=NEXTPHASE;
38 | return;
39 | }else if(c[1]=='\x0F'){
40 | t->status=0;
41 | this_phase->state.status=NEXTPHASE;
42 | return;
43 | }
44 | }
45 |
46 | if(t->status==0){
47 | this_phase->state.status=MATCH;
48 | }else{
49 | this_phase->state.status=SUBMATCH;
50 | }
51 |
52 | return;
53 | }
54 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-FROM.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 |
6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
7 | THIS_CODEC(ins)->priv=bsdconv_create("ASCII:PASS");
8 | return 0;
9 | }
10 |
11 | void cbdestroy(struct bsdconv_instance *ins){
12 | bsdconv_destroy(THIS_CODEC(ins)->priv);
13 | }
14 |
15 | void cbconv(struct bsdconv_instance *ins){
16 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
17 | struct bsdconv_instance *uni=THIS_CODEC(ins)->priv;
18 | const char *locale;
19 | const char *s;
20 |
21 | if (((locale=getenv("LC_ALL")) || (locale=getenv("LC_CTYPE")) || (locale=getenv ("LANG"))) && ((s=strstr(locale, "."))!=NULL)){
22 | s+=1;
23 | }else{
24 | s=locale;
25 | }
26 | if(s==NULL || *s==0 || strcmp(s, "C")==0 || strcmp(s, "POSIX")==0){
27 | s="ASCII";
28 | }
29 | bsdconv_init(uni);
30 | uni->input.data=strdup(s);
31 | uni->input.len=strlen(s);
32 | uni->input.flags=F_FREE;
33 | uni->input.next=NULL;
34 | uni->flush=1;
35 | bsdconv(uni);
36 | this_phase->data_tail->next=uni->phase[uni->phasen].data_head->next;
37 | uni->phase[uni->phasen].data_head->next=NULL;
38 | uni->phase[uni->phasen].data_tail=uni->phase[uni->phasen].data_head;
39 | while(this_phase->data_tail->next!=NULL){
40 | this_phase->data_tail=this_phase->data_tail->next;
41 | }
42 |
43 | this_phase->state.status=NEXTPHASE;
44 | return;
45 | }
46 |
--------------------------------------------------------------------------------
/modules/inter/COUNT.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Private state of this codec, set up in cbcreate(). */
struct my_s{
	struct bsdconv_filter *filter;	/* filter loaded from the FOR argument; NULL means every item is counted */
	bsdconv_counter_t *counter;	/* counter obtained from bsdconv_counter(); incremented in cbconv() */
};
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 | struct my_s *r=malloc(sizeof(struct my_s));
12 | THIS_CODEC(ins)->priv=r;
13 | r->filter=NULL;
14 |
15 | char *filter=NULL;
16 | char *key="COUNT";
17 | while(arg){
18 | if(strcasecmp(arg->key, "FOR")==0){
19 | filter=arg->ptr;
20 | }else{
21 | key=arg->key;
22 | }
23 | arg=arg->next;
24 | }
25 | if(filter!=NULL){
26 | r->filter=load_filter(filter);
27 | if(r->filter==NULL){
28 | free(r);
29 | return EOPNOTSUPP;
30 | }
31 | }
32 | r->counter=bsdconv_counter(ins, key);
33 | return 0;
34 | }
35 |
36 | void cbdestroy(struct bsdconv_instance *ins){
37 | struct my_s *r=THIS_CODEC(ins)->priv;
38 | if(r->filter)
39 | unload_filter(r->filter);
40 | free(r);
41 | }
42 |
43 | void cbconv(struct bsdconv_instance *ins){
44 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
45 | struct my_s *r=THIS_CODEC(ins)->priv;
46 |
47 | if(r->filter==NULL || r->filter->cbfilter(this_phase->curr))
48 | *(r->counter)+=1;
49 |
50 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
51 | this_phase->data_tail=this_phase->data_tail->next;
52 | this_phase->data_tail->next=NULL;
53 |
54 | this_phase->state.status=NEXTPHASE;
55 | return;
56 | }
57 |
--------------------------------------------------------------------------------
/modules/inter/SUPER.txt:
--------------------------------------------------------------------------------
1 | 010126 01A7F8
2 | 01014B 011D51
3 | 010153 01A7F9
4 | 01018E 011D32
5 | 010251 011D45
6 | 010254 011D53
7 | 01025B 011D4B
8 | 010266 0102B1
9 | 010275 011DB1
10 | 010292 011DBE
11 | 0103B2 011D5D
12 | 0103B3 011D5E
13 | 0103B4 011D5F
14 | 0103B8 011DBF
15 | 0103C6 011D60
16 | 0103C7 011D61
17 | 01043D 011D78
18 | 01044A 01A69C
19 | 01044C 01A69D
20 | 0110DC 0110FC
21 | 012212 01207B
22 | 0128 01207D
23 | 0129 01207E
24 | 012B 01207A
25 | 012D61 012D6F
26 | 0130 012070
27 | 0131 01B9
28 | 0132 01B2
29 | 0133 01B3
30 | 0134 012074
31 | 0135 012075
32 | 0136 012076
33 | 0137 012077
34 | 0138 012078
35 | 0139 012079
36 | 013D 01207C
37 | 0141 011D2C
38 | 0142 011D2E
39 | 0144 011D30
40 | 0145 011D31
41 | 0147 011D33
42 | 0148 011D34
43 | 0149 011D35
44 | 014A 011D36
45 | 014B 011D37
46 | 014C 011D38
47 | 014D 011D39
48 | 014E 011D3A
49 | 014F 011D3C
50 | 0150 011D3E
51 | 0152 011D3F
52 | 0154 011D40
53 | 0155 011D41
54 | 0156 012C7D
55 | 0157 011D42
56 | 0161 011D43
57 | 0161 01AA
58 | 0162 011D47
59 | 0163 011D9C
60 | 0164 011D48
61 | 0165 011D49
62 | 0166 011DA0
63 | 0167 011D4D
64 | 0168 0102B0
65 | 0169 012071
66 | 016A 0102B2
67 | 016B 011D4F
68 | 016C 0102E1
69 | 016D 011D50
70 | 016E 01207F
71 | 016F 011D52
72 | 016F 01BA
73 | 0170 011D56
74 | 0172 0102B3
75 | 0173 0102E2
76 | 0174 011D57
77 | 0175 011D58
78 | 0176 011D5B
79 | 0177 0102B7
80 | 0178 0102E3
81 | 0179 0102B8
82 | 017A 011DBB
83 | 01A76F 01A770
84 | 01C6 011D2D
85 | 01F0 011D9E
86 |
--------------------------------------------------------------------------------
/modules/to/_CP874.txt:
--------------------------------------------------------------------------------
1 | #transposed from from/_CP874
2 | 0120AC 80
3 | 012026 85
4 | 012018 91
5 | 012019 92
6 | 01201C 93
7 | 01201D 94
8 | 012022 95
9 | 012013 96
10 | 012014 97
11 | 0100A0 A0
12 | 010E01 A1
13 | 010E02 A2
14 | 010E03 A3
15 | 010E04 A4
16 | 010E05 A5
17 | 010E06 A6
18 | 010E07 A7
19 | 010E08 A8
20 | 010E09 A9
21 | 010E0A AA
22 | 010E0B AB
23 | 010E0C AC
24 | 010E0D AD
25 | 010E0E AE
26 | 010E0F AF
27 | 010E10 B0
28 | 010E11 B1
29 | 010E12 B2
30 | 010E13 B3
31 | 010E14 B4
32 | 010E15 B5
33 | 010E16 B6
34 | 010E17 B7
35 | 010E18 B8
36 | 010E19 B9
37 | 010E1A BA
38 | 010E1B BB
39 | 010E1C BC
40 | 010E1D BD
41 | 010E1E BE
42 | 010E1F BF
43 | 010E20 C0
44 | 010E21 C1
45 | 010E22 C2
46 | 010E23 C3
47 | 010E24 C4
48 | 010E25 C5
49 | 010E26 C6
50 | 010E27 C7
51 | 010E28 C8
52 | 010E29 C9
53 | 010E2A CA
54 | 010E2B CB
55 | 010E2C CC
56 | 010E2D CD
57 | 010E2E CE
58 | 010E2F CF
59 | 010E30 D0
60 | 010E31 D1
61 | 010E32 D2
62 | 010E33 D3
63 | 010E34 D4
64 | 010E35 D5
65 | 010E36 D6
66 | 010E37 D7
67 | 010E38 D8
68 | 010E39 D9
69 | 010E3A DA
70 | 010E3F DF
71 | 010E40 E0
72 | 010E41 E1
73 | 010E42 E2
74 | 010E43 E3
75 | 010E44 E4
76 | 010E45 E5
77 | 010E46 E6
78 | 010E47 E7
79 | 010E48 E8
80 | 010E49 E9
81 | 010E4A EA
82 | 010E4B EB
83 | 010E4C EC
84 | 010E4D ED
85 | 010E4E EE
86 | 010E4F EF
87 | 010E50 F0
88 | 010E51 F1
89 | 010E52 F2
90 | 010E53 F3
91 | 010E54 F4
92 | 010E55 F5
93 | 010E56 F6
94 | 010E57 F7
95 | 010E58 F8
96 | 010E59 F9
97 | 010E5A FA
98 | 010E5B FB
99 |
--------------------------------------------------------------------------------
/modules/from/_CP874.txt:
--------------------------------------------------------------------------------
1 | #http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT
2 | 80 0120AC
3 | 85 012026
4 | 91 012018
5 | 92 012019
6 | 93 01201C
7 | 94 01201D
8 | 95 012022
9 | 96 012013
10 | 97 012014
11 | A0 01A0
12 | A1 010E01
13 | A2 010E02
14 | A3 010E03
15 | A4 010E04
16 | A5 010E05
17 | A6 010E06
18 | A7 010E07
19 | A8 010E08
20 | A9 010E09
21 | AA 010E0A
22 | AB 010E0B
23 | AC 010E0C
24 | AD 010E0D
25 | AE 010E0E
26 | AF 010E0F
27 | B0 010E10
28 | B1 010E11
29 | B2 010E12
30 | B3 010E13
31 | B4 010E14
32 | B5 010E15
33 | B6 010E16
34 | B7 010E17
35 | B8 010E18
36 | B9 010E19
37 | BA 010E1A
38 | BB 010E1B
39 | BC 010E1C
40 | BD 010E1D
41 | BE 010E1E
42 | BF 010E1F
43 | C0 010E20
44 | C1 010E21
45 | C2 010E22
46 | C3 010E23
47 | C4 010E24
48 | C5 010E25
49 | C6 010E26
50 | C7 010E27
51 | C8 010E28
52 | C9 010E29
53 | CA 010E2A
54 | CB 010E2B
55 | CC 010E2C
56 | CD 010E2D
57 | CE 010E2E
58 | CF 010E2F
59 | D0 010E30
60 | D1 010E31
61 | D2 010E32
62 | D3 010E33
63 | D4 010E34
64 | D5 010E35
65 | D6 010E36
66 | D7 010E37
67 | D8 010E38
68 | D9 010E39
69 | DA 010E3A
70 | DF 010E3F
71 | E0 010E40
72 | E1 010E41
73 | E2 010E42
74 | E3 010E43
75 | E4 010E44
76 | E5 010E45
77 | E6 010E46
78 | E7 010E47
79 | E8 010E48
80 | E9 010E49
81 | EA 010E4A
82 | EB 010E4B
83 | EC 010E4C
84 | ED 010E4D
85 | EE 010E4E
86 | EF 010E4F
87 | F0 010E50
88 | F1 010E51
89 | F2 010E52
90 | F3 010E53
91 | F4 010E54
92 | F5 010E55
93 | F6 010E56
94 | F7 010E57
95 | F8 010E58
96 | F9 010E59
97 | FA 010E5A
98 | FB 010E5B
99 |
--------------------------------------------------------------------------------
/modules/to/UTF-16BE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | void cbconv(struct bsdconv_instance *ins){
7 | char *data, *p, c;
8 | unsigned int len, i;
9 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
10 | data=this_phase->curr->data;
11 |
12 | data+=1;
13 | if(this_phase->curr->len > 3){
14 | this_phase->state.status=NEXTPHASE;
15 |
16 | DATA_MALLOC(ins, this_phase->data_tail->next);
17 | this_phase->data_tail=this_phase->data_tail->next;
18 | this_phase->data_tail->next=NULL;
19 | this_phase->data_tail->len=4;
20 | this_phase->data_tail->flags=F_FREE;
21 | p=this_phase->data_tail->data=malloc(4);
22 |
23 | c=*data-1;
24 | *p=bb11011000;
25 | *p |= (c >> 2) & bb00000011;
26 | ++p;
27 | *p=(c << 6) & bb11000000;
28 | ++data;
29 | *p |= (*data >> 2) & bb00111111;
30 | ++p;
31 | *p=bb11011100;
32 | *p |= *data & bb00000011;
33 | ++p;
34 | ++data;
35 | *p=*data;
36 | }else{
37 | this_phase->state.status=NEXTPHASE;
38 | len=this_phase->curr->len-1;
39 |
40 | DATA_MALLOC(ins, this_phase->data_tail->next);
41 | this_phase->data_tail=this_phase->data_tail->next;
42 | this_phase->data_tail->next=NULL;
43 | this_phase->data_tail->len=2;
44 | this_phase->data_tail->flags=F_FREE;
45 | this_phase->data_tail->data=malloc(2);
46 | for(i=0;i<2-len;++i){
47 | CP(this_phase->data_tail->data)[i]=0x0;
48 | }
49 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
50 | }
51 | return;
52 | }
53 |
--------------------------------------------------------------------------------
/Makefile.win:
--------------------------------------------------------------------------------
# Windows (MinGW) build: produces build/libbsdconv.dll, the command line
# tools, one DLL per callback module and a batch file that builds the tables.
PREFIX?=C:/bsdconv
BSDCONV_PATH?=${PREFIX}
CFLAGS+=-Wall -O2 -DPREFIX='"${PREFIX}"' -DBSDCONV_PATH='"${BSDCONV_PATH}"' -DWIN32 -D_BSDCONV_INTERNAL
CC?=mingw32-cc

# None of these targets names a file it creates, and "modules" is also the
# name of a source directory, so declare them phony to keep them runnable.
.PHONY: all builddir libbsdconv bsdconv bsdconv_mktable codecs_table codecs_callback modules meta clean

all: builddir libbsdconv bsdconv_mktable bsdconv modules meta

# Output directory layout.
builddir:
	mkdir -p build/
	mkdir -p build/include
	mkdir -p build/modules
	mkdir -p build/modules/filter
	mkdir -p build/modules/from
	mkdir -p build/modules/inter
	mkdir -p build/modules/scorer
	mkdir -p build/modules/src
	mkdir -p build/modules/to

libbsdconv: builddir
	$(CC) ${CFLAGS} src/missing_func.c src/libbsdconv.c -shared -o build/libbsdconv.dll

bsdconv: builddir libbsdconv src/bsdconv.c
	$(CC) ${CFLAGS} -lbsdconv -L./build/ src/bsdconv.c -o build/bsdconv.exe

bsdconv_mktable: builddir src/bsdconv-mktable.c
	$(CC) ${CFLAGS} src/missing_func.c src/bsdconv-mktable.c -o build/bsdconv_mktable.exe

# Only writes build/mk_table.bat (one bsdconv_mktable command per .txt
# table); the tables themselves are built by running that batch file.
codecs_table: builddir bsdconv_mktable
	cd modules && \
	find */*.txt -type f | awk -F. '{cmd="bsdconv_mktable ../modules/"$$1"."$$2" modules/"$$1; print(cmd);}' > ../build/mk_table.bat

# Compiles every modules/*/*.c callback into its own DLL.
codecs_callback: builddir libbsdconv
	cd modules && \
	find */*.c -type f | awk -F. '{cmd="$(CC) ${CFLAGS} -shared -lbsdconv -L../build/ -o ../build/modules/"$$1".dll "$$1"."$$2" -lwsock32"; system(cmd);}'

modules: builddir codecs_table codecs_callback

meta:
	cp src/bsdconv.h build/include/
	cp tools/setEnvVar.bat build/

clean:
	rm -rf build
44 |
--------------------------------------------------------------------------------
/modules/from/ANY.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Private state of this codec, built in cbcreate(). */
struct my_st {
	struct data_rt *data;	/* data copied to the output on every cbconv() call; set to NULL by the DROP argument */
	bsdconv_counter_t *counter;	/* counter incremented on every cbconv() call; NULL unless ERROR was given */
};
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 | struct my_st *r=malloc(sizeof(struct my_st));
12 | struct data_rt *bak;
13 | int e;
14 | r->data=str2data("013F", &e, ins);
15 | r->counter=NULL;
16 | while(arg){
17 | if(strcasecmp(arg->key, "ERROR")==0){
18 | if(arg->ptr)
19 | r->counter=bsdconv_counter(ins, arg->ptr);
20 | else
21 | r->counter=bsdconv_counter(ins, "IERR");
22 | }else if(strcasecmp(arg->key, "DROP")==0){
23 | DATA_FREE(ins, r->data);
24 | r->data = NULL;
25 | }else{
26 | bak=r->data;
27 | r->data=str2data(arg->key, &e, ins);
28 | DATA_FREE(ins, bak);
29 | if(e){
30 | DATA_FREE(ins, r->data);
31 | free(r);
32 | return e;
33 | }
34 | }
35 | arg=arg->next;
36 | }
37 | THIS_CODEC(ins)->priv=r;
38 | return 0;
39 | }
40 |
41 | void cbdestroy(struct bsdconv_instance *ins){
42 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
43 | struct my_st *r=this_phase->codec[this_phase->index].priv;
44 | DATA_FREE(ins, r->data);
45 | free(r);
46 | }
47 |
48 | void cbconv(struct bsdconv_instance *ins){
49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
50 | struct my_st *r=this_phase->codec[this_phase->index].priv;
51 |
52 | LISTCPY(ins, this_phase->data_tail, r->data);
53 |
54 | this_phase->state.status=NEXTPHASE;
55 |
56 | if(r->counter)
57 | *(r->counter)+=1;
58 | return;
59 | }
60 |
--------------------------------------------------------------------------------
/modules/to/ANY.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Private state of this codec, built in cbcreate(). */
struct my_st {
	struct data_rt *data;	/* data copied to the output on every cbconv() call; set to NULL by the DROP argument */
	bsdconv_counter_t *counter;	/* counter incremented on every cbconv() call; NULL unless ERROR was given */
};
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 | struct my_st *r=malloc(sizeof(struct my_st));
12 | struct data_rt *bak;
13 | int e;
14 | r->data=str2data("3F", &e, ins);
15 | r->counter=NULL;
16 | while(arg){
17 | if(strcasecmp(arg->key, "ERROR")==0){
18 | if(arg->ptr)
19 | r->counter=bsdconv_counter(ins, arg->ptr);
20 | else
21 | r->counter=bsdconv_counter(ins, "OERR");
22 | }else if(strcasecmp(arg->key, "DROP")==0){
23 | DATA_FREE(ins, r->data);
24 | r->data = NULL;
25 | }else{
26 | bak=r->data;
27 | r->data=str2data(arg->key, &e, ins);
28 | DATA_FREE(ins, bak);
29 | if(e){
30 | DATA_FREE(ins, r->data);
31 | free(r);
32 | return e;
33 | }
34 | }
35 | arg=arg->next;
36 | }
37 | THIS_CODEC(ins)->priv=r;
38 | return 0;
39 | }
40 |
41 | void cbdestroy(struct bsdconv_instance *ins){
42 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
43 | struct my_st *r=this_phase->codec[this_phase->index].priv;
44 | DATA_FREE(ins, r->data);
45 | free(r);
46 | }
47 |
48 | void cbconv(struct bsdconv_instance *ins){
49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
50 | struct my_st *r=this_phase->codec[this_phase->index].priv;
51 |
52 | LISTCPY(ins, this_phase->data_tail, r->data);
53 |
54 | this_phase->state.status=NEXTPHASE;
55 |
56 | if(r->counter)
57 | *(r->counter)+=1;
58 | return;
59 | }
60 |
--------------------------------------------------------------------------------
/modules/to/PASS.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Private state of this codec, built in cbcreate(). */
struct my_s{
	struct bsdconv_filter *filter;	/* filter loaded from the FOR argument; NULL means everything passes */
	int mark;	/* 1 when the MARK argument was given: passed items get the F_MARK flag */
};
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 | struct my_s *r=malloc(sizeof(struct my_s));
12 | THIS_CODEC(ins)->priv=r;
13 | r->filter=NULL;
14 | r->mark=0;
15 |
16 | char *filter=NULL;
17 | while(arg){
18 | if(strcasecmp(arg->key, "MARK")==0){
19 | r->mark=1;
20 | }else if(strcasecmp(arg->key, "FOR")==0){
21 | filter=arg->ptr;
22 | }else{
23 | free(r);
24 | return EINVAL;
25 | }
26 | arg=arg->next;
27 | }
28 | if(filter!=NULL){
29 | r->filter=load_filter(filter);
30 | if(r->filter==NULL){
31 | free(r);
32 | return EOPNOTSUPP;
33 | }
34 | }
35 | return 0;
36 | }
37 |
38 | void cbdestroy(struct bsdconv_instance *ins){
39 | struct my_s *r=THIS_CODEC(ins)->priv;
40 | if(r->filter)
41 | unload_filter(r->filter);
42 | free(r);
43 | }
44 |
45 | void cbconv(struct bsdconv_instance *ins){
46 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
47 | struct my_s *t=THIS_CODEC(ins)->priv;
48 | int pass=1;
49 |
50 | if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr))
51 | pass=0;
52 |
53 | if(pass){
54 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
55 | this_phase->data_tail=this_phase->data_tail->next;
56 | this_phase->data_tail->next=NULL;
57 |
58 | if(t->mark)
59 | this_phase->data_tail->flags |= F_MARK;
60 |
61 | this_phase->state.status=NEXTPHASE;
62 | }else{
63 | this_phase->state.status=DEADEND;
64 | }
65 |
66 | return;
67 | }
68 |
--------------------------------------------------------------------------------
/modules/from/BIG5-5C.txt:
--------------------------------------------------------------------------------
1 | #extract from cp950
2 | A15C5C 01FE4F
3 | A25C5C 01515D
4 | A35C5C 0103B1
5 | A45C5C 014E48
6 | A55C5C 01529F
7 | A65C5C 015412
8 | A75C5C 01542D
9 | A85C5C 016C94
10 | A95C5C 01577C
11 | AA5C5C 016B7F
12 | AB5C5C 014FDE
13 | AC5C5C 0167AF
14 | AD5C5C 0182D2
15 | AE5C5C 015A09
16 | AF5C5C 0173EE
17 | B05C5C 018C79
18 | B15C5C 015D24
19 | B25C5C 016DDA
20 | B35C5C 018A31
21 | B45C5C 015EC4
22 | B55C5C 017435
23 | B65C5C 018DDA
24 | B75C5C 016127
25 | B85C5C 017A1E
26 | B95C5C 01923E
27 | BA5C5C 01669D
28 | BB5C5C 0184CB
29 | BC5C5C 0158A6
30 | BD5C5C 017A40
31 | BE5C5C 0195B1
32 | BF5C5C 01749E
33 | C05C5C 019910
34 | C15C5C 017E37
35 | C25C5C 0164FA
36 | C35C5C 019EE0
37 | C45C5C 015B40
38 | C55C5C 019ACF
39 | C65C5C 018EA1
40 | C95C5C 015C10
41 | CA5C5C 014F62
42 | CB5C5C 016C7B
43 | CC5C5C 015CA4
44 | CD5C5C 0172D6
45 | CE5C5C 0157A5
46 | CF5C5C 0167E6
47 | D05C5C 0180D0
48 | D15C5C 015A16
49 | D25C5C 016D82
50 | D35C5C 017F61
51 | D45C5C 015045
52 | D55C5C 0160DD
53 | D65C5C 01727E
54 | D75C5C 01838D
55 | D85C5C 01509C
56 | D95C5C 0163CA
57 | DA5C5C 01712E
58 | DB5C5C 01833B
59 | DC5C5C 019103
60 | DD5C5C 015E4B
61 | DE5C5C 016EDC
62 | DF5C5C 017D85
63 | E05C5C 018D68
64 | E15C5C 01587F
65 | E25C5C 0169D9
66 | E35C5C 017BA4
67 | E45C5C 018E0A
68 | E55C5C 015AF9
69 | E65C5C 016F7F
70 | E75C5C 01850C
71 | E85C5C 019186
72 | E95C5C 015B1E
73 | EA5C5C 017366
74 | EB5C5C 01878F
75 | EC5C5C 019924
76 | ED5C5C 0171E1
77 | EE5C5C 0187B0
78 | EF5C5C 0199F9
79 | F05C5C 017912
80 | F15C5C 0193AA
81 | F25C5C 017019
82 | F35C5C 019140
83 | F45C5C 017035
84 | F55C5C 019A31
85 | F65C5C 019145
86 | F75C5C 018D15
87 | F85C5C 019C4B
88 | F95C5C 019C6D
89 |
--------------------------------------------------------------------------------
/modules/to/BIG5-5C.txt:
--------------------------------------------------------------------------------
1 | # transpose from from/big5-5C
2 | 01FE4F A15C5C
3 | 01515D A25C5C
4 | 0103B1 A35C5C
5 | 014E48 A45C5C
6 | 01529F A55C5C
7 | 015412 A65C5C
8 | 01542D A75C5C
9 | 016C94 A85C5C
10 | 01577C A95C5C
11 | 016B7F AA5C5C
12 | 014FDE AB5C5C
13 | 0167AF AC5C5C
14 | 0182D2 AD5C5C
15 | 015A09 AE5C5C
16 | 0173EE AF5C5C
17 | 018C79 B05C5C
18 | 015D24 B15C5C
19 | 016DDA B25C5C
20 | 018A31 B35C5C
21 | 015EC4 B45C5C
22 | 017435 B55C5C
23 | 018DDA B65C5C
24 | 016127 B75C5C
25 | 017A1E B85C5C
26 | 01923E B95C5C
27 | 01669D BA5C5C
28 | 0184CB BB5C5C
29 | 0158A6 BC5C5C
30 | 017A40 BD5C5C
31 | 0195B1 BE5C5C
32 | 01749E BF5C5C
33 | 019910 C05C5C
34 | 017E37 C15C5C
35 | 0164FA C25C5C
36 | 019EE0 C35C5C
37 | 015B40 C45C5C
38 | 019ACF C55C5C
39 | 018EA1 C65C5C
40 | 015C10 C95C5C
41 | 014F62 CA5C5C
42 | 016C7B CB5C5C
43 | 015CA4 CC5C5C
44 | 0172D6 CD5C5C
45 | 0157A5 CE5C5C
46 | 0167E6 CF5C5C
47 | 0180D0 D05C5C
48 | 015A16 D15C5C
49 | 016D82 D25C5C
50 | 017F61 D35C5C
51 | 015045 D45C5C
52 | 0160DD D55C5C
53 | 01727E D65C5C
54 | 01838D D75C5C
55 | 01509C D85C5C
56 | 0163CA D95C5C
57 | 01712E DA5C5C
58 | 01833B DB5C5C
59 | 019103 DC5C5C
60 | 015E4B DD5C5C
61 | 016EDC DE5C5C
62 | 017D85 DF5C5C
63 | 018D68 E05C5C
64 | 01587F E15C5C
65 | 0169D9 E25C5C
66 | 017BA4 E35C5C
67 | 018E0A E45C5C
68 | 015AF9 E55C5C
69 | 016F7F E65C5C
70 | 01850C E75C5C
71 | 019186 E85C5C
72 | 015B1E E95C5C
73 | 017366 EA5C5C
74 | 01878F EB5C5C
75 | 019924 EC5C5C
76 | 0171E1 ED5C5C
77 | 0187B0 EE5C5C
78 | 0199F9 EF5C5C
79 | 017912 F05C5C
80 | 0193AA F15C5C
81 | 017019 F25C5C
82 | 019140 F35C5C
83 | 017035 F45C5C
84 | 019A31 F55C5C
85 | 019145 F65C5C
86 | 018D15 F75C5C
87 | 019C4B F85C5C
88 | 019C6D F95C5C
89 |
--------------------------------------------------------------------------------
/modules/from/_CP1255.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 83 010192
5 | 84 01201E
6 | 85 012026
7 | 86 012020
8 | 87 012021
9 | 88 0102C6
10 | 89 012030
11 | 8B 012039
12 | 91 012018
13 | 92 012019
14 | 93 01201C
15 | 94 01201D
16 | 95 012022
17 | 96 012013
18 | 97 012014
19 | 98 0102DC
20 | 99 012122
21 | 9B 01203A
22 | A0 01A0
23 | A1 01A1
24 | A2 01A2
25 | A3 01A3
26 | A4 0120AA
27 | A5 01A5
28 | A6 01A6
29 | A7 01A7
30 | A8 01A8
31 | A9 01A9
32 | AA 01D7
33 | AB 01AB
34 | AC 01AC
35 | AD 01AD
36 | AE 01AE
37 | AF 01AF
38 | B0 01B0
39 | B1 01B1
40 | B2 01B2
41 | B3 01B3
42 | B4 01B4
43 | B5 01B5
44 | B6 01B6
45 | B7 01B7
46 | B8 01B8
47 | B9 01B9
48 | BA 01F7
49 | BB 01BB
50 | BC 01BC
51 | BD 01BD
52 | BE 01BE
53 | BF 01BF
54 | C0 0105B0
55 | C1 0105B1
56 | C2 0105B2
57 | C3 0105B3
58 | C4 0105B4
59 | C5 0105B5
60 | C6 0105B6
61 | C7 0105B7
62 | C8 0105B8
63 | C9 0105B9
64 | CB 0105BB
65 | CC 0105BC
66 | CD 0105BD
67 | CE 0105BE
68 | CF 0105BF
69 | D0 0105C0
70 | D1 0105C1
71 | D2 0105C2
72 | D3 0105C3
73 | D4 0105F0
74 | D5 0105F1
75 | D6 0105F2
76 | D7 0105F3
77 | D8 0105F4
78 | E0 0105D0
79 | E1 0105D1
80 | E2 0105D2
81 | E3 0105D3
82 | E4 0105D4
83 | E5 0105D5
84 | E6 0105D6
85 | E7 0105D7
86 | E8 0105D8
87 | E9 0105D9
88 | EA 0105DA
89 | EB 0105DB
90 | EC 0105DC
91 | ED 0105DD
92 | EE 0105DE
93 | EF 0105DF
94 | F0 0105E0
95 | F1 0105E1
96 | F2 0105E2
97 | F3 0105E3
98 | F4 0105E4
99 | F5 0105E5
100 | F6 0105E6
101 | F7 0105E7
102 | F8 0105E8
103 | F9 0105E9
104 | FA 0105EA
105 | FD 01200E
106 | FE 01200F
107 |
--------------------------------------------------------------------------------
/modules/inter/ZHTW-WORDS.txt:
--------------------------------------------------------------------------------
1 | 014E00,01500B,01613F 014E00,01500B,019858
2 | 014E0D,018981,0191C7 014E0D,018981,0163A1
3 | 014E4B,01540E 014E4B,015F8C
4 | 014E91,016CB3 0196F2,016CB3
5 | 014EC0,014E48 014EC0,019EBC
6 | 014EE5,01540E 014EE5,015F8C
7 | 014F46,01613F 014F46,019858
8 | 015165,01591C 015165,01591C
9 | 0151E0,01591A 015E7E,01591A
10 | 0151E0,015EA6 015E7E,015EA6
11 | 015206,01949F 015206,019418
12 | 0153D1,01653E 01767C,01653E
13 | 0153D1,0171D2 01767C,0171D2
14 | 0153D1,01734E,0191D1 01767C,01734E,0191D1
15 | 0153D1,0173FE 01767C,0173FE
16 | 0153D1,019001 01767C,019001
17 | 01540E,015929 015F8C,015929
18 | 01540E,016094 015F8C,016094
19 | 01540E,01679C 015F8C,01679C
20 | 015411,015C0E 0156AE,015C0E
21 | 015446,016703 015F85,016703
22 | 01591A,014E48 01591A,019EBC
23 | 01591C,014E4B,01540E 01591C,014E4B,01540E
24 | 015C6C,014E8E 015C6C,0165BC
25 | 015F69,014E91 015F69,0196F2
26 | 015F81,015A5A 015FB5,015A5A
27 | 015F81,016C42 015FB5,016C42
28 | 01600E,014E48 01600E,019EBC
29 | 01613F,01541B 019858,01541B
30 | 016642,01949F 016642,019418
31 | 016700,01540E 016700,015F8C
32 | 016709,0168F1,016709,0189D2 016709,017A1C,016709,0189D2
33 | 0168F1,0189D2 017A1C,0189D2
34 | 016A4B,016881 016A4B,016A11
35 | 016A6B,016881 016A6B,016A11
36 | 016D77,0191CC 016D77,0188E1
37 | 017528,014E8E 017528,0165BC
38 | 0179D2,01949F 0179D2,019418
39 | 01820D,015F97 016368,015F97
40 | 018868,015F81 018868,015FB5
41 | 018C61,015F81 018C61,015FB5
42 | 019019,014E48 019019,019EBC
43 | 019019,0191CC 019019,0188E1
44 | 0190A3,014E48 0190A3,019EBC
45 | 0190A3,0191CC 0190A3,0188E1
46 | 0191C7,016AB3,016994 0163A1,016AB3,016994
47 | 0191CC,019762 0188E1,019762
48 | 01949F,01611B 01937E,01611B
49 | 019632,018303 019632,017BC4
50 | 01982D,0153D1 01982D,019AEE
51 |
--------------------------------------------------------------------------------
/modules/from/ANSI-CONTROL.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
/* Flag constants; none of them is referenced by the code in this file. */
#define F_CLEAR 0
#define F_A 1
#define F_B 2

/* Private codec state used while one escape sequence is being collected. */
struct my_s {
	char *buf;	/* start of the sequence buffer (cbcreate allocates 32 bytes) */
	char *p;	/* next write position inside buf */
	char f;		/* non-zero while a sequence is in progress */
};
13 |
14 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
15 | struct my_s *r=malloc(sizeof(struct my_s));
16 | r->buf=malloc(32);
17 | THIS_CODEC(ins)->priv=r;
18 | return 0;
19 | }
20 |
21 | void cbinit(struct bsdconv_instance *ins){
22 | struct my_s *r=THIS_CODEC(ins)->priv;
23 | r->p=r->buf;
24 | r->f=0;
25 | }
26 |
27 | void cbdestroy(struct bsdconv_instance *ins){
28 | struct my_s *r=THIS_CODEC(ins)->priv;
29 | free(r->buf);
30 | free(r);
31 | }
32 |
33 | void cbconv(struct bsdconv_instance *ins){
34 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
35 | struct my_s *t=THIS_CODEC(ins)->priv;
36 | char d=CP(this_phase->curr->data)[this_phase->i];
37 |
38 | if(t->f){
39 | *(t->p)=d;
40 | t->p+=1;
41 | this_phase->state.status=CONTINUE;
42 | if((d>='a' && d<='z') || (d>='A' && d<='N') || (d>='P' && d<='Z') || (t->p - t->buf)==30 || ((t->p - t->buf)==2 && d!='[')){
43 | DATA_MALLOC(ins, this_phase->data_tail->next);
44 | this_phase->data_tail=this_phase->data_tail->next;
45 | this_phase->data_tail->next=NULL;
46 | this_phase->data_tail->len=t->p - t->buf;
47 | this_phase->data_tail->flags=F_FREE;
48 | this_phase->state.status=NEXTPHASE;
49 | this_phase->data_tail->data=t->buf;
50 | t->f=0;
51 | t->buf=malloc(32);
52 | t->p=t->buf;
53 | }
54 | }else if(d==0x1b){
55 | t->f=1;
56 | *(t->p)=d;
57 | t->p+=1;
58 | this_phase->state.status=CONTINUE;
59 | }else{
60 | this_phase->state.status=DEADEND;
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/testsuite/api.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | int main(int argc, char *argv[]){
6 | char *in=strdup("utf-8:utf-8,ascii");
7 | char *out;
8 | char *expect;
9 | struct bsdconv_instance *ins;
10 |
11 | expect="ASCII,_UTF-8,ANY#013F&ERROR:count#lowercase:AsCiI,any#3f";
12 | ins=bsdconv_create("utf-8,3f:count#lowercase:AsCiI,any#3f");
13 | out=bsdconv_pack(ins);
14 | if(strcmp(expect, out)){
15 | printf("Test failed at bsdconv_pack\nexpect: %s\nresult: %s\n", expect, out);
16 | return 1;
17 | }
18 | free(out);
19 | bsdconv_destroy(ins);
20 |
21 | expect="utf-8:upper:utf-8,ascii";
22 | out=bsdconv_insert_phase(in, "upper", INTER, 1);
23 | if(strcmp(expect, out)){
24 | printf("Test failed at bsdconv_insert_phase\nexpect: %s\nresult: %s\n", expect, out);
25 | return 1;
26 | }
27 | free(in);
28 |
29 | in=out;
30 | expect="utf-8:full:utf-8,ascii";
31 | out=bsdconv_replace_phase(in, "full", INTER, 1);
32 | if(strcmp(expect, out)){
33 | printf("Test failed at bsdconv_replace_phase\nexpect: %s\nresult: %s\n", expect, out);
34 | return 1;
35 | }
36 | free(in);
37 |
38 | in=out;
39 | expect="utf-8:full:utf-8,big5";
40 | out=bsdconv_replace_codec(in, "big5", 2, 1);
41 | if(strcmp(expect, out)){
42 | printf("Test failed at bsdconv_replace_codec\nexpect: %s\nresult: %s\n", expect, out);
43 | return 1;
44 | }
45 | free(in);
46 |
47 | in=out;
48 | expect="utf-8,ascii:full:utf-8,big5";
49 | out=bsdconv_insert_codec(in, "ascii", 0, 1);
50 | if(strcmp(expect, out)){
51 | printf("Test failed at bsdconv_insert_codec\nexpect: %s\nresult: %s\n", expect, out);
52 | return 1;
53 | }
54 | free(in);
55 |
56 | printf("API tests passed\n");
57 | free(out);
58 | return 0;
59 | }
60 |
--------------------------------------------------------------------------------
/modules/from/ASCII.txt:
--------------------------------------------------------------------------------
1 | 01 0101
2 | 02 0102
3 | 03 0103
4 | 04 0104
5 | 05 0105
6 | 06 0106
7 | 07 0107
8 | 08 0108
9 | 09 0109
10 | 0A 010A
11 | 0B 010B
12 | 0C 010C
13 | 0D 010D
14 | 0E 010E
15 | 0F 010F
16 | 10 0110
17 | 11 0111
18 | 12 0112
19 | 13 0113
20 | 14 0114
21 | 15 0115
22 | 16 0116
23 | 17 0117
24 | 18 0118
25 | 19 0119
26 | 1A 011A
27 | 1B 011B
28 | 1C 011C
29 | 1D 011D
30 | 1E 011E
31 | 1F 011F
32 | 20 0120
33 | 21 0121
34 | 22 0122
35 | 23 0123
36 | 24 0124
37 | 25 0125
38 | 26 0126
39 | 27 0127
40 | 28 0128
41 | 29 0129
42 | 2A 012A
43 | 2B 012B
44 | 2C 012C
45 | 2D 012D
46 | 2E 012E
47 | 2F 012F
48 | 30 0130
49 | 31 0131
50 | 32 0132
51 | 33 0133
52 | 34 0134
53 | 35 0135
54 | 36 0136
55 | 37 0137
56 | 38 0138
57 | 39 0139
58 | 3A 013A
59 | 3B 013B
60 | 3C 013C
61 | 3D 013D
62 | 3E 013E
63 | 3F 013F
64 | 40 0140
65 | 41 0141
66 | 42 0142
67 | 43 0143
68 | 44 0144
69 | 45 0145
70 | 46 0146
71 | 47 0147
72 | 48 0148
73 | 49 0149
74 | 4A 014A
75 | 4B 014B
76 | 4C 014C
77 | 4D 014D
78 | 4E 014E
79 | 4F 014F
80 | 50 0150
81 | 51 0151
82 | 52 0152
83 | 53 0153
84 | 54 0154
85 | 55 0155
86 | 56 0156
87 | 57 0157
88 | 58 0158
89 | 59 0159
90 | 5A 015A
91 | 5B 015B
92 | 5C 015C
93 | 5D 015D
94 | 5E 015E
95 | 5F 015F
96 | 60 0160
97 | 61 0161
98 | 62 0162
99 | 63 0163
100 | 64 0164
101 | 65 0165
102 | 66 0166
103 | 67 0167
104 | 68 0168
105 | 69 0169
106 | 6A 016A
107 | 6B 016B
108 | 6C 016C
109 | 6D 016D
110 | 6E 016E
111 | 6F 016F
112 | 70 0170
113 | 71 0171
114 | 72 0172
115 | 73 0173
116 | 74 0174
117 | 75 0175
118 | 76 0176
119 | 77 0177
120 | 78 0178
121 | 79 0179
122 | 7A 017A
123 | 7B 017B
124 | 7C 017C
125 | 7D 017D
126 | 7E 017E
127 | 7F 017F
128 |
--------------------------------------------------------------------------------
/modules/to/UTF-16LE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
6 | #define SWAP(a,b,i) ((i)=(a), (a)=(b), (b)=(i))
7 |
8 | void cbconv(struct bsdconv_instance *ins){
9 | char *data, *p, c;
10 | unsigned int len, i;
11 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
12 | data=this_phase->curr->data;
13 |
14 | data+=1;
15 | if(this_phase->curr->len > 3){
16 | this_phase->state.status=NEXTPHASE;
17 |
18 | DATA_MALLOC(ins, this_phase->data_tail->next);
19 | this_phase->data_tail=this_phase->data_tail->next;
20 | this_phase->data_tail->next=NULL;
21 | this_phase->data_tail->len=4;
22 | this_phase->data_tail->flags=F_FREE;
23 | p=this_phase->data_tail->data=malloc(4);
24 |
25 | c=*data-1;
26 | *p=bb11011000;
27 | *p |= (c >> 2) & bb00000011;
28 | ++p;
29 | *p=(c << 6) & bb11000000;
30 | ++data;
31 | *p |= (*data >> 2) & bb00111111;
32 | ++p;
33 | *p=bb11011100;
34 | *p |= *data & bb00000011;
35 | ++p;
36 | ++data;
37 | *p=*data;
38 |
39 | data=this_phase->data_tail->data;
40 |
41 | SWAP(data[0],data[1],i);
42 | SWAP(data[2],data[3],i);
43 | }else{
44 | this_phase->state.status=NEXTPHASE;
45 | len=this_phase->curr->len-1;
46 |
47 | DATA_MALLOC(ins, this_phase->data_tail->next);
48 | this_phase->data_tail=this_phase->data_tail->next;
49 | this_phase->data_tail->next=NULL;
50 | this_phase->data_tail->len=2;
51 | this_phase->data_tail->flags=F_FREE;
52 | this_phase->data_tail->data=malloc(2);
53 | for(i=0;i<2-len;++i){
54 | CP(this_phase->data_tail->data)[i]=0x0;
55 | }
56 | memcpy(CP(this_phase->data_tail->data)+i, data, len);
57 | data=this_phase->data_tail->data;
58 |
59 | SWAP(data[0],data[1],i);
60 | }
61 | return;
62 | }
63 |
--------------------------------------------------------------------------------
/modules/from/_CP1253.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 83 010192
5 | 84 01201E
6 | 85 012026
7 | 86 012020
8 | 87 012021
9 | 89 012030
10 | 8B 012039
11 | 91 012018
12 | 92 012019
13 | 93 01201C
14 | 94 01201D
15 | 95 012022
16 | 96 012013
17 | 97 012014
18 | 99 012122
19 | 9B 01203A
20 | A0 01A0
21 | A1 010385
22 | A2 010386
23 | A3 01A3
24 | A4 01A4
25 | A5 01A5
26 | A6 01A6
27 | A7 01A7
28 | A8 01A8
29 | A9 01A9
30 | AB 01AB
31 | AC 01AC
32 | AD 01AD
33 | AE 01AE
34 | AF 012015
35 | B0 01B0
36 | B1 01B1
37 | B2 01B2
38 | B3 01B3
39 | B4 010384
40 | B5 01B5
41 | B6 01B6
42 | B7 01B7
43 | B8 010388
44 | B9 010389
45 | BA 01038A
46 | BB 01BB
47 | BC 01038C
48 | BD 01BD
49 | BE 01038E
50 | BF 01038F
51 | C0 010390
52 | C1 010391
53 | C2 010392
54 | C3 010393
55 | C4 010394
56 | C5 010395
57 | C6 010396
58 | C7 010397
59 | C8 010398
60 | C9 010399
61 | CA 01039A
62 | CB 01039B
63 | CC 01039C
64 | CD 01039D
65 | CE 01039E
66 | CF 01039F
67 | D0 0103A0
68 | D1 0103A1
69 | D3 0103A3
70 | D4 0103A4
71 | D5 0103A5
72 | D6 0103A6
73 | D7 0103A7
74 | D8 0103A8
75 | D9 0103A9
76 | DA 0103AA
77 | DB 0103AB
78 | DC 0103AC
79 | DD 0103AD
80 | DE 0103AE
81 | DF 0103AF
82 | E0 0103B0
83 | E1 0103B1
84 | E2 0103B2
85 | E3 0103B3
86 | E4 0103B4
87 | E5 0103B5
88 | E6 0103B6
89 | E7 0103B7
90 | E8 0103B8
91 | E9 0103B9
92 | EA 0103BA
93 | EB 0103BB
94 | EC 0103BC
95 | ED 0103BD
96 | EE 0103BE
97 | EF 0103BF
98 | F0 0103C0
99 | F1 0103C1
100 | F2 0103C2
101 | F3 0103C3
102 | F4 0103C4
103 | F5 0103C5
104 | F6 0103C6
105 | F7 0103C7
106 | F8 0103C8
107 | F9 0103C9
108 | FA 0103CA
109 | FB 0103CB
110 | FC 0103CC
111 | FD 0103CD
112 | FE 0103CE
113 |
--------------------------------------------------------------------------------
/modules/to/_CP1253.txt:
--------------------------------------------------------------------------------
1 | # transposed from from/_CP1253
2 | 0120AC 80
3 | 01201A 82
4 | 010192 83
5 | 01201E 84
6 | 012026 85
7 | 012020 86
8 | 012021 87
9 | 012030 89
10 | 012039 8B
11 | 012018 91
12 | 012019 92
13 | 01201C 93
14 | 01201D 94
15 | 012022 95
16 | 012013 96
17 | 012014 97
18 | 012122 99
19 | 01203A 9B
20 | 0100A0 A0
21 | 010385 A1
22 | 010386 A2
23 | 0100A3 A3
24 | 0100A4 A4
25 | 0100A5 A5
26 | 0100A6 A6
27 | 0100A7 A7
28 | 0100A8 A8
29 | 0100A9 A9
30 | 0100AB AB
31 | 0100AC AC
32 | 0100AD AD
33 | 0100AE AE
34 | 012015 AF
35 | 0100B0 B0
36 | 0100B1 B1
37 | 0100B2 B2
38 | 0100B3 B3
39 | 010384 B4
40 | 0100B5 B5
41 | 0100B6 B6
42 | 0100B7 B7
43 | 010388 B8
44 | 010389 B9
45 | 01038A BA
46 | 0100BB BB
47 | 01038C BC
48 | 0100BD BD
49 | 01038E BE
50 | 01038F BF
51 | 010390 C0
52 | 010391 C1
53 | 010392 C2
54 | 010393 C3
55 | 010394 C4
56 | 010395 C5
57 | 010396 C6
58 | 010397 C7
59 | 010398 C8
60 | 010399 C9
61 | 01039A CA
62 | 01039B CB
63 | 01039C CC
64 | 01039D CD
65 | 01039E CE
66 | 01039F CF
67 | 0103A0 D0
68 | 0103A1 D1
69 | 0103A3 D3
70 | 0103A4 D4
71 | 0103A5 D5
72 | 0103A6 D6
73 | 0103A7 D7
74 | 0103A8 D8
75 | 0103A9 D9
76 | 0103AA DA
77 | 0103AB DB
78 | 0103AC DC
79 | 0103AD DD
80 | 0103AE DE
81 | 0103AF DF
82 | 0103B0 E0
83 | 0103B1 E1
84 | 0103B2 E2
85 | 0103B3 E3
86 | 0103B4 E4
87 | 0103B5 E5
88 | 0103B6 E6
89 | 0103B7 E7
90 | 0103B8 E8
91 | 0103B9 E9
92 | 0103BA EA
93 | 0103BB EB
94 | 0103BC EC
95 | 0103BD ED
96 | 0103BE EE
97 | 0103BF EF
98 | 0103C0 F0
99 | 0103C1 F1
100 | 0103C2 F2
101 | 0103C3 F3
102 | 0103C4 F4
103 | 0103C5 F5
104 | 0103C6 F6
105 | 0103C7 F7
106 | 0103C8 F8
107 | 0103C9 F9
108 | 0103CA FA
109 | 0103CB FB
110 | 0103CC FC
111 | 0103CD FD
112 | 0103CE FE
113 |
--------------------------------------------------------------------------------
/modules/from/_CP1258.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 83 010192
5 | 84 01201E
6 | 85 012026
7 | 86 012020
8 | 87 012021
9 | 88 0102C6
10 | 89 012030
11 | 8B 012039
12 | 8C 010152
13 | 91 012018
14 | 92 012019
15 | 93 01201C
16 | 94 01201D
17 | 95 012022
18 | 96 012013
19 | 97 012014
20 | 98 0102DC
21 | 99 012122
22 | 9B 01203A
23 | 9C 010153
24 | 9F 010178
25 | A0 01A0
26 | A1 01A1
27 | A2 01A2
28 | A3 01A3
29 | A4 01A4
30 | A5 01A5
31 | A6 01A6
32 | A7 01A7
33 | A8 01A8
34 | A9 01A9
35 | AA 01AA
36 | AB 01AB
37 | AC 01AC
38 | AD 01AD
39 | AE 01AE
40 | AF 01AF
41 | B0 01B0
42 | B1 01B1
43 | B2 01B2
44 | B3 01B3
45 | B4 01B4
46 | B5 01B5
47 | B6 01B6
48 | B7 01B7
49 | B8 01B8
50 | B9 01B9
51 | BA 01BA
52 | BB 01BB
53 | BC 01BC
54 | BD 01BD
55 | BE 01BE
56 | BF 01BF
57 | C0 01C0
58 | C1 01C1
59 | C2 01C2
60 | C3 010102
61 | C4 01C4
62 | C5 01C5
63 | C6 01C6
64 | C7 01C7
65 | C8 01C8
66 | C9 01C9
67 | CA 01CA
68 | CB 01CB
69 | CC 010300
70 | CD 01CD
71 | CE 01CE
72 | CF 01CF
73 | D0 010110
74 | D1 01D1
75 | D2 010309
76 | D3 01D3
77 | D4 01D4
78 | D5 0101A0
79 | D6 01D6
80 | D7 01D7
81 | D8 01D8
82 | D9 01D9
83 | DA 01DA
84 | DB 01DB
85 | DC 01DC
86 | DD 0101AF
87 | DE 010303
88 | DF 01DF
89 | E0 01E0
90 | E1 01E1
91 | E2 01E2
92 | E3 010103
93 | E4 01E4
94 | E5 01E5
95 | E6 01E6
96 | E7 01E7
97 | E8 01E8
98 | E9 01E9
99 | EA 01EA
100 | EB 01EB
101 | EC 010301
102 | ED 01ED
103 | EE 01EE
104 | EF 01EF
105 | F0 010111
106 | F1 01F1
107 | F2 010323
108 | F3 01F3
109 | F4 01F4
110 | F5 0101A1
111 | F6 01F6
112 | F7 01F7
113 | F8 01F8
114 | F9 01F9
115 | FA 01FA
116 | FB 01FB
117 | FC 01FC
118 | FD 0101B0
119 | FE 0120AB
120 | FF 01FF
121 |
--------------------------------------------------------------------------------
/modules/to/_CP1252.txt:
--------------------------------------------------------------------------------
1 | #transposed from from/_CP1252.txt
2 | 0120AC 80
3 | 01201A 82
4 | 010192 83
5 | 01201E 84
6 | 012026 85
7 | 012020 86
8 | 012021 87
9 | 0102C6 88
10 | 012030 89
11 | 010160 8A
12 | 012039 8B
13 | 010152 8C
14 | 01017D 8E
15 | 012018 91
16 | 012019 92
17 | 01201C 93
18 | 01201D 94
19 | 012022 95
20 | 012013 96
21 | 012014 97
22 | 0102DC 98
23 | 012122 99
24 | 010161 9A
25 | 01203A 9B
26 | 010153 9C
27 | 01017E 9E
28 | 010178 9F
29 | 01A0 A0
30 | 01A1 A1
31 | 01A2 A2
32 | 01A3 A3
33 | 01A4 A4
34 | 01A5 A5
35 | 01A6 A6
36 | 01A7 A7
37 | 01A8 A8
38 | 01A9 A9
39 | 01AA AA
40 | 01AB AB
41 | 01AC AC
42 | 01AD AD
43 | 01AE AE
44 | 01AF AF
45 | 01B0 B0
46 | 01B1 B1
47 | 01B2 B2
48 | 01B3 B3
49 | 01B4 B4
50 | 01B5 B5
51 | 01B6 B6
52 | 01B7 B7
53 | 01B8 B8
54 | 01B9 B9
55 | 01BA BA
56 | 01BB BB
57 | 01BC BC
58 | 01BD BD
59 | 01BE BE
60 | 01BF BF
61 | 01C0 C0
62 | 01C1 C1
63 | 01C2 C2
64 | 01C3 C3
65 | 01C4 C4
66 | 01C5 C5
67 | 01C6 C6
68 | 01C7 C7
69 | 01C8 C8
70 | 01C9 C9
71 | 01CA CA
72 | 01CB CB
73 | 01CC CC
74 | 01CD CD
75 | 01CE CE
76 | 01CF CF
77 | 01D0 D0
78 | 01D1 D1
79 | 01D2 D2
80 | 01D3 D3
81 | 01D4 D4
82 | 01D5 D5
83 | 01D6 D6
84 | 01D7 D7
85 | 01D8 D8
86 | 01D9 D9
87 | 01DA DA
88 | 01DB DB
89 | 01DC DC
90 | 01DD DD
91 | 01DE DE
92 | 01DF DF
93 | 01E0 E0
94 | 01E1 E1
95 | 01E2 E2
96 | 01E3 E3
97 | 01E4 E4
98 | 01E5 E5
99 | 01E6 E6
100 | 01E7 E7
101 | 01E8 E8
102 | 01E9 E9
103 | 01EA EA
104 | 01EB EB
105 | 01EC EC
106 | 01ED ED
107 | 01EE EE
108 | 01EF EF
109 | 01F0 F0
110 | 01F1 F1
111 | 01F2 F2
112 | 01F3 F3
113 | 01F4 F4
114 | 01F5 F5
115 | 01F6 F6
116 | 01F7 F7
117 | 01F8 F8
118 | 01F9 F9
119 | 01FA FA
120 | 01FB FB
121 | 01FC FC
122 | 01FD FD
123 | 01FE FE
124 | 01FF FF
125 |
--------------------------------------------------------------------------------
/modules/from/_CP1254.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 83 010192
5 | 84 01201E
6 | 85 012026
7 | 86 012020
8 | 87 012021
9 | 88 0102C6
10 | 89 012030
11 | 8A 010160
12 | 8B 012039
13 | 8C 010152
14 | 91 012018
15 | 92 012019
16 | 93 01201C
17 | 94 01201D
18 | 95 012022
19 | 96 012013
20 | 97 012014
21 | 98 0102DC
22 | 99 012122
23 | 9A 010161
24 | 9B 01203A
25 | 9C 010153
26 | 9F 010178
27 | A0 01A0
28 | A1 01A1
29 | A2 01A2
30 | A3 01A3
31 | A4 01A4
32 | A5 01A5
33 | A6 01A6
34 | A7 01A7
35 | A8 01A8
36 | A9 01A9
37 | AA 01AA
38 | AB 01AB
39 | AC 01AC
40 | AD 01AD
41 | AE 01AE
42 | AF 01AF
43 | B0 01B0
44 | B1 01B1
45 | B2 01B2
46 | B3 01B3
47 | B4 01B4
48 | B5 01B5
49 | B6 01B6
50 | B7 01B7
51 | B8 01B8
52 | B9 01B9
53 | BA 01BA
54 | BB 01BB
55 | BC 01BC
56 | BD 01BD
57 | BE 01BE
58 | BF 01BF
59 | C0 01C0
60 | C1 01C1
61 | C2 01C2
62 | C3 01C3
63 | C4 01C4
64 | C5 01C5
65 | C6 01C6
66 | C7 01C7
67 | C8 01C8
68 | C9 01C9
69 | CA 01CA
70 | CB 01CB
71 | CC 01CC
72 | CD 01CD
73 | CE 01CE
74 | CF 01CF
75 | D0 01011E
76 | D1 01D1
77 | D2 01D2
78 | D3 01D3
79 | D4 01D4
80 | D5 01D5
81 | D6 01D6
82 | D7 01D7
83 | D8 01D8
84 | D9 01D9
85 | DA 01DA
86 | DB 01DB
87 | DC 01DC
88 | DD 010130
89 | DE 01015E
90 | DF 01DF
91 | E0 01E0
92 | E1 01E1
93 | E2 01E2
94 | E3 01E3
95 | E4 01E4
96 | E5 01E5
97 | E6 01E6
98 | E7 01E7
99 | E8 01E8
100 | E9 01E9
101 | EA 01EA
102 | EB 01EB
103 | EC 01EC
104 | ED 01ED
105 | EE 01EE
106 | EF 01EF
107 | F0 01011F
108 | F1 01F1
109 | F2 01F2
110 | F3 01F3
111 | F4 01F4
112 | F5 01F5
113 | F6 01F6
114 | F7 01F7
115 | F8 01F8
116 | F9 01F9
117 | FA 01FA
118 | FB 01FB
119 | FC 01FC
120 | FD 010131
121 | FE 01015F
122 | FF 01FF
123 |
--------------------------------------------------------------------------------
/modules/from/_CP1257.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 84 01201E
5 | 85 012026
6 | 86 012020
7 | 87 012021
8 | 89 012030
9 | 8B 012039
10 | 8D 01A8
11 | 8E 0102C7
12 | 8F 01B8
13 | 91 012018
14 | 92 012019
15 | 93 01201C
16 | 94 01201D
17 | 95 012022
18 | 96 012013
19 | 97 012014
20 | 99 012122
21 | 9B 01203A
22 | 9D 01AF
23 | 9E 0102DB
24 | A0 01A0
25 | A2 01A2
26 | A3 01A3
27 | A4 01A4
28 | A6 01A6
29 | A7 01A7
30 | A8 01D8
31 | A9 01A9
32 | AA 010156
33 | AB 01AB
34 | AC 01AC
35 | AD 01AD
36 | AE 01AE
37 | AF 01C6
38 | B0 01B0
39 | B1 01B1
40 | B2 01B2
41 | B3 01B3
42 | B4 01B4
43 | B5 01B5
44 | B6 01B6
45 | B7 01B7
46 | B8 01F8
47 | B9 01B9
48 | BA 010157
49 | BB 01BB
50 | BC 01BC
51 | BD 01BD
52 | BE 01BE
53 | BF 01E6
54 | C0 010104
55 | C1 01012E
56 | C2 010100
57 | C3 010106
58 | C4 01C4
59 | C5 01C5
60 | C6 010118
61 | C7 010112
62 | C8 01010C
63 | C9 01C9
64 | CA 010179
65 | CB 010116
66 | CC 010122
67 | CD 010136
68 | CE 01012A
69 | CF 01013B
70 | D0 010160
71 | D1 010143
72 | D2 010145
73 | D3 01D3
74 | D4 01014C
75 | D5 01D5
76 | D6 01D6
77 | D7 01D7
78 | D8 010172
79 | D9 010141
80 | DA 01015A
81 | DB 01016A
82 | DC 01DC
83 | DD 01017B
84 | DE 01017D
85 | DF 01DF
86 | E0 010105
87 | E1 01012F
88 | E2 010101
89 | E3 010107
90 | E4 01E4
91 | E5 01E5
92 | E6 010119
93 | E7 010113
94 | E8 01010D
95 | E9 01E9
96 | EA 01017A
97 | EB 010117
98 | EC 010123
99 | ED 010137
100 | EE 01012B
101 | EF 01013C
102 | F0 010161
103 | F1 010144
104 | F2 010146
105 | F3 01F3
106 | F4 01014D
107 | F5 01F5
108 | F6 01F6
109 | F7 01F7
110 | F8 010173
111 | F9 010142
112 | FA 01015B
113 | FB 01016B
114 | FC 01FC
115 | FD 01017C
116 | FE 01017E
117 | FF 0102D9
118 |
--------------------------------------------------------------------------------
/src/libbsdconv_hash.c:
--------------------------------------------------------------------------------
1 | void bsdconv_hash_set(struct bsdconv_instance *ins, const char *key, void *ptr){
2 | char *tk;
3 | void *tp;
4 | struct bsdconv_hash_entry *p=ins->hash;
5 | while(p!=NULL){
6 | if(strcmp(p->key, key)==0){
7 | tp=ptr;
8 | tk=p->key;
9 | p->key=ins->hash->key;
10 | p->ptr=ins->hash->ptr;
11 | ins->hash->key=tk;
12 | ins->hash->ptr=tp;
13 | return;
14 | }
15 | p=p->next;
16 | }
17 | p=malloc(sizeof(struct bsdconv_hash_entry));
18 | p->next=ins->hash;
19 | ins->hash=p;
20 | p->key=strdup(key);
21 | p->ptr=ptr;
22 | return;
23 | }
24 |
25 | void *bsdconv_hash_get(struct bsdconv_instance *ins, const char *key){
26 | char *tk;
27 | void *tp;
28 | struct bsdconv_hash_entry *p=ins->hash;
29 | while(p!=NULL){
30 | if(strcmp(p->key, key)==0){
31 | tk=p->key;
32 | tp=p->ptr;
33 | p->key=ins->hash->key;
34 | p->ptr=ins->hash->ptr;
35 | ins->hash->key=tk;
36 | ins->hash->ptr=tp;
37 | return p->ptr;
38 | }
39 | p=p->next;
40 | }
41 | return NULL;
42 | }
43 |
44 | int bsdconv_hash_has(struct bsdconv_instance *ins, const char *key){
45 | char *tk;
46 | void *tp;
47 | struct bsdconv_hash_entry *p=ins->hash;
48 | while(p!=NULL){
49 | if(strcmp(p->key, key)==0){
50 | tk=p->key;
51 | tp=p->ptr;
52 | p->key=ins->hash->key;
53 | p->ptr=ins->hash->ptr;
54 | ins->hash->key=tk;
55 | ins->hash->ptr=tp;
56 | return 1;
57 | }
58 | p=p->next;
59 | }
60 | return 0;
61 | }
62 |
63 | void bsdconv_hash_del(struct bsdconv_instance *ins, const char *key){
64 | struct bsdconv_hash_entry *p=ins->hash;
65 | struct bsdconv_hash_entry **q=&ins->hash;
66 | while(p!=NULL){
67 | if(strcmp(p->key, key)==0){
68 | free(p->key);
69 | *q=p->next;
70 | free(p);
71 | return;
72 | }
73 | p=p->next;
74 | q=&p->next;
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/modules/to/ASCII.txt:
--------------------------------------------------------------------------------
1 | #transposed from from/ASCII
2 | # add 0102DC record
3 | 0101 01
4 | 0102 02
5 | 0103 03
6 | 0104 04
7 | 0105 05
8 | 0106 06
9 | 0107 07
10 | 0108 08
11 | 0109 09
12 | 010A 0A
13 | 010B 0B
14 | 010C 0C
15 | 010D 0D
16 | 010E 0E
17 | 010F 0F
18 | 0110 10
19 | 0111 11
20 | 0112 12
21 | 0113 13
22 | 0114 14
23 | 0115 15
24 | 0116 16
25 | 0117 17
26 | 0118 18
27 | 0119 19
28 | 011A 1A
29 | 011B 1B
30 | 011C 1C
31 | 011D 1D
32 | 011E 1E
33 | 011F 1F
34 | 0120 20
35 | 0121 21
36 | 0122 22
37 | 0123 23
38 | 0124 24
39 | 0125 25
40 | 0126 26
41 | 0127 27
42 | 0128 28
43 | 0129 29
44 | 012A 2A
45 | 012B 2B
46 | 012C 2C
47 | 012D 2D
48 | 012E 2E
49 | 012F 2F
50 | 0130 30
51 | 0131 31
52 | 0132 32
53 | 0133 33
54 | 0134 34
55 | 0135 35
56 | 0136 36
57 | 0137 37
58 | 0138 38
59 | 0139 39
60 | 013A 3A
61 | 013B 3B
62 | 013C 3C
63 | 013D 3D
64 | 013E 3E
65 | 013F 3F
66 | 0140 40
67 | 0141 41
68 | 0142 42
69 | 0143 43
70 | 0144 44
71 | 0145 45
72 | 0146 46
73 | 0147 47
74 | 0148 48
75 | 0149 49
76 | 014A 4A
77 | 014B 4B
78 | 014C 4C
79 | 014D 4D
80 | 014E 4E
81 | 014F 4F
82 | 0150 50
83 | 0151 51
84 | 0152 52
85 | 0153 53
86 | 0154 54
87 | 0155 55
88 | 0156 56
89 | 0157 57
90 | 0158 58
91 | 0159 59
92 | 015A 5A
93 | 015B 5B
94 | 015C 5C
95 | 015D 5D
96 | 015E 5E
97 | 015F 5F
98 | 0160 60
99 | 0161 61
100 | 0162 62
101 | 0163 63
102 | 0164 64
103 | 0165 65
104 | 0166 66
105 | 0167 67
106 | 0168 68
107 | 0169 69
108 | 016A 6A
109 | 016B 6B
110 | 016C 6C
111 | 016D 6D
112 | 016E 6E
113 | 016F 6F
114 | 0170 70
115 | 0171 71
116 | 0172 72
117 | 0173 73
118 | 0174 74
119 | 0175 75
120 | 0176 76
121 | 0177 77
122 | 0178 78
123 | 0179 79
124 | 017A 7A
125 | 017B 7B
126 | 017C 7C
127 | 017D 7D
128 | 017E 7E
129 | 0102DC 7E
130 | 017F 7F
131 |
--------------------------------------------------------------------------------
/modules/from/_CP1252.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
2 | 80 0120AC
3 | 82 01201A
4 | 83 010192
5 | 84 01201E
6 | 85 012026
7 | 86 012020
8 | 87 012021
9 | 88 0102C6
10 | 89 012030
11 | 8A 010160
12 | 8B 012039
13 | 8C 010152
14 | 8E 01017D
15 | 91 012018
16 | 92 012019
17 | 93 01201C
18 | 94 01201D
19 | 95 012022
20 | 96 012013
21 | 97 012014
22 | 98 0102DC
23 | 99 012122
24 | 9A 010161
25 | 9B 01203A
26 | 9C 010153
27 | 9E 01017E
28 | 9F 010178
29 | A0 01A0
30 | A1 01A1
31 | A2 01A2
32 | A3 01A3
33 | A4 01A4
34 | A5 01A5
35 | A6 01A6
36 | A7 01A7
37 | A8 01A8
38 | A9 01A9
39 | AA 01AA
40 | AB 01AB
41 | AC 01AC
42 | AD 01AD
43 | AE 01AE
44 | AF 01AF
45 | B0 01B0
46 | B1 01B1
47 | B2 01B2
48 | B3 01B3
49 | B4 01B4
50 | B5 01B5
51 | B6 01B6
52 | B7 01B7
53 | B8 01B8
54 | B9 01B9
55 | BA 01BA
56 | BB 01BB
57 | BC 01BC
58 | BD 01BD
59 | BE 01BE
60 | BF 01BF
61 | C0 01C0
62 | C1 01C1
63 | C2 01C2
64 | C3 01C3
65 | C4 01C4
66 | C5 01C5
67 | C6 01C6
68 | C7 01C7
69 | C8 01C8
70 | C9 01C9
71 | CA 01CA
72 | CB 01CB
73 | CC 01CC
74 | CD 01CD
75 | CE 01CE
76 | CF 01CF
77 | D0 01D0
78 | D1 01D1
79 | D2 01D2
80 | D3 01D3
81 | D4 01D4
82 | D5 01D5
83 | D6 01D6
84 | D7 01D7
85 | D8 01D8
86 | D9 01D9
87 | DA 01DA
88 | DB 01DB
89 | DC 01DC
90 | DD 01DD
91 | DE 01DE
92 | DF 01DF
93 | E0 01E0
94 | E1 01E1
95 | E2 01E2
96 | E3 01E3
97 | E4 01E4
98 | E5 01E5
99 | E6 01E6
100 | E7 01E7
101 | E8 01E8
102 | E9 01E9
103 | EA 01EA
104 | EB 01EB
105 | EC 01EC
106 | ED 01ED
107 | EE 01EE
108 | EF 01EF
109 | F0 01F0
110 | F1 01F1
111 | F2 01F2
112 | F3 01F3
113 | F4 01F4
114 | F5 01F5
115 | F6 01F6
116 | F7 01F7
117 | F8 01F8
118 | F9 01F9
119 | FA 01FA
120 | FB 01FB
121 | FC 01FC
122 | FD 01FD
123 | FE 01FE
124 | FF 01FF
125 |
--------------------------------------------------------------------------------
/modules/from/PASS.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "../../src/bsdconv.h"
5 |
/* Private codec state, filled in by cbcreate from the codec arguments. */
struct my_s{
	struct bsdconv_filter *filter;	/* filter loaded for the FOR argument, or NULL */
	int unmark;			/* 1 when the UNMARK argument was given */
};
10 |
11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
12 | struct my_s *r=malloc(sizeof(struct my_s));
13 | THIS_CODEC(ins)->priv=r;
14 | r->filter=NULL;
15 | r->unmark=0;
16 |
17 | char *filter=NULL;
18 | while(arg){
19 | if(strcasecmp(arg->key, "UNMARK")==0){
20 | r->unmark=1;
21 | }else if(strcasecmp(arg->key, "FOR")==0){
22 | filter=arg->ptr;
23 | }else{
24 | free(r);
25 | return EINVAL;
26 | }
27 | arg=arg->next;
28 | }
29 | if(filter!=NULL){
30 | r->filter=load_filter(filter);
31 | if(r->filter==NULL){
32 | free(r);
33 | return EOPNOTSUPP;
34 | }
35 | }
36 | return 0;
37 | }
38 |
39 | void cbdestroy(struct bsdconv_instance *ins){
40 | struct my_s *r=THIS_CODEC(ins)->priv;
41 | if(r->filter)
42 | unload_filter(r->filter);
43 | free(r);
44 | }
45 |
46 | void cbconv(struct bsdconv_instance *ins){
47 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
48 | struct my_s *t=THIS_CODEC(ins)->priv;
49 | int pass=1;
50 |
51 | if(this_phase->i!=0)
52 | pass=0;
53 | else if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr))
54 | pass=0;
55 | else if(t->unmark && !(this_phase->curr->flags & F_MARK))
56 | pass=0;
57 |
58 | if(pass){
59 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
60 | this_phase->data_tail=this_phase->data_tail->next;
61 | this_phase->data_tail->next=NULL;
62 |
63 | if(t->unmark)
64 | this_phase->data_tail->flags &= ~F_MARK;
65 |
66 | this_phase->i=this_phase->curr->len-1;
67 | this_phase->state.status=NEXTPHASE;
68 | }else{
69 | this_phase->state.status=DEADEND;
70 | }
71 |
72 | return;
73 | }
74 |
--------------------------------------------------------------------------------
/modules/from/UTF-32BE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 |
/* Decoder state carried between cbconv calls. */
struct my_s{
	int status;	/* number of bytes of the current 4-byte unit stored so far (0-3) */
	char buf[4];	/* bytes of the current unit, most significant first */
};
10 |
11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
12 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s));
13 | return 0;
14 | }
15 |
16 | void cbinit(struct bsdconv_instance *ins){
17 | struct my_s *r=THIS_CODEC(ins)->priv;
18 | r->status=0;
19 | }
20 |
21 | void cbdestroy(struct bsdconv_instance *ins){
22 | struct my_s *p=THIS_CODEC(ins)->priv;
23 | free(p);
24 | }
25 |
26 | void cbconv(struct bsdconv_instance *ins){
27 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
28 | struct my_s *t=THIS_CODEC(ins)->priv;
29 | char d;
30 | int i;
31 | size_t l;
32 |
33 | for(;this_phase->icurr->len;this_phase->i+=1){
34 | d=CP(this_phase->curr->data)[this_phase->i];
35 | switch(t->status){
36 | case 0:
37 | t->buf[0]=d;
38 | t->status=1;
39 | continue;
40 | break;
41 | case 1:
42 | t->buf[1]=d;
43 | t->status=2;
44 | continue;
45 | break;
46 | case 2:
47 | t->buf[2]=d;
48 | t->status=3;
49 | continue;
50 | break;
51 | case 3:
52 | t->buf[3]=d;
53 | t->status=0;
54 | for(i=0;i<4;++i){
55 | if(t->buf[i]) break;
56 | }
57 | l=(4-i)+1;
58 | DATA_MALLOC(ins, this_phase->data_tail->next);
59 | this_phase->data_tail=this_phase->data_tail->next;
60 | this_phase->data_tail->next=NULL;
61 | this_phase->data_tail->len=l;
62 | this_phase->data_tail->flags=F_FREE;
63 | this_phase->data_tail->data=malloc(l);
64 | CP(this_phase->data_tail->data)[0]=0x01;
65 | memcpy(CP(this_phase->data_tail->data)+1, &t->buf[i], l-1);
66 | this_phase->state.status=NEXTPHASE;
67 | return;
68 | break;
69 | }
70 | }
71 | this_phase->state.status=CONTINUE;
72 | return;
73 | }
74 |
--------------------------------------------------------------------------------
/modules/from/UTF-32LE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 |
/* Decoder state carried between cbconv calls. */
struct my_s{
	int status;	/* number of bytes of the current 4-byte unit stored so far (0-3) */
	char buf[4];	/* bytes of the current unit, stored most significant first */
};
10 |
11 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
12 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s));
13 | return 0;
14 | }
15 |
16 | void cbinit(struct bsdconv_instance *ins){
17 | struct my_s *r=THIS_CODEC(ins)->priv;
18 | r->status=0;
19 | }
20 |
21 | void cbdestroy(struct bsdconv_instance *ins){
22 | struct my_s *p=THIS_CODEC(ins)->priv;
23 | free(p);
24 | }
25 |
26 | /*
27 |  * Collect input bytes into 4-byte units. When a unit is complete, append one
28 |  * output item (a 0x01 tag byte followed by the unit's bytes, most significant
29 |  * first, with leading zero bytes dropped) and report NEXTPHASE. If the input
30 |  * runs out before a unit is complete, report CONTINUE.
31 |  */
32 | void cbconv(struct bsdconv_instance *ins){
33 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
34 |     struct my_s *t=THIS_CODEC(ins)->priv;
35 |     char d;
36 |     int i;
37 |     size_t l;
38 |
39 |     for(;this_phase->i<this_phase->curr->len;this_phase->i+=1){
40 |         d=CP(this_phase->curr->data)[this_phase->i];
41 |         if(t->status<3){
42 |             /* bytes arrive least significant first, so fill buf from the back */
43 |             t->buf[3-t->status]=d;
44 |             t->status+=1;
45 |             continue;
46 |         }
47 |         /* fourth byte completes the unit */
48 |         t->buf[0]=d;
49 |         t->status=0;
50 |         /* find the first non-zero byte; i==4 when the whole unit is zero */
51 |         for(i=0;i<4;++i){
52 |             if(t->buf[i]) break;
53 |         }
54 |         l=(4-i)+1; /* remaining bytes plus the tag byte */
55 |         DATA_MALLOC(ins, this_phase->data_tail->next);
56 |         this_phase->data_tail=this_phase->data_tail->next;
57 |         this_phase->data_tail->next=NULL;
58 |         this_phase->data_tail->len=l;
59 |         this_phase->data_tail->flags=F_FREE;
60 |         this_phase->data_tail->data=malloc(l);
61 |         CP(this_phase->data_tail->data)[0]=0x01;
62 |         memcpy(CP(this_phase->data_tail->data)+1, &t->buf[i], l-1);
63 |         this_phase->state.status=NEXTPHASE;
64 |         return;
65 |     }
66 |     this_phase->state.status=CONTINUE;
67 | }
74 |
--------------------------------------------------------------------------------
/modules/inter/INSERT.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | struct my_s{ /* INSERT configuration, built by cbcreate */
6 | struct data_rt *after; /* data from the AFTER argument, copied after each item; NULL if not given */
7 | struct data_rt *before; /* data from the BEFORE argument, copied before each item; NULL if not given */
8 | };
9 |
10 | /*
11 |  * Parse the AFTER= and BEFORE= arguments and store the converted data in the
12 |  * codec's private state. Returns 0 on success, EINVAL for an unknown
13 |  * argument, ENOMEM if the state cannot be allocated, or the error reported
14 |  * by str2data.
15 |  */
16 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
17 |     struct my_s *r;
18 |     int e=0;
19 |     char *after=NULL;
20 |     char *before=NULL;
21 |
22 |     /* Check the arguments before allocating, so a rejected one leaks nothing. */
23 |     for(;arg;arg=arg->next){
24 |         if(strcasecmp(arg->key, "AFTER")==0){
25 |             after=arg->ptr;
26 |         }else if(strcasecmp(arg->key, "BEFORE")==0){
27 |             before=arg->ptr;
28 |         }else{
29 |             return EINVAL;
30 |         }
31 |     }
32 |
33 |     r=malloc(sizeof(struct my_s));
34 |     if(r==NULL)
35 |         return ENOMEM;
36 |     r->after=NULL;
37 |     r->before=NULL;
38 |
39 |     if(after){
40 |         r->after=str2data(after, &e, ins);
41 |         if(e){
42 |             if(r->after)
43 |                 DATA_FREE(ins, r->after);
44 |             free(r);
45 |             return e;
46 |         }
47 |     }
48 |
49 |     if(before){
50 |         r->before=str2data(before, &e, ins);
51 |         if(e){
52 |             if(r->after)
53 |                 DATA_FREE(ins, r->after);
54 |             if(r->before)
55 |                 DATA_FREE(ins, r->before);
56 |             free(r);
57 |             return e;
58 |         }
59 |     }
60 |
61 |     THIS_CODEC(ins)->priv=r;
62 |     return 0;
63 | }
54 |
55 | void cbdestroy(struct bsdconv_instance *ins){
56 |     struct my_s *cfg=THIS_CODEC(ins)->priv;
57 |     /* either list may be absent when its argument was not given */
58 |     if(cfg->after!=NULL){
59 |         DATA_FREE(ins, cfg->after);
60 |     }
61 |     if(cfg->before!=NULL){
62 |         DATA_FREE(ins, cfg->before);
63 |     }
64 |     free(cfg);
65 | }
63 |
64 | void cbconv(struct bsdconv_instance *ins){
65 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
66 |     struct my_s *cfg=THIS_CODEC(ins)->priv;
67 |     struct data_rt *copy;
68 |
69 |     /* output order: BEFORE data, a copy of the current item, AFTER data */
70 |     if(cfg->before!=NULL){
71 |         LISTCPY(ins, this_phase->data_tail, cfg->before);
72 |     }
73 |
74 |     copy=dup_data_rt(ins, this_phase->curr);
75 |     this_phase->data_tail->next=copy;
76 |     this_phase->data_tail=copy;
77 |     copy->next=NULL;
78 |
79 |     if(cfg->after!=NULL){
80 |         LISTCPY(ins, this_phase->data_tail, cfg->after);
81 |     }
82 |
83 |     this_phase->state.status=NEXTPHASE;
84 | }
81 |
--------------------------------------------------------------------------------
/modules/inter/ALIAS-INTER.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 |
6 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
7 |     /* helper converter that cbconv feeds the module name through */
8 |     struct bsdconv_instance *helper=bsdconv_create("ASCII:PASS");
9 |     THIS_CODEC(ins)->priv=helper;
10 |     return 0;
11 | }
10 |
11 | void cbdestroy(struct bsdconv_instance *ins){
12 |     struct bsdconv_instance *helper=THIS_CODEC(ins)->priv; /* created in cbcreate */
13 |     bsdconv_destroy(helper);
14 | }
14 |
15 | #define CNS11643_UNICODE "CNS11643-UNICODE"
16 |
17 | /*
18 |  * Emit a module name as this phase's output: CNS11643_UNICODE when
19 |  * bsdconv_module_check accepts it for INTER, otherwise "PASS". The name is
20 |  * converted by the helper instance and its output list is moved over.
21 |  */
22 | void cbconv(struct bsdconv_instance *ins){
23 |     struct bsdconv_phase *this_phase = THIS_PHASE(ins);
24 |     struct bsdconv_instance *uni = THIS_CODEC(ins)->priv;
25 |     const char *s;
26 |
27 |     s = bsdconv_module_check(INTER, CNS11643_UNICODE) ? CNS11643_UNICODE : "PASS";
28 |
29 |     /* run the chosen name through the helper converter */
30 |     bsdconv_init(uni);
31 |     uni->input.data=strdup(s);
32 |     uni->input.len=strlen(s);
33 |     uni->input.flags=F_FREE;
34 |     uni->input.next=NULL;
35 |     uni->flush=1;
36 |     bsdconv(uni);
37 |
38 |     /* detach the helper's output list and append it to our own */
39 |     this_phase->data_tail->next=uni->phase[uni->phasen].data_head->next;
40 |     uni->phase[uni->phasen].data_head->next=NULL;
41 |     uni->phase[uni->phasen].data_tail=uni->phase[uni->phasen].data_head;
42 |     while(this_phase->data_tail->next!=NULL){
43 |         this_phase->data_tail=this_phase->data_tail->next;
44 |     }
45 |
46 |     this_phase->state.status=NEXTPHASE;
47 | }
61 |
--------------------------------------------------------------------------------
/modules/inter/WIDTH.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #define HALF 1
4 | #define FULL 2
5 | #define AMBI -1
6 |
7 | #include "_WIDTH.h"
8 |
9 | struct my_s{ /* counters looked up in cbcreate and incremented by cbconv */
10 | bsdconv_counter_t *full; /* counter named "FULL" */
11 | bsdconv_counter_t *half; /* counter named "HALF" */
12 | bsdconv_counter_t *ambi; /* counter named "AMBI" */
13 | };
14 |
15 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
16 |     struct my_s *state=malloc(sizeof(struct my_s));
17 |     THIS_CODEC(ins)->priv=state;
18 |
19 |     /* one named counter per width class */
20 |     state->full=bsdconv_counter(ins, "FULL");
21 |     state->half=bsdconv_counter(ins, "HALF");
22 |     state->ambi=bsdconv_counter(ins, "AMBI");
23 |     return 0;
24 | }
23 |
24 | /*
25 |  * Pass the current item through unchanged. If it is tagged 0x01, assemble
26 |  * the bytes after the tag into a code point, look it up in width_table by
27 |  * binary search, and increment the FULL, HALF or AMBI counter that matches.
28 |  */
29 | void cbconv(struct bsdconv_instance *ins){
30 |     struct my_s *r=THIS_CODEC(ins)->priv;
31 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
32 |     unsigned char *data=this_phase->curr->data;
33 |     int i;
34 |     int max=sizeof(width_table) / sizeof(struct width_interval) - 1;
35 |     int min = 0;
36 |     int mid;
37 |     uint32_t ucs=0;
38 |
39 |     this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
40 |     this_phase->data_tail=this_phase->data_tail->next;
41 |     this_phase->data_tail->next=NULL;
42 |
43 |     /* the length check keeps data[0] from being read on an empty item */
44 |     if(this_phase->curr->len>0 && data[0]==0x1){
45 |         for(i=1;i<this_phase->curr->len;++i){
46 |             ucs<<=8;
47 |             ucs|=data[i];
48 |         }
49 |         if (ucs < width_table[0].beg || ucs > width_table[max].end){
50 |             /* outside the table: nothing to count */
51 |         }else while (max >= min) {
52 |             mid = (min + max) / 2;
53 |             if (ucs > width_table[mid].end)
54 |                 min = mid + 1;
55 |             else if (ucs < width_table[mid].beg)
56 |                 max = mid - 1;
57 |             else{
58 |                 switch(width_table[mid].width){
59 |                     case FULL:
60 |                         *(r->full)+=1;
61 |                         break;
62 |                     case HALF:
63 |                         *(r->half)+=1;
64 |                         break;
65 |                     case AMBI:
66 |                         *(r->ambi)+=1;
67 |                         break;
68 |                 }
69 |                 break;
70 |             }
71 |         }
72 |     }
73 |
74 |     this_phase->state.status=NEXTPHASE;
75 | }
72 |
73 |
74 | void cbdestroy(struct bsdconv_instance *ins){
75 |     /* frees only the struct allocated in cbcreate, not the counters it points to */
76 |     free(THIS_CODEC(ins)->priv);
77 | }
78 |
--------------------------------------------------------------------------------
/modules/inter/PASS.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "../../src/bsdconv.h"
6 |
7 | struct my_s{ /* PASS configuration and running state */
8 | struct bsdconv_filter *filter; /* filter loaded from the FOR argument; NULL when none was given */
9 | int limit; /* value of the LIMIT argument; 0 means no limit is applied */
10 | int passed; /* items passed so far under the limit; reset to 0 by cbinit */
11 | };
12 |
13 | /*
14 |  * Parse the FOR= (filter name) and LIMIT= (integer) arguments.
15 |  * Returns 0 on success, EINVAL for an unknown argument or an unparsable
16 |  * LIMIT, ENOMEM if the state cannot be allocated, and EOPNOTSUPP when the
17 |  * filter cannot be loaded. The private pointer is set only on success, so
18 |  * it never refers to freed memory.
19 |  */
20 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
21 |     struct my_s *r;
22 |     char *filter=NULL;
23 |     int limit=0;
24 |     int i;
25 |
26 |     for(;arg;arg=arg->next){
27 |         if(strcasecmp(arg->key, "FOR")==0){
28 |             filter=arg->ptr;
29 |         }else if(strcasecmp(arg->key, "LIMIT")==0 && arg->ptr!=NULL && sscanf(arg->ptr, "%d", &i)==1){
30 |             limit=i;
31 |         }else{
32 |             return EINVAL;
33 |         }
34 |     }
35 |
36 |     r=malloc(sizeof(struct my_s));
37 |     if(r==NULL)
38 |         return ENOMEM;
39 |     r->filter=NULL;
40 |     r->limit=limit;
41 |     if(filter!=NULL){
42 |         r->filter=load_filter(filter);
43 |         if(r->filter==NULL){
44 |             free(r);
45 |             return EOPNOTSUPP;
46 |         }
47 |     }
48 |     THIS_CODEC(ins)->priv=r;
49 |     return 0;
50 | }
41 |
42 | void cbinit(struct bsdconv_instance *ins){
43 |     /* restart the LIMIT count */
44 |     ((struct my_s *)THIS_CODEC(ins)->priv)->passed=0;
45 | }
46 |
47 | void cbdestroy(struct bsdconv_instance *ins){
48 |     struct my_s *state=THIS_CODEC(ins)->priv;
49 |     if(state->filter!=NULL){ /* only present when FOR= was given */
50 |         unload_filter(state->filter);
51 |     }
52 |     free(state);
53 | }
53 |
54 | /*
55 |  * Copy the current item to the output (NEXTPHASE) unless the filter rejects
56 |  * it or the LIMIT has already been reached, in which case report DEADEND.
57 |  */
58 | void cbconv(struct bsdconv_instance *ins){
59 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
60 |     struct my_s *t=THIS_CODEC(ins)->priv;
61 |
62 |     /* rejected by the filter: the limit counter is left untouched */
63 |     if(t->filter!=NULL && !t->filter->cbfilter(this_phase->curr)){
64 |         this_phase->state.status=DEADEND;
65 |         return;
66 |     }
67 |
68 |     /* a limit of 0 disables counting */
69 |     if(t->limit!=0){
70 |         if(t->passed >= t->limit){
71 |             this_phase->state.status=DEADEND;
72 |             return;
73 |         }
74 |         t->passed += 1;
75 |     }
76 |
77 |     this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
78 |     this_phase->data_tail=this_phase->data_tail->next;
79 |     this_phase->data_tail->next=NULL;
80 |
81 |     this_phase->i=this_phase->curr->len-1;
82 |     this_phase->state.status=NEXTPHASE;
83 | }
84 |
--------------------------------------------------------------------------------
/modules/from/_CP1256.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT
2 | 80 0120AC
3 | 81 01067E
4 | 82 01201A
5 | 83 010192
6 | 84 01201E
7 | 85 012026
8 | 86 012020
9 | 87 012021
10 | 88 0102C6
11 | 89 012030
12 | 8A 010679
13 | 8B 012039
14 | 8C 010152
15 | 8D 010686
16 | 8E 010698
17 | 8F 010688
18 | 90 0106AF
19 | 91 012018
20 | 92 012019
21 | 93 01201C
22 | 94 01201D
23 | 95 012022
24 | 96 012013
25 | 97 012014
26 | 98 0106A9
27 | 99 012122
28 | 9A 010691
29 | 9B 01203A
30 | 9C 010153
31 | 9D 01200C
32 | 9E 01200D
33 | 9F 0106BA
34 | A0 01A0
35 | A1 01060C
36 | A2 01A2
37 | A3 01A3
38 | A4 01A4
39 | A5 01A5
40 | A6 01A6
41 | A7 01A7
42 | A8 01A8
43 | A9 01A9
44 | AA 0106BE
45 | AB 01AB
46 | AC 01AC
47 | AD 01AD
48 | AE 01AE
49 | AF 01AF
50 | B0 01B0
51 | B1 01B1
52 | B2 01B2
53 | B3 01B3
54 | B4 01B4
55 | B5 01B5
56 | B6 01B6
57 | B7 01B7
58 | B8 01B8
59 | B9 01B9
60 | BA 01061B
61 | BB 01BB
62 | BC 01BC
63 | BD 01BD
64 | BE 01BE
65 | BF 01061F
66 | C0 0106C1
67 | C1 010621
68 | C2 010622
69 | C3 010623
70 | C4 010624
71 | C5 010625
72 | C6 010626
73 | C7 010627
74 | C8 010628
75 | C9 010629
76 | CA 01062A
77 | CB 01062B
78 | CC 01062C
79 | CD 01062D
80 | CE 01062E
81 | CF 01062F
82 | D0 010630
83 | D1 010631
84 | D2 010632
85 | D3 010633
86 | D4 010634
87 | D5 010635
88 | D6 010636
89 | D7 01D7
90 | D8 010637
91 | D9 010638
92 | DA 010639
93 | DB 01063A
94 | DC 010640
95 | DD 010641
96 | DE 010642
97 | DF 010643
98 | E0 01E0
99 | E1 010644
100 | E2 01E2
101 | E3 010645
102 | E4 010646
103 | E5 010647
104 | E6 010648
105 | E7 01E7
106 | E8 01E8
107 | E9 01E9
108 | EA 01EA
109 | EB 01EB
110 | EC 010649
111 | ED 01064A
112 | EE 01EE
113 | EF 01EF
114 | F0 01064B
115 | F1 01064C
116 | F2 01064D
117 | F3 01064E
118 | F4 01F4
119 | F5 01064F
120 | F6 010650
121 | F7 01F7
122 | F8 010651
123 | F9 01F9
124 | FA 010652
125 | FB 01FB
126 | FC 01FC
127 | FD 01200E
128 | FE 01200F
129 | FF 0106D2
130 |
--------------------------------------------------------------------------------
/modules/to/_CP1251.txt:
--------------------------------------------------------------------------------
1 | #transposed from from/_CP1251.txt
2 | 010402 80
3 | 010403 81
4 | 01201A 82
5 | 010453 83
6 | 01201E 84
7 | 012026 85
8 | 012020 86
9 | 012021 87
10 | 0120AC 88
11 | 012030 89
12 | 010409 8A
13 | 012039 8B
14 | 01040A 8C
15 | 01040C 8D
16 | 01040B 8E
17 | 01040F 8F
18 | 010452 90
19 | 012018 91
20 | 012019 92
21 | 01201C 93
22 | 01201D 94
23 | 012022 95
24 | 012013 96
25 | 012014 97
26 | 012122 99
27 | 010459 9A
28 | 01203A 9B
29 | 01045A 9C
30 | 01045C 9D
31 | 01045B 9E
32 | 01045F 9F
33 | 01A0 A0
34 | 01040E A1
35 | 01045E A2
36 | 010408 A3
37 | 01A4 A4
38 | 010490 A5
39 | 01A6 A6
40 | 01A7 A7
41 | 010401 A8
42 | 01A9 A9
43 | 010404 AA
44 | 01AB AB
45 | 01AC AC
46 | 01AD AD
47 | 01AE AE
48 | 010407 AF
49 | 01B0 B0
50 | 01B1 B1
51 | 010406 B2
52 | 010456 B3
53 | 010491 B4
54 | 01B5 B5
55 | 01B6 B6
56 | 01B7 B7
57 | 010451 B8
58 | 012116 B9
59 | 010454 BA
60 | 01BB BB
61 | 010458 BC
62 | 010405 BD
63 | 010455 BE
64 | 010457 BF
65 | 010410 C0
66 | 010411 C1
67 | 010412 C2
68 | 010413 C3
69 | 010414 C4
70 | 010415 C5
71 | 010416 C6
72 | 010417 C7
73 | 010418 C8
74 | 010419 C9
75 | 01041A CA
76 | 01041B CB
77 | 01041C CC
78 | 01041D CD
79 | 01041E CE
80 | 01041F CF
81 | 010420 D0
82 | 010421 D1
83 | 010422 D2
84 | 010423 D3
85 | 010424 D4
86 | 010425 D5
87 | 010426 D6
88 | 010427 D7
89 | 010428 D8
90 | 010429 D9
91 | 01042A DA
92 | 01042B DB
93 | 01042C DC
94 | 01042D DD
95 | 01042E DE
96 | 01042F DF
97 | 010430 E0
98 | 010431 E1
99 | 010432 E2
100 | 010433 E3
101 | 010434 E4
102 | 010435 E5
103 | 010436 E6
104 | 010437 E7
105 | 010438 E8
106 | 010439 E9
107 | 01043A EA
108 | 01043B EB
109 | 01043C EC
110 | 01043D ED
111 | 01043E EE
112 | 01043F EF
113 | 010440 F0
114 | 010441 F1
115 | 010442 F2
116 | 010443 F3
117 | 010444 F4
118 | 010445 F5
119 | 010446 F6
120 | 010447 F7
121 | 010448 F8
122 | 010449 F9
123 | 01044A FA
124 | 01044B FB
125 | 01044C FC
126 | 01044D FD
127 | 01044E FE
128 | 01044F FF
129 |
--------------------------------------------------------------------------------
/src/libfmalloc.c:
--------------------------------------------------------------------------------
1 | #ifdef USE_FMALLOC
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "fmalloc.h"
8 |
9 | const char *fmalloc_template="/tmp/.fmalloc.XXXXXX";
10 | struct fmalloc_entry * fmalloc_pools=NULL;
11 | int fmalloc_num=0;
12 |
13 | /*
14 |  * Allocate s bytes from the newest file-backed pool, creating a new pool
15 |  * (an unlinked temporary file mapped with mmap) when the current one has no
16 |  * room. Falls back to malloc(s) when the request is larger than a pool, the
17 |  * pool limit FMALLOC_NUM is reached, or any step of creating a pool fails.
18 |  * Release the result with fmfree().
19 |  */
20 | void * fmalloc(size_t s){
21 |     void *m;
22 |     char *tmpfile;
23 |     int tmpfd;
24 |     size_t o_offset;
25 |     struct fmalloc_entry *entry;
26 |
27 |     /* a request bigger than a whole pool would overrun a fresh mapping */
28 |     if(s > FMALLOC_SIZE)
29 |         return malloc(s);
30 |
31 |     /* written as a subtraction so that offset+s cannot wrap around */
32 |     if(fmalloc_pools==NULL || s > FMALLOC_SIZE - fmalloc_pools->offset){
33 |         if(fmalloc_num >= FMALLOC_NUM)
34 |             return malloc(s);
35 |         tmpfile=strdup(fmalloc_template);
36 |         if(tmpfile==NULL)
37 |             return malloc(s);
38 |         tmpfd=mkstemp(tmpfile);
39 |         if(tmpfd==-1){
40 |             free(tmpfile);
41 |             return malloc(s);
42 |         }
43 |         unlink(tmpfile); /* the file lives on only through the open descriptor */
44 |         free(tmpfile);
45 |         if(ftruncate(tmpfd, FMALLOC_SIZE)==-1){
46 |             close(tmpfd);
47 |             return malloc(s);
48 |         }
49 |         m=mmap(0, FMALLOC_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, tmpfd, 0);
50 |         if(m==MAP_FAILED){
51 |             close(tmpfd);
52 |             return malloc(s);
53 |         }
54 |         entry=malloc(sizeof(struct fmalloc_entry));
55 |         if(entry==NULL){
56 |             munmap(m, FMALLOC_SIZE);
57 |             close(tmpfd);
58 |             return malloc(s);
59 |         }
60 |         entry->z=m;
61 |         entry->offset=0;
62 |         entry->fd=tmpfd;
63 |         entry->next=fmalloc_pools; /* newest pool goes to the head of the list */
64 |         fmalloc_pools=entry;
65 |         fmalloc_num+=1;
66 |     }
67 |     o_offset=fmalloc_pools->offset;
68 |     fmalloc_pools->offset+=s;
69 |     return (char *)fmalloc_pools->z + o_offset;
70 | }
48 |
49 | void fmfree(void *p){ /* release memory obtained from fmalloc() */
50 |     struct fmalloc_entry *entry;
51 |     /* a pointer inside the used part of a pool is left alone; anything else came from malloc */
52 |     for(entry=fmalloc_pools;entry;entry=entry->next){
53 |         if(p>=entry->z && p<entry->z+entry->offset){
54 |             return;
55 |         }
56 |     }
57 |     free(p);
58 | }
59 |
60 | void fmsync(void){
61 |     struct fmalloc_entry *pool;
62 |     /* synchronously flush the used part of every pool */
63 |     for(pool=fmalloc_pools;pool!=NULL;pool=pool->next){
64 |         msync(pool->z, pool->offset, MS_SYNC);
65 |     }
66 | }
67 |
68 | void fmcleanup(void){ /* unmap and close every pool; pool memory is invalid afterwards */
69 |     struct fmalloc_entry *next;
70 |     for(;fmalloc_pools;fmalloc_pools=next){
71 |         next=fmalloc_pools->next;
72 |         munmap(fmalloc_pools->z, FMALLOC_SIZE);
73 |         close(fmalloc_pools->fd);
74 |         free(fmalloc_pools);
75 |     }
76 |     fmalloc_num=0; /* otherwise fmalloc would still count the pools just destroyed */
77 | }
78 | #endif
79 |
--------------------------------------------------------------------------------
/modules/filter/CJK.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generated from: ftp://ftp.unicode.org/Public/13.0.0/ucd/Blocks.txt
3 | */
4 |
5 | #include "../../src/bsdconv.h"
6 |
7 | static const struct uint32_range ranges[] = { /* Unicode blocks treated as CJK, one {start, end} pair per block, listed in ascending order; presumably read by unicode_range.c, which is included after the table */
8 | { 0x1100, 0x11FF }, // Hangul Jamo
9 | { 0x2E80, 0x2EFF }, // CJK Radicals Supplement
10 | { 0x2F00, 0x2FDF }, // Kangxi Radicals
11 | { 0x2FF0, 0x2FFF }, // Ideographic Description Characters
12 | { 0x3000, 0x303F }, // CJK Symbols and Punctuation
13 | { 0x3040, 0x309F }, // Hiragana
14 | { 0x30A0, 0x30FF }, // Katakana
15 | { 0x3100, 0x312F }, // Bopomofo
16 | { 0x3130, 0x318F }, // Hangul Compatibility Jamo
17 | { 0x3190, 0x319F }, // Kanbun
18 | { 0x31A0, 0x31BF }, // Bopomofo Extended
19 | { 0x31C0, 0x31EF }, // CJK Strokes
20 | { 0x31F0, 0x31FF }, // Katakana Phonetic Extensions
21 | { 0x3200, 0x32FF }, // Enclosed CJK Letters and Months
22 | { 0x3300, 0x33FF }, // CJK Compatibility
23 | { 0x3400, 0x4DBF }, // CJK Unified Ideographs Extension A
24 | { 0x4DC0, 0x4DFF }, // Yijing Hexagram Symbols
25 | { 0x4E00, 0x9FFF }, // CJK Unified Ideographs
26 | { 0xA000, 0xA48F }, // Yi Syllables
27 | { 0xA490, 0xA4CF }, // Yi Radicals
28 | { 0xA960, 0xA97F }, // Hangul Jamo Extended-A
29 | { 0xAC00, 0xD7AF }, // Hangul Syllables
30 | { 0xD7B0, 0xD7FF }, // Hangul Jamo Extended-B
31 | { 0xF900, 0xFAFF }, // CJK Compatibility Ideographs
32 | { 0xFE30, 0xFE4F }, // CJK Compatibility Forms
33 | { 0x1B000, 0x1B0FF }, // Kana Supplement
34 | { 0x1B100, 0x1B12F }, // Kana Extended-A
35 | { 0x1D300, 0x1D35F }, // Tai Xuan Jing Symbols
36 | { 0x20000, 0x2A6DF }, // CJK Unified Ideographs Extension B
37 | { 0x2A700, 0x2B73F }, // CJK Unified Ideographs Extension C
38 | { 0x2B740, 0x2B81F }, // CJK Unified Ideographs Extension D
39 | { 0x2B820, 0x2CEAF }, // CJK Unified Ideographs Extension E
40 | { 0x2CEB0, 0x2EBEF }, // CJK Unified Ideographs Extension F
41 | { 0x2F800, 0x2FA1F }, // CJK Compatibility Ideographs Supplement
42 | { 0x30000, 0x3134F }, // CJK Unified Ideographs Extension G
43 | };
44 | #include "unicode_range.c"
45 |
--------------------------------------------------------------------------------
/modules/from/_CP1251.txt:
--------------------------------------------------------------------------------
1 | # http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
2 | 80 010402
3 | 81 010403
4 | 82 01201A
5 | 83 010453
6 | 84 01201E
7 | 85 012026
8 | 86 012020
9 | 87 012021
10 | 88 0120AC
11 | 89 012030
12 | 8A 010409
13 | 8B 012039
14 | 8C 01040A
15 | 8D 01040C
16 | 8E 01040B
17 | 8F 01040F
18 | 90 010452
19 | 91 012018
20 | 92 012019
21 | 93 01201C
22 | 94 01201D
23 | 95 012022
24 | 96 012013
25 | 97 012014
26 | 99 012122
27 | 9A 010459
28 | 9B 01203A
29 | 9C 01045A
30 | 9D 01045C
31 | 9E 01045B
32 | 9F 01045F
33 | A0 01A0
34 | A1 01040E
35 | A2 01045E
36 | A3 010408
37 | A4 01A4
38 | A5 010490
39 | A6 01A6
40 | A7 01A7
41 | A8 010401
42 | A9 01A9
43 | AA 010404
44 | AB 01AB
45 | AC 01AC
46 | AD 01AD
47 | AE 01AE
48 | AF 010407
49 | B0 01B0
50 | B1 01B1
51 | B2 010406
52 | B3 010456
53 | B4 010491
54 | B5 01B5
55 | B6 01B6
56 | B7 01B7
57 | B8 010451
58 | B9 012116
59 | BA 010454
60 | BB 01BB
61 | BC 010458
62 | BD 010405
63 | BE 010455
64 | BF 010457
65 | C0 010410
66 | C1 010411
67 | C2 010412
68 | C3 010413
69 | C4 010414
70 | C5 010415
71 | C6 010416
72 | C7 010417
73 | C8 010418
74 | C9 010419
75 | CA 01041A
76 | CB 01041B
77 | CC 01041C
78 | CD 01041D
79 | CE 01041E
80 | CF 01041F
81 | D0 010420
82 | D1 010421
83 | D2 010422
84 | D3 010423
85 | D4 010424
86 | D5 010425
87 | D6 010426
88 | D7 010427
89 | D8 010428
90 | D9 010429
91 | DA 01042A
92 | DB 01042B
93 | DC 01042C
94 | DD 01042D
95 | DE 01042E
96 | DF 01042F
97 | E0 010430
98 | E1 010431
99 | E2 010432
100 | E3 010433
101 | E4 010434
102 | E5 010435
103 | E6 010436
104 | E7 010437
105 | E8 010438
106 | E9 010439
107 | EA 01043A
108 | EB 01043B
109 | EC 01043C
110 | ED 01043D
111 | EE 01043E
112 | EF 01043F
113 | F0 010440
114 | F1 010441
115 | F2 010442
116 | F3 010443
117 | F4 010444
118 | F5 010445
119 | F6 010446
120 | F7 010447
121 | F8 010448
122 | F9 010449
123 | FA 01044A
124 | FB 01044B
125 | FC 01044C
126 | FD 01044D
127 | FE 01044E
128 | FF 01044F
129 |
--------------------------------------------------------------------------------
/modules/inter/REPLACE.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | struct my_s{ /* REPLACE configuration and match position */
6 | struct data_rt *from; /* list of items to match, parsed from the argument key */
7 | struct data_rt *to; /* list copied to the output on a full match, parsed from the argument value; NULL if no value */
8 | struct data_rt *cursor; /* item of "from" the next input item is compared with; reset to "from" by cbinit */
9 | };
10 |
11 | /*
12 |  * Parse the argument pairs: each key is the sequence to match and its value
13 |  * (if any) the replacement. A later pair replaces an earlier one.
14 |  * Returns 0 on success, ENOMEM if the state cannot be allocated, EINVAL when
15 |  * no usable match sequence was given, or the error reported by str2data.
16 |  */
17 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
18 |     struct my_s *r=malloc(sizeof(struct my_s));
19 |     int e=0;
20 |
21 |     if(r==NULL)
22 |         return ENOMEM;
23 |     r->from=NULL;
24 |     r->to=NULL;
25 |     for(;arg;arg=arg->next){
26 |         /* drop the previous pair and clear the pointers so nothing stale is freed again later */
27 |         if(r->from){
28 |             DATA_FREE(ins, r->from);
29 |             r->from=NULL;
30 |         }
31 |         if(r->to){
32 |             DATA_FREE(ins, r->to);
33 |             r->to=NULL;
34 |         }
35 |         r->from=str2data(arg->key, &e, ins);
36 |         if(e || r->from==NULL){
37 |             if(r->from){
38 |                 DATA_FREE(ins, r->from);
39 |             }
40 |             free(r);
41 |             return e ? e : EINVAL;
42 |         }
43 |         if(arg->ptr){
44 |             r->to=str2data(arg->ptr, &e, ins);
45 |             if(e){
46 |                 if(r->to){
47 |                     DATA_FREE(ins, r->to);
48 |                 }
49 |                 DATA_FREE(ins, r->from);
50 |                 free(r);
51 |                 return e;
52 |             }
53 |         }
54 |     }
55 |     /* without a match sequence cbconv would dereference a NULL cursor */
56 |     if(r->from==NULL){
57 |         free(r);
58 |         return EINVAL;
59 |     }
60 |     THIS_CODEC(ins)->priv=r;
61 |     return 0;
62 | }
41 |
42 | void cbdestroy(struct bsdconv_instance *ins){
43 |     struct my_s *r=THIS_CODEC(ins)->priv;
44 |     if(r->from)
45 |         DATA_FREE(ins, r->from);
46 |     if(r->to) /* stays NULL when the argument had no value */
47 |         DATA_FREE(ins, r->to);
48 |     free(r);
49 | }
48 |
49 | void cbinit(struct bsdconv_instance *ins){
50 |     struct my_s *state=THIS_CODEC(ins)->priv;
51 |     state->cursor=state->from; /* matching starts at the first item of the sequence */
52 | }
53 |
54 | void cbconv(struct bsdconv_instance *ins){ /* compare the current item with the one under the cursor */
55 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
56 |     struct my_s *r=THIS_CODEC(ins)->priv;
57 |     unsigned char *datai=this_phase->curr->data;
58 |     unsigned char *datar=r->cursor->data;
59 |     size_t l=this_phase->curr->len;
60 |     size_t i;
61 |
62 |     if(l != r->cursor->len){ /* different length: no match, start over */
63 |         r->cursor=r->from;
64 |         this_phase->state.status=DEADEND;
65 |         return;
66 |     }
67 |
68 |     for(i=0;i<l;++i){
69 |         if(datai[i]!=datar[i]){ /* first differing byte: no match, start over */
70 |             r->cursor=r->from;
71 |             this_phase->state.status=DEADEND;
72 |             return;
73 |         }
74 |     }
75 |
76 |     if(r->cursor->next != NULL){ /* matched so far, more of the sequence to go */
77 |         r->cursor = r->cursor->next;
78 |         this_phase->state.status=CONTINUE;
79 |         return;
80 |     }else{ /* whole sequence matched: output the replacement */
81 |         r->cursor = r->from;
82 |         LISTCPY(ins, this_phase->data_tail, r->to);
83 |         this_phase->state.status=NEXTPHASE;
84 |         return;
85 |     }
86 | }
87 |
--------------------------------------------------------------------------------
/modules/inter/UPSIDEDOWN.txt:
--------------------------------------------------------------------------------
1 | #source: http://www.fileformat.info/convert/text/upside-down-map.htm
2 | 01010412 0142
3 | 010131,010323 0169
4 | 01017F 014A
5 | 010183 0167
6 | 01018E 0145
7 | 010190 0133
8 | 0101DD 0165
9 | 010250 0161
10 | 010254 0163
11 | 01025F 0166
12 | 010265 0168
13 | 01026F 016D
14 | 010279 0172
15 | 01027E 016A
16 | 010283 016C
17 | 010287 0174
18 | 01028C 0176
19 | 01028D 0177
20 | 01028E 0179
21 | 01029E 016B
22 | 0102D9 012E
23 | 01038C 0151
24 | 010500 0150
25 | 01061B 013B
26 | 01152D 0134
27 | 011D0E 014E
28 | 011D1A 0152
29 | 011D27 0156
30 | 01201E 0122
31 | 01203E 015F
32 | 01203F 012040
33 | 012040 01203F
34 | 012045 012046
35 | 012046 012045
36 | 0121 01A1
37 | 012132 0146
38 | 012141 0147
39 | 012142 014C
40 | 012144 0159
41 | 01214B 0126
42 | 012183 0143
43 | 0122 01201E
44 | 012200 0141
45 | 012229 0155
46 | 012234 012235
47 | 012235 012234
48 | 0122A5 0154
49 | 0122CA 014B
50 | 0125D6 0144
51 | 0126 01214B
52 | 0127 012C
53 | 0128 0129
54 | 0129 0128
55 | 012C 0127
56 | 012C62 0137
57 | 012E 0102D9
58 | 0133 010190
59 | 0134 01152D
60 | 0136 0139
61 | 0137 012C62
62 | 0139 0136
63 | 013B 01061B
64 | 013C 013E
65 | 013E 013C
66 | 013F 01BF
67 | 0141 012200
68 | 0142 01010412
69 | 0143 012183
70 | 0144 0125D6
71 | 0145 01018E
72 | 0146 012132
73 | 0147 012141
74 | 014A 01017F
75 | 014B 0122CA
76 | 014C 012142
77 | 014D 0157
78 | 014E 011D0E
79 | 0150 010500
80 | 0151 01038C
81 | 0152 011D1A
82 | 0154 0122A5
83 | 0155 012229
84 | 0156 011D27
85 | 0157 014D
86 | 0159 012144
87 | 015B 015D
88 | 015D 015B
89 | 015F 01203E
90 | 0161 010250
91 | 0162 0171
92 | 0163 010254
93 | 0164 0170
94 | 0165 0101DD
95 | 0166 01025F
96 | 0167 010183
97 | 0168 010265
98 | 0169 010131,010323
99 | 016A 01027E
100 | 016B 01029E
101 | 016C 010283
102 | 016D 01026F
103 | 016E 0175
104 | 0170 0164
105 | 0171 0162
106 | 0172 010279
107 | 0174 010287
108 | 0175 016E
109 | 0176 01028C
110 | 0177 01028D
111 | 0179 01028E
112 | 017B 017D
113 | 017D 017B
114 | 01A1 0121
115 | 01BF 013F
116 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-UNPAD.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #include "_AMBIGUOUS.h"
4 |
5 | struct my_s{ /* AMBIGUOUS-UNPAD state */
6 | char s; /* set to 1 by cbconv after an item found in the ambiguous table (while dopad is non-zero) */
7 | int dopad; /* 1 after cbinit; overwritten with the value passed via BSDCONV_CTL_AMBIGUOUS_PAD */
8 | };
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 |     struct my_s *state=malloc(sizeof(struct my_s)); /* fields are assigned in cbinit */
12 |     THIS_CODEC(ins)->priv=state;
13 |     return 0;
14 | }
14 |
15 | void cbinit(struct bsdconv_instance *ins){
16 |     struct my_s *state=THIS_CODEC(ins)->priv;
17 |     state->dopad=1; /* pad handling is on by default */
18 |     state->s=0; /* no ambiguous item seen yet */
19 | }
20 |
21 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){
22 |     struct my_s *r=THIS_CODEC(ins)->priv;
23 |     /* only the pad switch is handled; other requests are ignored */
24 |     switch(ctl){
25 |         case BSDCONV_CTL_AMBIGUOUS_PAD:
26 |             r->dopad=v;
27 |             break;
28 |     }
29 | }
30 |
31 | void cbdestroy(struct bsdconv_instance *ins){
32 |     struct my_s *state=THIS_CODEC(ins)->priv; /* allocated in cbcreate */
33 |     free(state);
34 | }
34 |
35 | /*
36 |  * Drop a U+00A0 item that directly follows an item found in the ambiguous
37 |  * table; copy every other 0x01-tagged item to the output. Items that are
38 |  * not 0x01-tagged (or have no payload) are not copied and clear the state.
39 |  */
40 | void cbconv(struct bsdconv_instance *ins){
41 |     unsigned char *data;
42 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
43 |     struct my_s *r=THIS_CODEC(ins)->priv;
44 |     data=this_phase->curr->data;
45 |     int pad;
46 |     int max=sizeof(ambiguous) / sizeof(struct interval) - 1;
47 |     int min = 0;
48 |     int mid;
49 |     uint32_t ucs=0;
50 |
51 |     this_phase->state.status=NEXTPHASE;
52 |
53 |     if(!(this_phase->curr->len>1 && data[0]==0x1)){
54 |         r->s=0;
55 |         return;
56 |     }
57 |
58 |     /* len==2 restricts the match to exactly U+00A0; without it any
59 |      * code point whose first payload byte is 0xA0 would be dropped */
60 |     if(r->s==1 && this_phase->curr->len==2 && data[1]==0xA0){
61 |         r->s=0;
62 |         return;
63 |     }
64 |
65 |     for(pad=1;pad<this_phase->curr->len;++pad){
66 |         ucs<<=8;
67 |         ucs|=data[pad];
68 |     }
69 |
70 |     /* binary search of the ambiguous table */
71 |     pad=0;
72 |     if (ucs < ambiguous[0].first || ucs > ambiguous[max].last){
73 |         pad=0;
74 |     }else while (max >= min) {
75 |         mid = (min + max) / 2;
76 |         if (ucs > ambiguous[mid].last)
77 |             min = mid + 1;
78 |         else if (ucs < ambiguous[mid].first)
79 |             max = mid - 1;
80 |         else{
81 |             pad=1;
82 |             break;
83 |         }
84 |     }
85 |     /* the state describes this item only, so a U+00A0 that follows a
86 |      * non-ambiguous character is kept */
87 |     r->s=(pad && r->dopad) ? 1 : 0;
88 |
89 |     this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
90 |     this_phase->data_tail=this_phase->data_tail->next;
91 |     this_phase->data_tail->next=NULL;
92 | }
85 |
--------------------------------------------------------------------------------
/modules/inter/AMBIGUOUS-PAD.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | #include "_AMBIGUOUS.h"
4 |
5 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
6 |     int *dopad=malloc(sizeof(int)); /* pad on/off flag, assigned in cbinit */
7 |     THIS_CODEC(ins)->priv=dopad;
8 |     return 0;
9 | }
10 |
11 | void cbinit(struct bsdconv_instance *ins){
12 |     /* padding is enabled by default */
13 |     *(int *)THIS_CODEC(ins)->priv=1;
14 | }
15 |
16 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){
17 |     int *r=THIS_CODEC(ins)->priv;
18 |     /* only the pad switch is handled; other requests are ignored */
19 |     switch(ctl){
20 |         case BSDCONV_CTL_AMBIGUOUS_PAD:
21 |             *r=v;
22 |             break;
23 |     }
24 | }
25 |
26 | void cbdestroy(struct bsdconv_instance *ins){
27 |     /* release the flag allocated in cbcreate */
28 |     free(THIS_CODEC(ins)->priv);
29 | }
30 | /*
31 |  * Copy the current item to the output; if it is a 0x01-tagged code point
32 |  * found in the ambiguous table and padding is enabled, append U+00A0
33 |  * (bytes 01 A0) after it.
34 |  */
35 | void cbconv(struct bsdconv_instance *ins){
36 |     unsigned char *data;
37 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
38 |     int *dopad=THIS_CODEC(ins)->priv;
39 |     data=this_phase->curr->data;
40 |     int pad;
41 |
42 |     int max=sizeof(ambiguous) / sizeof(struct interval) - 1;
43 |     int min = 0;
44 |     int mid;
45 |     char *space="\x01\xA0";
46 |     uint32_t ucs=0;
47 |
48 |     this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
49 |     this_phase->data_tail=this_phase->data_tail->next;
50 |     this_phase->data_tail->next=NULL;
51 |
52 |     if(this_phase->curr->len>0 && data[0]==0x1){
53 |         for(pad=1;pad<this_phase->curr->len;++pad){
54 |             ucs<<=8;
55 |             ucs|=data[pad];
56 |         }
57 |
58 |         /* binary search of the ambiguous table */
59 |         pad=0;
60 |         if (ucs < ambiguous[0].first || ucs > ambiguous[max].last){
61 |             pad=0;
62 |         }else while (max >= min) {
63 |             mid = (min + max) / 2;
64 |             if (ucs > ambiguous[mid].last)
65 |                 min = mid + 1;
66 |             else if (ucs < ambiguous[mid].first)
67 |                 max = mid - 1;
68 |             else{
69 |                 pad=1;
70 |                 break;
71 |             }
72 |         }
73 |         if(pad && *dopad){
74 |             DATA_MALLOC(ins, this_phase->data_tail->next);
75 |             this_phase->data_tail=this_phase->data_tail->next;
76 |             this_phase->data_tail->len=2;
77 |             this_phase->data_tail->data=space;
78 |             this_phase->data_tail->flags=0; /* data is a string literal, so F_FREE is not set */
79 |             this_phase->data_tail->next=NULL;
80 |         }
81 |     }
82 |
83 |     this_phase->state.status=NEXTPHASE;
84 | }
80 |
--------------------------------------------------------------------------------
/modules/inter/BIG5-DEFRAG.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | struct my_s{ /* BIG5-DEFRAG state */
4 | struct data_rt *p; /* copy of the held-back item (first byte 0x03, second byte above 0x7f) */
5 | struct data_rt *q; /* head of the list of 0x1b-tagged items queued while an item is held back */
6 | struct data_rt **r; /* link where the next queued item goes: &q, or the next field of the last queued item */
7 | char f; /* 1 while an item is held back in p, otherwise 0 */
8 | };
9 |
10 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
11 |     struct my_s *state=malloc(sizeof(struct my_s)); /* fields are assigned in cbinit */
12 |     THIS_CODEC(ins)->priv=state;
13 |     return 0;
14 | }
14 |
15 | void cbinit(struct bsdconv_instance *ins){
16 |     struct my_s *state=THIS_CODEC(ins)->priv;
17 |     /* nothing held back, empty queue, queue link pointing at its head */
18 |     state->f=0;
19 |     state->p=NULL;
20 |     state->q=NULL;
21 |     state->r=&(state->q);
22 | }
22 |
23 | void cbdestroy(struct bsdconv_instance *ins){
24 |     struct my_s *state=THIS_CODEC(ins)->priv; /* allocated in cbcreate */
25 |     free(state);
26 | }
26 |
27 | /*
28 |  * Hold back an item whose first byte is 0x03 and second byte is above 0x7f,
29 |  * queue any 0x1b-tagged items that follow it, and when the next other item
30 |  * arrives emit: the held item, that item, then the queued items. Items seen
31 |  * while nothing is held back are copied straight to the output.
32 |  */
33 | void cbconv(struct bsdconv_instance *ins){
34 |     unsigned char *data;
35 |     struct bsdconv_phase *this_phase=THIS_PHASE(ins);
36 |     struct my_s *r=THIS_CODEC(ins)->priv;
37 |     data=this_phase->curr->data;
38 |
39 |     if(r->f==0){
40 |         /* the length check keeps data[1] from being read on a one-byte item */
41 |         if(this_phase->curr->len>1 && data[0]==0x3 && data[1]>0x7f){
42 |             r->f=1;
43 |             r->p=dup_data_rt(ins, this_phase->curr);
44 |             this_phase->state.status=SUBMATCH;
45 |             return;
46 |         }
47 |         /* dup_data_rt supplies the node; no separate DATA_MALLOC is needed */
48 |         this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
49 |         this_phase->data_tail=this_phase->data_tail->next;
50 |         this_phase->data_tail->next=NULL;
51 |         this_phase->state.status=NEXTPHASE;
52 |         return;
53 |     }
54 |
55 |     if(data[0]==0x1b){
56 |         /* append to the queue and advance the link */
57 |         *(r->r)=dup_data_rt(ins, this_phase->curr);
58 |         (*(r->r))->next=NULL;
59 |         r->r=&((*(r->r))->next);
60 |
61 |         this_phase->state.status=SUBMATCH;
62 |         return;
63 |     }
64 |
65 |     /* held item first, then the current one */
66 |     this_phase->data_tail->next=r->p;
67 |     this_phase->data_tail=this_phase->data_tail->next;
68 |
69 |     this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
70 |     this_phase->data_tail=this_phase->data_tail->next;
71 |     this_phase->data_tail->next=NULL; /* terminate the list even when nothing was queued */
72 |
73 |     if(r->q){
74 |         this_phase->data_tail->next=r->q;
75 |         *(r->r)=NULL;
76 |         while(this_phase->data_tail->next){
77 |             this_phase->data_tail=this_phase->data_tail->next;
78 |         }
79 |     }
80 |     r->p=r->q=NULL;
81 |     r->r=&(r->q);
82 |     r->f=0;
83 |     this_phase->state.status=NEXTPHASE;
84 | }
79 |
--------------------------------------------------------------------------------
/modules/inter/SCORE-TRAIN.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "../../src/bsdconv.h"
6 |
7 | struct my_s{ /* SCORE-TRAIN file handles */
8 | FILE *bak; /* score file opened by cbcreate; this is the handle cbdestroy closes */
9 | FILE *score; /* score file cbconv reads and updates; same as bak unless replaced via BSDCONV_CTL_ATTACH_SCORE */
10 | FILE *list; /* optional output set via BSDCONV_CTL_ATTACH_OUTPUT_FILE; NULL by default */
11 | };
12 |
13 | /*
14 |  * Open the score file named by $BSDCONV_SCORE, or $HOME/.bsdconv.score when
15 |  * that variable is unset, creating it if it does not exist.
16 |  * Returns 0 on success; EINVAL when no path can be formed (HOME unset or
17 |  * path too long for the buffer), ENOMEM if the state cannot be allocated,
18 |  * or the errno of a failed fopen. The error codes follow the errno
19 |  * convention used by the sibling modules.
20 |  */
21 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
22 |     struct my_s *r;
23 |     char buf[256]={0};
24 |     char *p=getenv("BSDCONV_SCORE");
25 |     const char *home;
26 |     FILE *fp;
27 |     int n;
28 |
29 |     if(p==NULL){
30 |         home=getenv("HOME");
31 |         if(home==NULL)
32 |             return EINVAL;
33 |         n=snprintf(buf, sizeof(buf), "%s/.bsdconv.score", home);
34 |         if(n<0 || (size_t)n>=sizeof(buf))
35 |             return EINVAL;
36 |         p=buf;
37 |     }
38 |
39 |     fp=fopen(p,"a"); //ensure file existence
40 |     if(fp==NULL)
41 |         return errno ? errno : EINVAL;
42 |     fclose(fp);
43 |
44 |     fp=fopen(p,"rb+");
45 |     if(fp==NULL)
46 |         return errno ? errno : EINVAL;
47 |
48 |     r=malloc(sizeof(struct my_s));
49 |     if(r==NULL){
50 |         fclose(fp);
51 |         return ENOMEM;
52 |     }
53 |     r->bak=r->score=fp;
54 |     r->list=NULL;
55 |     THIS_CODEC(ins)->priv=r;
56 |     return 0;
57 | }
29 |
30 | void cbdestroy(struct bsdconv_instance *ins){
31 |     struct my_s *state=THIS_CODEC(ins)->priv;
32 |     fclose(state->bak); /* the handle opened in cbcreate, not one attached through cbctl */
33 |     free(state);
34 | }
35 |
36 | void cbctl(struct bsdconv_instance *ins, int ctl, void *ptr, size_t v){
37 |     struct my_s *state=THIS_CODEC(ins)->priv;
38 |     /* ptr replaces the score file or sets the list output; other requests are ignored */
39 |     if(ctl==BSDCONV_CTL_ATTACH_SCORE){
40 |         state->score=ptr;
41 |     }else if(ctl==BSDCONV_CTL_ATTACH_OUTPUT_FILE){
42 |         state->list=ptr;
43 |     }
44 | }
47 |
48 | void cbconv(struct bsdconv_instance *ins){
49 | unsigned char *data;
50 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
51 | struct my_s *r=THIS_CODEC(ins)->priv;
52 | data=this_phase->curr->data;
53 | unsigned char v=0;
54 | int i;
55 | uint32_t ucs=0;
56 | uint32_t ucs4;
57 |
58 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
59 | this_phase->data_tail=this_phase->data_tail->next;
60 | this_phase->data_tail->next=NULL;
61 |
62 | if(data[0]==0x1){
63 | for(i=1;icurr->len;++i){
64 | ucs<<=8;
65 | ucs|=data[i];
66 | }
67 | fseek(r->score, ucs*sizeof(unsigned char), SEEK_SET);
68 | fread(&v, sizeof(unsigned char), 1, r->score);
69 | if(v==0 && r->list){
70 | ucs4=htobe32(ucs);
71 | fwrite(&ucs4, sizeof(uint32_t), 1, r->list);
72 | }
73 | if(v<3){
74 | v+=1;
75 | fseek(r->score, ucs*sizeof(unsigned char), SEEK_SET);
76 | fwrite(&v, sizeof(unsigned char), 1, r->score);
77 | }
78 | }
79 |
80 | this_phase->state.status=NEXTPHASE;
81 | return;
82 | }
83 |
--------------------------------------------------------------------------------
/modules/inter/_NF-HANGUL-DECOMPOSITION.c:
--------------------------------------------------------------------------------
1 | #include "../../src/bsdconv.h"
2 |
3 | static void decomposeHangul(uint32_t ucs, struct bsdconv_instance *ins);
4 |
/* Bases of the code-point ranges used by the Hangul arithmetic below. */
#define SBase	0xAC00	/* first value handled by decomposeHangul's split */
#define LBase	0x1100	/* base added to the L index */
#define VBase	0x1161	/* base added to the V index */
#define TBase	0x11A7	/* base added to the T index; T == TBase means "no T" */

/* Sizes of the three index spaces and of their products. */
#define LCount	19
#define VCount	21
#define TCount	28
#define NCount	(VCount * TCount)
#define SCount	(LCount * NCount)
14 |
15 | void cbconv(struct bsdconv_instance *ins){
16 | unsigned char *data;
17 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
18 | data=this_phase->curr->data;
19 | int i;
20 | uint32_t ucs=0;
21 |
22 | if(data[0]==0x1){
23 | for(i=1;icurr->len;++i){
24 | ucs<<=8;
25 | ucs|=data[i];
26 | }
27 | int SIndex = ucs - SBase;
28 | if(SIndex >= 0 && SIndex < SCount){
29 | decomposeHangul(ucs, ins);
30 | }else{
31 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
32 | this_phase->data_tail=this_phase->data_tail->next;
33 | this_phase->data_tail->next=NULL;
34 | }
35 | }
36 |
37 | this_phase->state.status=NEXTPHASE;
38 | return;
39 | }
40 |
41 | static void decomposeHangul(uint32_t ucs, struct bsdconv_instance *ins){
42 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
43 | int SIndex = ucs - SBase;
44 | if(SIndex >= 0 && SIndex < SCount){
45 | int L = LBase + SIndex / NCount;
46 | int V = VBase + (SIndex % NCount) / TCount;
47 | int T = TBase + SIndex % TCount;
48 |
49 | decomposeHangul(L, ins);
50 | decomposeHangul(V, ins);
51 | if(T != TBase)
52 | decomposeHangul(T, ins);
53 | }else{
54 | int i;
55 | unsigned char *p;
56 | unsigned char stack[8];
57 | int stack_len=0;
58 | DATA_MALLOC(ins, this_phase->data_tail->next);
59 | this_phase->data_tail=this_phase->data_tail->next;
60 | while(ucs && stack_len>= 8;
63 | stack_len += 1;
64 | }
65 | this_phase->data_tail->len=stack_len+=1;
66 | this_phase->data_tail->data=malloc(this_phase->data_tail->len);
67 | p=this_phase->data_tail->data;
68 | *p=1;
69 | p+=1;
70 | stack_len-=1;
71 | for(i=0;idata_tail->flags=F_FREE;
76 | this_phase->data_tail->next=NULL;
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/modules/from/ASCII-NUMERIC-HTML-ENTITY.c:
--------------------------------------------------------------------------------
1 | #define USE_HEX_MAP
2 | #define USE_DEC_MAP
3 |
4 | #include
5 | #include
6 | #include
7 | #include "../../src/bsdconv.h"
8 |
/* Parser state for one numeric entity, kept across cbconv calls. */
struct my_s{
	/* 0 while no digits have been accepted; non-zero once parsing started. */
	int status;
	/* Digit lookup table currently in use (cbconv points it at hex or dec). */
	int *tbl;
	/* Radix matching tbl: 16 or 10. */
	int b;
	/* Accumulated value, also viewable as four raw bytes for output. */
	union {
		char c[4];
		uint32_t i;
	} buf;
};
18 |
19 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
20 | THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s));
21 | return 0;
22 | }
23 |
24 | void cbinit(struct bsdconv_instance *ins){
25 | struct my_s *r=THIS_CODEC(ins)->priv;
26 | r->status=0;
27 | }
28 |
29 | void cbdestroy(struct bsdconv_instance *ins){
30 | void *p=THIS_CODEC(ins)->priv;
31 | free(p);
32 | }
33 |
/*
 * Give up on the current input: mark the phase DEADEND, reset the parser and
 * return from the enclosing function.  Expects `this_phase` and `t` to be in
 * scope at the point of use.
 *
 * The definition deliberately does not end in a semicolon; the caller's own
 * `;` completes the statement, so `if(x) DEADEND(); else ...` parses.
 */
#define DEADEND() do{ \
	this_phase->state.status=DEADEND; \
	t->status=0; \
	return; \
}while(0)
39 |
40 | void cbconv(struct bsdconv_instance *ins){
41 | char ob[8], *p;
42 | int i,j=0;
43 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
44 | struct my_s *t=THIS_CODEC(ins)->priv;
45 | char d;
46 |
47 | for(;this_phase->icurr->len;this_phase->i+=1){
48 | d=CP(this_phase->curr->data)[this_phase->i];
49 | if(d==';' && t->status){
50 | //put data
51 | t->buf.i=htobe32(t->buf.i);
52 | for(i=0;i<4;i++){
53 | if(t->buf.c[i] || j)
54 | ob[j++]=t->buf.c[i];
55 | }
56 | DATA_MALLOC(ins, this_phase->data_tail->next);
57 | this_phase->data_tail=this_phase->data_tail->next;
58 | this_phase->data_tail->next=NULL;
59 | this_phase->data_tail->flags=F_FREE;
60 | this_phase->data_tail->len=j+1;
61 | p=this_phase->data_tail->data=malloc(j+1);
62 | p[0]=0x01;
63 | memcpy(&p[1], ob, j);
64 | this_phase->state.status=NEXTPHASE;
65 | t->status=0;
66 | return;
67 | }
68 | if(t->status){
69 | ++t->status;
70 | if(t->tbl[(unsigned char)d]==-1) DEADEND();
71 | t->buf.i*=t->b;
72 | t->buf.i+=t->tbl[(unsigned char)d];
73 | }else{
74 | if(d=='x'){
75 | t->status=1000;
76 | t->tbl=hex;
77 | t->b=16;
78 | t->buf.i=0;
79 | continue;
80 | }
81 | t->b=10;
82 | t->tbl=dec;
83 | if(t->tbl[(unsigned char)d]==-1) DEADEND();
84 | t->buf.i=t->tbl[(unsigned char)d];
85 | t->status=1;
86 | }
87 | }
88 | this_phase->state.status=CONTINUE;
89 | return;
90 | }
91 |
--------------------------------------------------------------------------------
/modules/from/CP950-UDA.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../../src/bsdconv.h"
4 |
5 | // Ref: http://kanji-database.sourceforge.net/charcode/big5.html
6 |
/* Decoder state carried between the lead byte and the trail byte. */
struct my_s{
	/* Pending lead byte, or 0 when the next byte is expected to be a lead. */
	int h;
	/* Output value assigned to the first code of the pending lead's range. */
	int x;
	/* First lead byte of the pending lead's range. */
	int y;
};
12 |
13 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
14 | struct my_s *r = malloc(sizeof(struct my_s));
15 | THIS_CODEC(ins)->priv = r;
16 | r->h = 0;
17 | return 0;
18 | }
19 |
20 | void cbdestroy(struct bsdconv_instance *ins){
21 | struct my_s *r = THIS_CODEC(ins)->priv;
22 | free(r);
23 | }
24 |
25 | void cbconv(struct bsdconv_instance *ins){
26 | struct bsdconv_phase *this_phase = THIS_PHASE(ins);
27 | struct my_s *r = THIS_CODEC(ins)->priv;
28 |
29 | unsigned char d = UCP(this_phase->curr->data)[this_phase->i];
30 |
31 | if(r->h==0){
32 | if(d>=0xFA && d<=0xFE){
33 | r->h = d;
34 | r->x = 0xE000;
35 | r->y = 0xFA;
36 | this_phase->state.status = CONTINUE;
37 | return;
38 | }else if(d>=0x8E && d<=0xA0){
39 | r->h = d;
40 | r->x = 0xE311;
41 | r->y = 0x8E;
42 | this_phase->state.status = CONTINUE;
43 | return;
44 | }else if(d>=0x81 && d<=0x8D){
45 | r->h = d;
46 | r->x = 0xEEB8;
47 | r->y = 0x81;
48 | this_phase->state.status = CONTINUE;
49 | return;
50 | }else if(d>=0xC6 && d<=0xC8){
51 | r->h = d;
52 | r->x = 0xF672;
53 | r->y = 0xC6;
54 | this_phase->state.status = CONTINUE;
55 | return;
56 | }else{
57 | this_phase->state.status = DEADEND;
58 | return;
59 | }
60 | }else{
61 | uint32_t b = (r->h<<8)|d;
62 | if(
63 | (b>=0xFA40 && b<=0xFEFE)
64 | ||
65 | (b>=0x8E40 && b<=0xA0FE)
66 | ||
67 | (b>=0x8140 && b<=0x8DFE)
68 | ||
69 | (b>=0xC6A1 && b<=0xC8FE)
70 | ){
71 | uint32_t u = r->x + (157 * (r->h - r->y)) + (d<0x80?d-0x40:d-0x62);
72 | unsigned char *c;
73 | DATA_MALLOC(ins, this_phase->data_tail->next);
74 | this_phase->data_tail=this_phase->data_tail->next;
75 | this_phase->data_tail->next=NULL;
76 | this_phase->data_tail->len=3;
77 | this_phase->data_tail->data=c=malloc(3);
78 | this_phase->data_tail->flags=F_FREE;
79 | this_phase->state.status=NEXTPHASE;
80 | c[0] = 0x01;
81 | c[1] = (u >> 8) & 0xFF;
82 | c[2] = u & 0xFF;
83 | }else{
84 | this_phase->state.status = DEADEND;
85 | }
86 | r->h = 0;
87 | return;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/bsdconv-completion.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "bsdconv.h"
4 |
5 | int item_in_array(char *item, char **array, int size){
6 | int i;
7 | for(i=0;i1)
27 | arg=argv[1];
28 |
29 | arg=arg_base=strdup(arg);
30 |
31 | if(arg[0]=='\'' || arg[0]=='"'){
32 | arg+=1;
33 | }
34 | i=strlen(arg)-1;
35 | if(arg[i]=='\'' || arg[i]=='"'){
36 | arg[i]=0;
37 | }
38 |
39 | part=arg;
40 | mode=FROM;
41 | strtoupper(arg);
42 | for(c=arg;*c;++c){
43 | switch(*c){
44 | case ':':
45 | mode=TO;
46 | part=c+1;
47 | break;
48 | case '|':
49 | mode=FROM;
50 | part=c+1;
51 | break;
52 | case ',':
53 | part=c+1;
54 | break;
55 | }
56 | }
57 |
58 | int size=0;
59 | int num=0;
60 | inter_list=bsdconv_modules_list(INTER);
61 | fromto_list=bsdconv_modules_list(mode);
62 | for(p=inter_list;*p;++p)
63 | num+=1;
64 | for(p=fromto_list;*p;++p)
65 | num+=1;
66 |
67 | codecs_list=malloc(sizeof(char *) * (num+1));
68 | for(p=inter_list;*p;++p){
69 | if(strstr(*p,part)==*p && !item_in_array(*p, codecs_list, size)){
70 | codecs_list[size]=*p;
71 | size+=1;
72 | }
73 | codecs_list[size]=NULL;
74 | }
75 | for(p=fromto_list;*p;++p){
76 | if(strstr(*p,part)==*p && !item_in_array(*p, codecs_list, size)){
77 | codecs_list[size]=*p;
78 | size+=1;
79 | }
80 | codecs_list[size]=NULL;
81 | }
82 |
83 | for(i=0;iqueue=NULL;
13 | t->rerail=NULL;
14 | THIS_CODEC(ins)->priv=t;
15 | return 0;
16 | }
17 |
18 | void cbinit(struct bsdconv_instance *ins){
19 | struct my_s *t=THIS_CODEC(ins)->priv;
20 | t->offsetA=0;
21 | t->offsetB=0;
22 | t->last=&t->queue;
23 | struct data_rt *q;
24 | while(t->queue){
25 | DATUM_FREE(ins, (struct data_rt *)t->queue->data);
26 | q=t->queue;
27 | t->queue=t->queue->next;
28 | DATUM_FREE(ins, q);
29 | }
30 | }
31 |
32 | void cbdestroy(struct bsdconv_instance *ins){
33 | struct my_s *t=THIS_CODEC(ins)->priv;
34 | struct data_rt *q;
35 | if(bsdconv_hash_has(ins, HASHKEY)){
36 | while(t->queue){
37 | DATUM_FREE(ins, (struct data_rt *)t->queue->data);
38 | q=t->queue;
39 | t->queue=t->queue->next;
40 | DATUM_FREE(ins, q);
41 | }
42 | free(t);
43 | bsdconv_hash_del(ins, HASHKEY);
44 | }
45 | }
46 |
47 | void cbconv(struct bsdconv_instance *ins){
48 | unsigned char *data;
49 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
50 | struct my_s *t=THIS_CODEC(ins)->priv;
51 | struct data_rt *q;
52 | data=this_phase->curr->data;
53 | int i;
54 | uint32_t ucs=0;
55 |
56 | this_phase->state.status=NEXTPHASE;
57 |
58 | if(this_phase->curr->len>0 && data[0]==0x1){
59 | for(i=1;icurr->len;++i){
60 | ucs<<=8;
61 | ucs|=data[i];
62 | }
63 | if(ucs==0x09||ucs==0x0A||ucs==0x0D||ucs==0x20){
64 | DATA_MALLOC(ins, q);
65 | *(t->last)=q;
66 | q->next=NULL;
67 | q->flags=0;
68 | t->last=&q->next;
69 | q->data=(void *) dup_data_rt(ins, this_phase->curr);
70 | ((struct data_rt *)q->data)->next=NULL;
71 | q->len=t->offsetA;
72 |
73 | if(t->rerail){
74 | t->rerail->flags |= (F_MATCH | F_PENDING);
75 | t->rerail->match_data = NULL;
76 | }
77 |
78 | return;
79 | }
80 | }
81 | t->offsetA+=1;
82 |
83 | this_phase->data_tail->next=dup_data_rt(ins, this_phase->curr);
84 | this_phase->data_tail=this_phase->data_tail->next;
85 | this_phase->data_tail->next=NULL;
86 |
87 | return;
88 | }
89 |
--------------------------------------------------------------------------------
/modules/to/_GB18030.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../../src/bsdconv.h"
3 |
/* One contiguous run of values handled arithmetically by cbconv. */
struct gb18030_data {
	uint32_t beg;	/* first value of the run (inclusive) */
	uint32_t end;	/* last value of the run (inclusive) */
	uint32_t off;	/* linear output index assigned to `beg` */
};

/*
 * Runs sorted by ascending `beg` and non-overlapping, which is what the
 * binary search in cbconv relies on.
 */
static const struct gb18030_data gb18030_table[] = {
	{ 0x0452,   0x200F, 1688038 },
	{ 0x2643,   0x2E80, 1696437 },
	{ 0x361B,   0x3917, 1700191 },
	{ 0x3CE1,   0x4055, 1701916 },
	{ 0x4160,   0x4336, 1703065 },
	{ 0x44D7,   0x464B, 1703947 },
	{ 0x478E,   0x4946, 1704636 },
	{ 0x49B8,   0x4C76, 1705179 },
	{ 0x9FA6,   0xD7FF, 1706261 },
	{ 0xE865,   0xF92B, 1720768 },
	{ 0xFA2A,   0xFE2F, 1725296 },
	{ 0xFFE6,   0xFFFF, 1726612 },
	{ 0x10000, 0x10FFFF, 1876218 },
};
25 |
26 | void cbconv(struct bsdconv_instance *ins){
27 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
28 | unsigned char *data, *p;
29 | unsigned int len;
30 | int max=sizeof(gb18030_table) / sizeof(struct gb18030_data) - 1;
31 | int min = 0;
32 | int mid;
33 | union {
34 | unsigned char byte[4];
35 | uint32_t num;
36 | } codepoint;
37 | int i;
38 | uint32_t ucs;
39 | uint32_t gb;
40 | data=this_phase->curr->data;
41 |
42 | data+=1;
43 | len=this_phase->curr->len-1;
44 |
45 | codepoint.num=0;
46 | for(i=0;(len-i)>0;++i){
47 | codepoint.byte[3-i]=data[len-i-1];
48 | }
49 | ucs=be32toh(codepoint.num);
50 |
51 | if (ucs < gb18030_table[0].beg || ucs > gb18030_table[max].end){
52 | this_phase->state.status=DEADEND;
53 | return;
54 | }else while (max >= min) {
55 | mid = (min + max) / 2;
56 | if (ucs > gb18030_table[mid].end)
57 | min = mid + 1;
58 | else if (ucs < gb18030_table[mid].beg)
59 | max = mid - 1;
60 | else{
61 | break;
62 | }
63 | }
64 | if(gb18030_table[mid].beg<=ucs && ucs<=gb18030_table[mid].end){
65 | this_phase->state.status=NEXTPHASE;
66 | DATA_MALLOC(ins, this_phase->data_tail->next);
67 | this_phase->data_tail=this_phase->data_tail->next;
68 | this_phase->data_tail->next=NULL;
69 | this_phase->data_tail->flags=F_FREE;
70 |
71 | gb=gb18030_table[mid].off + (ucs - gb18030_table[mid].beg);
72 |
73 | this_phase->data_tail->len=4;
74 | p=this_phase->data_tail->data=malloc(4);
75 |
76 | gb-=1687218;
77 | p[3]=0x30+gb%10;
78 | gb/=10;
79 | p[2]=0x81+gb%126;
80 | gb/=126;
81 | p[1]=0x30+gb%10;
82 | gb/=10;
83 | p[0]=0x81+gb;
84 | return;
85 | }else{
86 | this_phase->state.status=DEADEND;
87 | return;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/modules/inter/TRIM-WIDTH.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include "../../src/bsdconv.h"
7 |
8 | struct my_s{
9 | struct bsdconv_instance *ins;
10 | char ambi_width;
11 | size_t width;
12 | long remain;
13 | bsdconv_counter_t *full;
14 | bsdconv_counter_t *half;
15 | bsdconv_counter_t *ambi;
16 | };
17 |
18 | int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
19 | int i;
20 | struct my_s *r=THIS_CODEC(ins)->priv=malloc(sizeof(struct my_s));
21 |
22 | char width_set=0;
23 | r->ambi_width=1;
24 |
25 | while(arg){
26 | if(strcasecmp(arg->key, "AMBI-AS-WIDE")==0 || strcasecmp(arg->key, "AMBIGUOUS-AS-WIDE")==0){
27 | r->ambi_width=2;
28 | }else if(sscanf(arg->key,"%d", &i)==1){
29 | r->width=i;
30 | width_set=1;
31 | }else{
32 | return EINVAL;
33 | }
34 | arg=arg->next;
35 | }
36 |
37 | if(width_set==0)
38 | return EINVAL;
39 |
40 | r->ins=bsdconv_create("WIDTH");
41 | r->full=bsdconv_counter(r->ins, "FULL");
42 | r->half=bsdconv_counter(r->ins, "HALF");
43 | r->ambi=bsdconv_counter(r->ins, "AMBI");
44 | return 0;
45 | }
46 |
47 | void cbinit(struct bsdconv_instance *ins){
48 | struct my_s *r=THIS_CODEC(ins)->priv;
49 | bsdconv_init(r->ins);
50 | r->remain=r->width;
51 | }
52 |
53 | void cbdestroy(struct bsdconv_instance *ins){
54 | struct my_s *r=THIS_CODEC(ins)->priv;
55 | bsdconv_destroy(r->ins);
56 | free(r);
57 | }
58 |
59 | void cbconv(struct bsdconv_instance *ins){
60 | struct bsdconv_phase *this_phase=THIS_PHASE(ins);
61 | struct my_s *r=THIS_CODEC(ins)->priv;
62 |
63 | bsdconv_counter_reset(r->ins, NULL);
64 | bsdconv_init(r->ins);
65 | r->ins->input=*(this_phase->curr);
66 | this_phase->curr->flags &= ~F_FREE;
67 | r->ins->input.next=NULL;
68 | r->ins->flush=1;
69 | bsdconv(r->ins);
70 | int w=*(r->full)*2 + *(r->half) + *(r->ambi) * r->ambi_width;
71 | if(r->remain >= w){
72 | this_phase->data_tail->next=r->ins->phase[r->ins->phasen].data_head->next;
73 | while(this_phase->data_tail->next){
74 | this_phase->data_tail=this_phase->data_tail->next;
75 | }
76 | r->ins->phase[r->ins->phasen].data_head->next=NULL;
77 | r->ins->phase[r->ins->phasen].data_tail=r->ins->phase[r->ins->phasen].data_head;
78 | r->remain -= w;
79 | }else{
80 | r->remain=-1;
81 | }
82 |
83 | this_phase->state.status=NEXTPHASE;
84 | return;
85 | }
86 |
--------------------------------------------------------------------------------