├── .gitignore ├── CMakeLists.txt ├── Doxyfile.in ├── LICENSE ├── README-cn.md ├── README.md ├── build.sh ├── include └── exiconv.h ├── lib ├── charsetdetect_include │ └── charsetdetect.h ├── iconv_include │ ├── export.h │ ├── iconv.h │ ├── iconv.h.build.in │ ├── iconv.h.in │ └── iconv.h.inst ├── libcharsetdetect.so ├── libiconv.so ├── libiconv.so.2 └── libiconv.so.2.5.1 └── src ├── core.c ├── detect.c └── main.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | .fuse* 3 | *.sublime-* 4 | bin/ 5 | build/ 6 | libcharsetdetect/ 7 | libiconv-1.14/ 8 | 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Build script for ExIconv.
#
# Targets:
#   exiconv    - static library built from src/core.c and src/detect.c
#   iconv_test - command line test driver built from src/main.c
#   doc        - Doxygen HTML documentation (only when BUILD_DOCUMENTATION is ON)
#
# The range keeps old CMake releases working while opting in to newer policies.
cmake_minimum_required(VERSION 3.5...3.28)
project(ExIconv C)

# All sources are C. The previous script set "-std=c99" and the optimisation
# flags on CMAKE_CXX_FLAGS*, which are never applied to .c files, so they had
# no effect. Request C99 through the C standard variable instead and rely on
# CMake's built-in per-configuration flags (-g, -Os, -O3, -O2 -g ...).
# The compiler itself is chosen by the user (CC=clang or -DCMAKE_C_COMPILER=clang).
set(CMAKE_C_STANDARD 99)

# Executables are written to <source>/bin, the same location as before
# (bin/ is listed in .gitignore).
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bin")

# Locate the prebuilt libraries shipped in lib/. HINTS are searched before the
# system locations, so the bundled copies win, exactly like the former
# link_directories(lib) + "-liconv -lcharsetdetect" combination.
set(exiconv_bundled_lib_dir "${CMAKE_CURRENT_SOURCE_DIR}/lib")
find_library(EXICONV_ICONV_LIBRARY
  NAMES iconv
  HINTS "${exiconv_bundled_lib_dir}")
find_library(EXICONV_CHARSETDETECT_LIBRARY
  NAMES charsetdetect
  HINTS "${exiconv_bundled_lib_dir}")
if(NOT EXICONV_ICONV_LIBRARY OR NOT EXICONV_CHARSETDETECT_LIBRARY)
  message(FATAL_ERROR
    "libiconv and libcharsetdetect are required (looked in ${exiconv_bundled_lib_dir} and the system library paths).")
endif()

# The library proper. main.c is deliberately NOT part of it: a reusable static
# library must not carry its own main().
add_library(exiconv STATIC
  src/core.c
  src/detect.c)
target_include_directories(exiconv
  PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/lib/charsetdetect_include"
    "${CMAKE_CURRENT_SOURCE_DIR}/lib/iconv_include"
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/src")
target_link_libraries(exiconv
  PUBLIC
    "${EXICONV_ICONV_LIBRARY}"
    "${EXICONV_CHARSETDETECT_LIBRARY}")

# Test driver; include paths and the two third-party libraries are inherited
# from the exiconv target.
add_executable(iconv_test src/main.c)
target_include_directories(iconv_test PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
target_link_libraries(iconv_test PRIVATE exiconv)

# ---------------------------------------------------------------------------
# API documentation
# ---------------------------------------------------------------------------
find_package(Doxygen)
option(BUILD_DOCUMENTATION
  "Create and install the HTML based API documentation (requires Doxygen)"
  ${DOXYGEN_FOUND})

if(BUILD_DOCUMENTATION)
  if(NOT DOXYGEN_FOUND)
    message(FATAL_ERROR "Doxygen is needed to build the documentation.")
  endif()

  set(doxyfile_in "${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in")
  set(doxyfile "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile")
  # Doxyfile.in expands @doxy_main_page@ (INPUT / USE_MDFILE_AS_MAINPAGE); it
  # was never defined before, so the main page setting ended up empty.
  set(doxy_main_page "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
  # NOTE(review): Doxyfile.in also expands @VERSION_MAJOR@, @VERSION_MINOR@ and
  # @VERSION_PATCH@, which are not defined anywhere in this file; define them
  # once the project has a version number.

  configure_file("${doxyfile_in}" "${doxyfile}" @ONLY)

  add_custom_target(doc
    COMMAND "${DOXYGEN_EXECUTABLE}" "${doxyfile}"
    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
    COMMENT "Generating API documentation with Doxygen"
    VERBATIM)

  # OPTIONAL: "make install" must not fail when the doc target was not built.
  install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html"
    DESTINATION share/doc
    OPTIONAL)
endif()
-------------------------------------------------------------------------------- /Doxyfile.in: -------------------------------------------------------------------------------- 1 | PROJECT_NAME = "@CMAKE_PROJECT_NAME@" 2 | PROJECT_NUMBER = @VERSION_MAJOR@.@VERSION_MINOR@.@VERSION_PATCH@ 3 | STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@ \ 4 | @PROJECT_BINARY_DIR@ 5 | INPUT = @doxy_main_page@ \ 6 | @PROJECT_SOURCE_DIR@ \ 7 | @PROJECT_BINARY_DIR@ 8 | FILE_PATTERNS = *.h \ 9 | *.c 10 | OUTPUT_LANGUAGE = Chinese 11 | RECURSIVE = YES 12 | EXTRACT_ALL = YES 13 | EXTRACT_PRIVATE = YES 14 | EXTRACT_STATIC = YES 15 | EXCLUDE_PATTERNS = */thirdparty/* 16 | USE_MDFILE_AS_MAINPAGE = @doxy_main_page@ 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 西风逍遥游 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README-cn.md: -------------------------------------------------------------------------------- 1 | # ExIconv 2 | 这是一个C语言编码自动解析库 3 | 4 | 对于跨平台开发,推荐使用C++版estring,支持conan包管理器: 5 | 6 | 整合了libiconv和libcharsetdetect 7 | 实现了自动读取文本并判断编码形式,转换为UTF-32方便处理 8 | 9 | 使用示例: 10 | ```C 11 | #include "stdio.h" 12 | #include "exiconv.h" 13 | #define BUFFER_SIZE 4096 14 | 15 | int main(int argc, const char * argv[]) { 16 | FILE* f = fopen(argv[1], "r"); 17 | size_t outsize; 18 | echar_t* str = autoreadfile(f, &outsize); 19 | printf("echar_len = %d\n", estrlen(str)); 20 | char* utf8_str = conv2utf8(str, &outsize); 21 | printf("utf8:%s\n", utf8_str); 22 | fclose(f); 23 | return 0; 24 | } 25 | ``` 26 | 27 | 核心接口: 28 | ```C 29 | 30 | typedef uint32_t echar_t; 31 | struct FILE; 32 | 33 | 34 | /** 35 | * @brief 将UTF-32的字符串转换为utf8编码,便于输出 36 | * 37 | * @param data UTF-32格式的字符串数组 38 | * @param outsize 转换后的C字符串长度 39 | * 40 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 41 | */ 42 | extern char* 43 | conv2utf8 (const echar_t* data, size_t* outsize); 44 | 45 | 46 | /** 47 | * @brief 将utf8编码转换为内部UTF-32格式 48 | * 49 | * @param data C风格字符串数组 50 | * @param outsize 转换后的UTF-32字符串长度 51 | * 52 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 53 | */ 54 | extern echar_t* 55 | utf8conv2echar (const char* data, size_t* outsize); 56 | 57 | 58 | 59 | 60 | /** 61 | * @brief 将UTF-32的字符串转换为utf8编码,便于输出 62 | * 63 | * @param data UTF-32格式的字符串数组 64 | 
* @param outsize 转换后的C字符串长度 65 | * 66 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 67 | */ 68 | extern char* 69 | echar2code (const echar_t* data, size_t* outsize, const char* encode); 70 | 71 | 72 | /** 73 | * @brief 将指定编码转换为内部UTF-32格式 74 | * 75 | * @param data C风格字符串数组 76 | * @param outsize 转换后的UTF-32字符串长度 77 | * 78 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 79 | */ 80 | extern echar_t* 81 | code2echar (const char* data, size_t* outsize, const char* encode); 82 | 83 | 84 | 85 | 86 | /** 87 | * @brief 自动读取文件下的文本内容,识别文本编码,并自动转换位为UTF-32的内部编码格式 88 | * 89 | * @param f 文件指针,读模式打开 90 | * @param outsize 转换后的字符串长度 91 | * 92 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 93 | */ 94 | extern echar_t* 95 | autoreadfile (FILE* f, size_t* outsize); 96 | 97 | 98 | /** 99 | * @brief 自动识别文本格式,并转换为内部使用的UTF-32 100 | * 101 | * @param data 传入的字符串原始数据 102 | * @param outsize 转换后的字符串长度 103 | * 104 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 105 | */ 106 | extern echar_t* 107 | autoreadchar (const char* data, size_t* outsize); 108 | 109 | 110 | /** 111 | * @brief 分析该段字符串的编码 112 | * 113 | * @param data 字符串元数据,必须\0结尾,否则可能异常 114 | * @return 编码名称,如果为NULL则分析失败 115 | */ 116 | extern const char* 117 | encodedetect (const char* data); 118 | 119 | /** 120 | * @brief 分析该文件的编码 121 | * 122 | * @param data 文件指针 123 | * @return 编码名称,如果为NULL则分析失败 124 | */ 125 | extern const char* 126 | fileencodedetect (FILE* f); 127 | 128 | 129 | /** 130 | * @brief 字符串长度计算 131 | * 132 | * @param str 要测量的字符串,必须0结尾 133 | * @return 长度 134 | */ 135 | extern size_t 136 | estrlen (const echar_t* str); 137 | 138 | 139 | /** 140 | * 释放字符串使用 141 | */ 142 | #define FreeStr(p) free_str((void**)&p) 143 | 144 | extern void 145 | free_str(void** p); 146 | 147 | ``` 148 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ExIconv 2 | 3 | [中文文档](./README-cn.md) 4 | 5 | It's an encoding auto-detection library in C 
6 | 7 | For a cross-platform C++ version, please use estring, which supports the Conan package manager: 8 | 9 | 10 | 11 | It combines libiconv and libcharsetdetect to auto-detect the encoding of a text and convert it directly to UTF-32. 12 | 13 | Example: 14 | 15 | ```C 16 | #include "stdio.h" 17 | #include "exiconv.h" 18 | #define BUFFER_SIZE 4096 19 | 20 | int main(int argc, const char * argv[]) { 21 | FILE* f = fopen(argv[1], "r"); 22 | size_t outsize; 23 | echar_t* str = autoreadfile(f, &outsize); 24 | printf("echar_len = %d\n", estrlen(str)); 25 | char* utf8_str = conv2utf8(str, &outsize); 26 | printf("utf8:%s\n", utf8_str); 27 | fclose(f); 28 | return 0; 29 | } 30 | ``` 31 | 32 | Functions: 33 | ```C 34 | 35 | typedef uint32_t echar_t; 36 | struct FILE; 37 | 38 | 39 | /** 40 | * @brief translate UTF-32 string to utf8 for output 41 | * 42 | * @param data UTF-32 array 43 | * @param outsize length of the string after transform 44 | * 45 | * @return It will auto malloc space for the new string, please free it after use 46 | */ 47 | extern char* 48 | conv2utf8 (const echar_t* data, size_t* outsize); 49 | 50 | 51 | /** 52 | * @brief translate utf8 to UTF-32 array 53 | * 54 | * @param data utf8 string array 55 | * @param outsize length of the string after transform 56 | * 57 | * @return It will auto malloc space for the new string, please free it after use 58 | */ 59 | extern echar_t* 60 | utf8conv2echar (const char* data, size_t* outsize); 61 | 62 | 63 | 64 | 65 | /** 66 | * @brief translate a UTF-32 string to a specified encoding 67 | * 68 | * @param data UTF-32 array 69 | * @param outsize length of the string after transform 70 | * 71 | * @return It will auto malloc space for the new string, please free it after use 72 | */ 73 | extern char* 74 | echar2code (const echar_t* data, size_t* outsize, const char* encode); 75 | 76 | 77 | /** 78 | * @brief translate a string in a specified encoding to a UTF-32 string 79 | * 80 | * @param data string array 81 | * @param outsize length of the string after
transform 82 | * 83 | * @return It will auto malloc space for the new string, please free it after use 84 | */ 85 | extern echar_t* 86 | code2echar (const char* data, size_t* outsize, const char* encode); 87 | 88 | 89 | 90 | 91 | /** 92 | * @brief Read the file and auto-detect encoding, then translate into UTF-32 array 93 | * 94 | * @param f FILE pointer (binary read mode) 95 | * @param outsize length of the string after transform 96 | * 97 | * @return It will auto malloc space for the new string, please free it after use 98 | */ 99 | extern echar_t* 100 | autoreadfile (FILE* f, size_t* outsize); 101 | 102 | 103 | /** 104 | * @brief auto-detect encoding, then translate into UTF-32 array 105 | * 106 | * @param data string data 107 | * @param outsize length of the string after transform 108 | * 109 | * @return It will auto malloc space for the new string, please free it after use 110 | */ 111 | extern echar_t* 112 | autoreadchar (const char* data, size_t* outsize); 113 | 114 | 115 | /** 116 | * @brief detect encoding 117 | * 118 | * @param data string data terminated by '\0' 119 | * @return name of the encoding, NULL on failure 120 | */ 121 | extern const char* 122 | encodedetect (const char* data); 123 | 124 | /** 125 | * @brief detect encoding for the file 126 | * 127 | * @param f FILE pointer (binary read mode) 128 | * @return name of the encoding, NULL on failure 129 | */ 130 | extern const char* 131 | fileencodedetect (FILE* f); 132 | 133 | 134 | /** 135 | * @brief get the length of string 136 | * 137 | * @param str string data terminated by 0 138 | * @return length 139 | */ 140 | extern size_t 141 | estrlen (const echar_t* str); 142 | 143 | 144 | /** 145 | * Free the data 146 | */ 147 | #define FreeStr(p) free_str((void**)&p) 148 | 149 | extern void 150 | free_str(void** p); 151 | 152 | ``` 153 | -------------------------------------------------------------------------------- /build.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # @Author: sxf 3 | # @Date: 2015-11-03 08:33:43 4 | # @Last Modified by: sxf 5 | # @Last Modified time: 2015-11-03 08:34:18 6 | 7 | mkdir ./build 8 | cd ./build 9 | cmake .. 10 | make 11 | cd .. -------------------------------------------------------------------------------- /include/exiconv.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: sxf 3 | * @Date: 2015-11-04 12:07:35 4 | * @Last Modified by: sxf 5 | * @Last Modified time: 2015-11-04 19:47:23 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | typedef uint32_t echar_t; 13 | struct FILE; 14 | 15 | 16 | /** 17 | * @brief 将UCS4的字符串转换为utf8编码,便于输出 18 | * 19 | * @param data UCS4格式的字符串数组 20 | * @param outsize 转换后的C字符串长度 21 | * 22 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 23 | */ 24 | extern char* 25 | conv2utf8 (const echar_t* data, size_t* outsize); 26 | 27 | 28 | /** 29 | * @brief 将utf8编码转换为内部UCS4格式 30 | * 31 | * @param data C风格字符串数组 32 | * @param outsize 转换后的UCS4字符串长度 33 | * 34 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 35 | */ 36 | extern echar_t* 37 | utf8conv2echar (const char* data, size_t* outsize); 38 | 39 | 40 | 41 | 42 | /** 43 | * @brief 将UCS4的字符串转换为utf8编码,便于输出 44 | * 45 | * @param data UCS4格式的字符串数组 46 | * @param outsize 转换后的C字符串长度 47 | * 48 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 49 | */ 50 | extern char* 51 | echar2code (const echar_t* data, size_t* outsize, const char* encode); 52 | 53 | 54 | /** 55 | * @brief 将指定编码转换为内部UCS4格式 56 | * 57 | * @param data C风格字符串数组 58 | * @param outsize 转换后的UCS4字符串长度 59 | * 60 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 61 | */ 62 | extern echar_t* 63 | code2echar (const char* data, size_t* outsize, const char* encode); 64 | 65 | 66 | 67 | 68 | /** 69 | * @brief 自动读取文件下的文本内容,识别文本编码,并自动转换位为UCS4的内部编码格式 70 | * 71 | * @param f 文件指针,读模式打开 72 | * @param outsize 转换后的字符串长度 73 | * 74 | * @return 
转换后的字符串,会自动新malloc空间,用过后由调用者释放 75 | */ 76 | extern echar_t* 77 | autoreadfile (FILE* f, size_t* outsize); 78 | 79 | 80 | /** 81 | * @brief 自动识别文本格式,并转换为内部使用的UCS4 82 | * 83 | * @param data 传入的字符串原始数据 84 | * @param outsize 转换后的字符串长度 85 | * 86 | * @return 转换后的字符串,会自动新malloc空间,用过后由调用者释放 87 | */ 88 | extern echar_t* 89 | autoreadchar (const char* data, size_t* outsize); 90 | 91 | 92 | /** 93 | * @brief 分析该段字符串的编码 94 | * 95 | * @param data 字符串元数据,必须\0结尾,否则可能异常 96 | * @return 编码名称,如果为NULL则分析失败 97 | */ 98 | extern const char* 99 | encodedetect (const char* data); 100 | 101 | /** 102 | * @brief 分析该文件的编码 103 | * 104 | * @param data 文件指针 105 | * @return 编码名称,如果为NULL则分析失败 106 | */ 107 | extern const char* 108 | fileencodedetect (FILE* f); 109 | 110 | 111 | /** 112 | * @brief 字符串长度计算 113 | * 114 | * @param str 要测量的字符串,必须0结尾 115 | * @return 长度 116 | */ 117 | extern size_t 118 | estrlen (const echar_t* str); 119 | 120 | 121 | 122 | #define FreeStr(p) free_str((void**)&p) 123 | 124 | extern void 125 | free_str(void** p); 126 | 127 | -------------------------------------------------------------------------------- /lib/charsetdetect_include/charsetdetect.h: -------------------------------------------------------------------------------- 1 | #ifndef charsetdetect_ 2 | #define charsetdetect_ 3 | 4 | /* The classes below are exported */ 5 | #pragma GCC visibility push(default) 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | // Opaque type of character set detectors 12 | typedef void* csd_t; 13 | 14 | // Create a new character set detector. Must be freed by csd_close. 15 | // If creation fails, returns (csd_t)-1. 16 | csd_t csd_open(void); 17 | // Feeds some more data to the character set detector. Returns 0 if it 18 | // needs more data to come to a conclusion and a positive number if it has enough to say what 19 | // the character set is. Returns a negative number if there is an error. 
20 | int csd_consider(csd_t csd, const char *data, int length); 21 | // Closes the character set detector and returns the detected character set name as an ASCII string. 22 | // Returns NULL if detection failed. 23 | const char *csd_close(csd_t csd); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #pragma GCC visibility pop 30 | #endif 31 | -------------------------------------------------------------------------------- /lib/iconv_include/export.h: -------------------------------------------------------------------------------- 1 | 2 | #if @HAVE_VISIBILITY@ && BUILDING_LIBICONV 3 | #define LIBICONV_DLL_EXPORTED __attribute__((__visibility__("default"))) 4 | #else 5 | #define LIBICONV_DLL_EXPORTED 6 | #endif 7 | -------------------------------------------------------------------------------- /lib/iconv_include/iconv.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1999-2003, 2005-2006, 2008-2011 Free Software Foundation, Inc. 2 | This file is part of the GNU LIBICONV Library. 3 | 4 | The GNU LIBICONV Library is free software; you can redistribute it 5 | and/or modify it under the terms of the GNU Library General Public 6 | License as published by the Free Software Foundation; either version 2 7 | of the License, or (at your option) any later version. 8 | 9 | The GNU LIBICONV Library is distributed in the hope that it will be 10 | useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Library General Public License for more details. 13 | 14 | You should have received a copy of the GNU Library General Public 15 | License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 | If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | Fifth Floor, Boston, MA 02110-1301, USA. */ 18 | 19 | /* When installed, this file is called "iconv.h". 
*/ 20 | 21 | #ifndef _LIBICONV_H 22 | #define _LIBICONV_H 23 | 24 | #define _LIBICONV_VERSION 0x010E /* version number: (major<<8) + minor */ 25 | 26 | #if 1 && BUILDING_LIBICONV 27 | #define LIBICONV_DLL_EXPORTED __attribute__((__visibility__("default"))) 28 | #else 29 | #define LIBICONV_DLL_EXPORTED 30 | #endif 31 | extern LIBICONV_DLL_EXPORTED int _libiconv_version; /* Likewise */ 32 | 33 | /* We would like to #include any system header file which could define 34 | iconv_t, 1. in order to eliminate the risk that the user gets compilation 35 | errors because some other system header file includes /usr/include/iconv.h 36 | which defines iconv_t or declares iconv after this file, 2. when compiling 37 | for LIBICONV_PLUG, we need the proper iconv_t type in order to produce 38 | binary compatible code. 39 | But gcc's #include_next is not portable. Thus, once libiconv's iconv.h 40 | has been installed in /usr/local/include, there is no way any more to 41 | include the original /usr/include/iconv.h. We simply have to get away 42 | without it. 43 | Ad 1. The risk that a system header file does 44 | #include "iconv.h" or #include_next "iconv.h" 45 | is small. They all do #include . 46 | Ad 2. The iconv_t type is a pointer type in all cases I have seen. (It 47 | has to be a scalar type because (iconv_t)(-1) is a possible return value 48 | from iconv_open().) */ 49 | 50 | /* Define iconv_t ourselves. */ 51 | #undef iconv_t 52 | #define iconv_t libiconv_t 53 | typedef void* iconv_t; 54 | 55 | /* Get size_t declaration. 56 | Get wchar_t declaration if it exists. */ 57 | #include 58 | 59 | /* Get errno declaration and values. */ 60 | #include 61 | /* Some systems, like SunOS 4, don't have EILSEQ. Some systems, like BSD/OS, 62 | have EILSEQ in a different header. On these systems, define EILSEQ 63 | ourselves. 
*/ 64 | #ifndef EILSEQ 65 | #define EILSEQ 66 | #endif 67 | 68 | 69 | #ifdef __cplusplus 70 | extern "C" { 71 | #endif 72 | 73 | 74 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 75 | encoding ‘tocode’. */ 76 | #ifndef LIBICONV_PLUG 77 | #define iconv_open libiconv_open 78 | #endif 79 | extern LIBICONV_DLL_EXPORTED iconv_t iconv_open (const char* tocode, const char* fromcode); 80 | 81 | /* Converts, using conversion descriptor ‘cd’, at most ‘*inbytesleft’ bytes 82 | starting at ‘*inbuf’, writing at most ‘*outbytesleft’ bytes starting at 83 | ‘*outbuf’. 84 | Decrements ‘*inbytesleft’ and increments ‘*inbuf’ by the same amount. 85 | Decrements ‘*outbytesleft’ and increments ‘*outbuf’ by the same amount. */ 86 | #ifndef LIBICONV_PLUG 87 | #define iconv libiconv 88 | #endif 89 | extern LIBICONV_DLL_EXPORTED size_t iconv (iconv_t cd, char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft); 90 | 91 | /* Frees resources allocated for conversion descriptor ‘cd’. */ 92 | #ifndef LIBICONV_PLUG 93 | #define iconv_close libiconv_close 94 | #endif 95 | extern LIBICONV_DLL_EXPORTED int iconv_close (iconv_t cd); 96 | 97 | 98 | #ifdef __cplusplus 99 | } 100 | #endif 101 | 102 | 103 | #ifndef LIBICONV_PLUG 104 | 105 | /* Nonstandard extensions. */ 106 | 107 | #if 1 108 | #if 0 109 | /* Tru64 with Desktop Toolkit C has a bug: must be included before 110 | . 111 | BSD/OS 4.0.1 has a bug: , and must be 112 | included before . */ 113 | #include 114 | #include 115 | #include 116 | #endif 117 | #include 118 | #endif 119 | 120 | #ifdef __cplusplus 121 | extern "C" { 122 | #endif 123 | 124 | /* A type that holds all memory needed by a conversion descriptor. 125 | A pointer to such an object can be used as an iconv_t. 
*/ 126 | typedef struct { 127 | void* dummy1[28]; 128 | #if 1 129 | mbstate_t dummy2; 130 | #endif 131 | } iconv_allocation_t; 132 | 133 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 134 | encoding ‘tocode’ into preallocated memory. Returns an error indicator 135 | (0 or -1 with errno set). */ 136 | #define iconv_open_into libiconv_open_into 137 | extern LIBICONV_DLL_EXPORTED int iconv_open_into (const char* tocode, const char* fromcode, 138 | iconv_allocation_t* resultp); 139 | 140 | /* Control of attributes. */ 141 | #define iconvctl libiconvctl 142 | extern LIBICONV_DLL_EXPORTED int iconvctl (iconv_t cd, int request, void* argument); 143 | 144 | /* Hook performed after every successful conversion of a Unicode character. */ 145 | typedef void (*iconv_unicode_char_hook) (unsigned int uc, void* data); 146 | /* Hook performed after every successful conversion of a wide character. */ 147 | typedef void (*iconv_wide_char_hook) (wchar_t wc, void* data); 148 | /* Set of hooks. */ 149 | struct iconv_hooks { 150 | iconv_unicode_char_hook uc_hook; 151 | iconv_wide_char_hook wc_hook; 152 | void* data; 153 | }; 154 | 155 | /* Fallback function. Invoked when a small number of bytes could not be 156 | converted to a Unicode character. This function should process all 157 | bytes from inbuf and may produce replacement Unicode characters by calling 158 | the write_replacement callback repeatedly. */ 159 | typedef void (*iconv_unicode_mb_to_uc_fallback) 160 | (const char* inbuf, size_t inbufsize, 161 | void (*write_replacement) (const unsigned int *buf, size_t buflen, 162 | void* callback_arg), 163 | void* callback_arg, 164 | void* data); 165 | /* Fallback function. Invoked when a Unicode character could not be converted 166 | to the target encoding. This function should process the character and 167 | may produce replacement bytes (in the target encoding) by calling the 168 | write_replacement callback repeatedly. 
*/ 169 | typedef void (*iconv_unicode_uc_to_mb_fallback) 170 | (unsigned int code, 171 | void (*write_replacement) (const char *buf, size_t buflen, 172 | void* callback_arg), 173 | void* callback_arg, 174 | void* data); 175 | #if 1 176 | /* Fallback function. Invoked when a number of bytes could not be converted to 177 | a wide character. This function should process all bytes from inbuf and may 178 | produce replacement wide characters by calling the write_replacement 179 | callback repeatedly. */ 180 | typedef void (*iconv_wchar_mb_to_wc_fallback) 181 | (const char* inbuf, size_t inbufsize, 182 | void (*write_replacement) (const wchar_t *buf, size_t buflen, 183 | void* callback_arg), 184 | void* callback_arg, 185 | void* data); 186 | /* Fallback function. Invoked when a wide character could not be converted to 187 | the target encoding. This function should process the character and may 188 | produce replacement bytes (in the target encoding) by calling the 189 | write_replacement callback repeatedly. */ 190 | typedef void (*iconv_wchar_wc_to_mb_fallback) 191 | (wchar_t code, 192 | void (*write_replacement) (const char *buf, size_t buflen, 193 | void* callback_arg), 194 | void* callback_arg, 195 | void* data); 196 | #else 197 | /* If the wchar_t type does not exist, these two fallback functions are never 198 | invoked. Their argument list therefore does not matter. */ 199 | typedef void (*iconv_wchar_mb_to_wc_fallback) (); 200 | typedef void (*iconv_wchar_wc_to_mb_fallback) (); 201 | #endif 202 | /* Set of fallbacks. */ 203 | struct iconv_fallbacks { 204 | iconv_unicode_mb_to_uc_fallback mb_to_uc_fallback; 205 | iconv_unicode_uc_to_mb_fallback uc_to_mb_fallback; 206 | iconv_wchar_mb_to_wc_fallback mb_to_wc_fallback; 207 | iconv_wchar_wc_to_mb_fallback wc_to_mb_fallback; 208 | void* data; 209 | }; 210 | 211 | /* Requests for iconvctl. 
*/ 212 | #define ICONV_TRIVIALP 0 /* int *argument */ 213 | #define ICONV_GET_TRANSLITERATE 1 /* int *argument */ 214 | #define ICONV_SET_TRANSLITERATE 2 /* const int *argument */ 215 | #define ICONV_GET_DISCARD_ILSEQ 3 /* int *argument */ 216 | #define ICONV_SET_DISCARD_ILSEQ 4 /* const int *argument */ 217 | #define ICONV_SET_HOOKS 5 /* const struct iconv_hooks *argument */ 218 | #define ICONV_SET_FALLBACKS 6 /* const struct iconv_fallbacks *argument */ 219 | 220 | /* Listing of locale independent encodings. */ 221 | #define iconvlist libiconvlist 222 | extern LIBICONV_DLL_EXPORTED void iconvlist (int (*do_one) (unsigned int namescount, 223 | const char * const * names, 224 | void* data), 225 | void* data); 226 | 227 | /* Canonicalize an encoding name. 228 | The result is either a canonical encoding name, or name itself. */ 229 | extern LIBICONV_DLL_EXPORTED const char * iconv_canonicalize (const char * name); 230 | 231 | /* Support for relocatable packages. */ 232 | 233 | /* Sets the original and the current installation prefix of the package. 234 | Relocation simply replaces a pathname starting with the original prefix 235 | by the corresponding pathname with the current prefix instead. Both 236 | prefixes should be directory names without trailing slash (i.e. use "" 237 | instead of "/"). */ 238 | extern LIBICONV_DLL_EXPORTED void libiconv_set_relocation_prefix (const char *orig_prefix, 239 | const char *curr_prefix); 240 | 241 | #ifdef __cplusplus 242 | } 243 | #endif 244 | 245 | #endif 246 | 247 | 248 | #endif /* _LIBICONV_H */ 249 | -------------------------------------------------------------------------------- /lib/iconv_include/iconv.h.build.in: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1999-2003, 2005-2006, 2008-2011 Free Software Foundation, Inc. 2 | This file is part of the GNU LIBICONV Library. 
3 | 4 | The GNU LIBICONV Library is free software; you can redistribute it 5 | and/or modify it under the terms of the GNU Library General Public 6 | License as published by the Free Software Foundation; either version 2 7 | of the License, or (at your option) any later version. 8 | 9 | The GNU LIBICONV Library is distributed in the hope that it will be 10 | useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Library General Public License for more details. 13 | 14 | You should have received a copy of the GNU Library General Public 15 | License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 | If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | Fifth Floor, Boston, MA 02110-1301, USA. */ 18 | 19 | /* When installed, this file is called "iconv.h". */ 20 | 21 | #ifndef _LIBICONV_H 22 | #define _LIBICONV_H 23 | 24 | #define _LIBICONV_VERSION 0x010E /* version number: (major<<8) + minor */ 25 | 26 | #if @HAVE_VISIBILITY@ && BUILDING_LIBICONV 27 | #define LIBICONV_DLL_EXPORTED __attribute__((__visibility__("default"))) 28 | #else 29 | #define LIBICONV_DLL_EXPORTED 30 | #endif 31 | extern LIBICONV_DLL_EXPORTED @DLL_VARIABLE@ int _libiconv_version; /* Likewise */ 32 | 33 | /* We would like to #include any system header file which could define 34 | iconv_t, 1. in order to eliminate the risk that the user gets compilation 35 | errors because some other system header file includes /usr/include/iconv.h 36 | which defines iconv_t or declares iconv after this file, 2. when compiling 37 | for LIBICONV_PLUG, we need the proper iconv_t type in order to produce 38 | binary compatible code. 39 | But gcc's #include_next is not portable. Thus, once libiconv's iconv.h 40 | has been installed in /usr/local/include, there is no way any more to 41 | include the original /usr/include/iconv.h. We simply have to get away 42 | without it. 43 | Ad 1. 
The risk that a system header file does 44 | #include "iconv.h" or #include_next "iconv.h" 45 | is small. They all do #include . 46 | Ad 2. The iconv_t type is a pointer type in all cases I have seen. (It 47 | has to be a scalar type because (iconv_t)(-1) is a possible return value 48 | from iconv_open().) */ 49 | 50 | /* Define iconv_t ourselves. */ 51 | #undef iconv_t 52 | #define iconv_t libiconv_t 53 | typedef void* iconv_t; 54 | 55 | /* Get size_t declaration. 56 | Get wchar_t declaration if it exists. */ 57 | #include 58 | 59 | /* Get errno declaration and values. */ 60 | #include 61 | /* Some systems, like SunOS 4, don't have EILSEQ. Some systems, like BSD/OS, 62 | have EILSEQ in a different header. On these systems, define EILSEQ 63 | ourselves. */ 64 | #ifndef EILSEQ 65 | #define EILSEQ @EILSEQ@ 66 | #endif 67 | 68 | 69 | #ifdef __cplusplus 70 | extern "C" { 71 | #endif 72 | 73 | 74 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 75 | encoding ‘tocode’. */ 76 | #ifndef LIBICONV_PLUG 77 | #define iconv_open libiconv_open 78 | #endif 79 | extern LIBICONV_DLL_EXPORTED iconv_t iconv_open (const char* tocode, const char* fromcode); 80 | 81 | /* Converts, using conversion descriptor ‘cd’, at most ‘*inbytesleft’ bytes 82 | starting at ‘*inbuf’, writing at most ‘*outbytesleft’ bytes starting at 83 | ‘*outbuf’. 84 | Decrements ‘*inbytesleft’ and increments ‘*inbuf’ by the same amount. 85 | Decrements ‘*outbytesleft’ and increments ‘*outbuf’ by the same amount. */ 86 | #ifndef LIBICONV_PLUG 87 | #define iconv libiconv 88 | #endif 89 | extern LIBICONV_DLL_EXPORTED size_t iconv (iconv_t cd, @ICONV_CONST@ char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft); 90 | 91 | /* Frees resources allocated for conversion descriptor ‘cd’. 
*/ 92 | #ifndef LIBICONV_PLUG 93 | #define iconv_close libiconv_close 94 | #endif 95 | extern LIBICONV_DLL_EXPORTED int iconv_close (iconv_t cd); 96 | 97 | 98 | #ifdef __cplusplus 99 | } 100 | #endif 101 | 102 | 103 | #ifndef LIBICONV_PLUG 104 | 105 | /* Nonstandard extensions. */ 106 | 107 | #if @USE_MBSTATE_T@ 108 | #if @BROKEN_WCHAR_H@ 109 | /* Tru64 with Desktop Toolkit C has a bug: must be included before 110 | . 111 | BSD/OS 4.0.1 has a bug: , and must be 112 | included before . */ 113 | #include 114 | #include 115 | #include 116 | #endif 117 | #include 118 | #endif 119 | 120 | #ifdef __cplusplus 121 | extern "C" { 122 | #endif 123 | 124 | /* A type that holds all memory needed by a conversion descriptor. 125 | A pointer to such an object can be used as an iconv_t. */ 126 | typedef struct { 127 | void* dummy1[28]; 128 | #if @USE_MBSTATE_T@ 129 | mbstate_t dummy2; 130 | #endif 131 | } iconv_allocation_t; 132 | 133 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 134 | encoding ‘tocode’ into preallocated memory. Returns an error indicator 135 | (0 or -1 with errno set). */ 136 | #define iconv_open_into libiconv_open_into 137 | extern LIBICONV_DLL_EXPORTED int iconv_open_into (const char* tocode, const char* fromcode, 138 | iconv_allocation_t* resultp); 139 | 140 | /* Control of attributes. */ 141 | #define iconvctl libiconvctl 142 | extern LIBICONV_DLL_EXPORTED int iconvctl (iconv_t cd, int request, void* argument); 143 | 144 | /* Hook performed after every successful conversion of a Unicode character. */ 145 | typedef void (*iconv_unicode_char_hook) (unsigned int uc, void* data); 146 | /* Hook performed after every successful conversion of a wide character. */ 147 | typedef void (*iconv_wide_char_hook) (wchar_t wc, void* data); 148 | /* Set of hooks. */ 149 | struct iconv_hooks { 150 | iconv_unicode_char_hook uc_hook; 151 | iconv_wide_char_hook wc_hook; 152 | void* data; 153 | }; 154 | 155 | /* Fallback function. 
Invoked when a small number of bytes could not be 156 | converted to a Unicode character. This function should process all 157 | bytes from inbuf and may produce replacement Unicode characters by calling 158 | the write_replacement callback repeatedly. */ 159 | typedef void (*iconv_unicode_mb_to_uc_fallback) 160 | (const char* inbuf, size_t inbufsize, 161 | void (*write_replacement) (const unsigned int *buf, size_t buflen, 162 | void* callback_arg), 163 | void* callback_arg, 164 | void* data); 165 | /* Fallback function. Invoked when a Unicode character could not be converted 166 | to the target encoding. This function should process the character and 167 | may produce replacement bytes (in the target encoding) by calling the 168 | write_replacement callback repeatedly. */ 169 | typedef void (*iconv_unicode_uc_to_mb_fallback) 170 | (unsigned int code, 171 | void (*write_replacement) (const char *buf, size_t buflen, 172 | void* callback_arg), 173 | void* callback_arg, 174 | void* data); 175 | #if @HAVE_WCHAR_T@ 176 | /* Fallback function. Invoked when a number of bytes could not be converted to 177 | a wide character. This function should process all bytes from inbuf and may 178 | produce replacement wide characters by calling the write_replacement 179 | callback repeatedly. */ 180 | typedef void (*iconv_wchar_mb_to_wc_fallback) 181 | (const char* inbuf, size_t inbufsize, 182 | void (*write_replacement) (const wchar_t *buf, size_t buflen, 183 | void* callback_arg), 184 | void* callback_arg, 185 | void* data); 186 | /* Fallback function. Invoked when a wide character could not be converted to 187 | the target encoding. This function should process the character and may 188 | produce replacement bytes (in the target encoding) by calling the 189 | write_replacement callback repeatedly. 
*/ 190 | typedef void (*iconv_wchar_wc_to_mb_fallback) 191 | (wchar_t code, 192 | void (*write_replacement) (const char *buf, size_t buflen, 193 | void* callback_arg), 194 | void* callback_arg, 195 | void* data); 196 | #else 197 | /* If the wchar_t type does not exist, these two fallback functions are never 198 | invoked. Their argument list therefore does not matter. */ 199 | typedef void (*iconv_wchar_mb_to_wc_fallback) (); 200 | typedef void (*iconv_wchar_wc_to_mb_fallback) (); 201 | #endif 202 | /* Set of fallbacks. */ 203 | struct iconv_fallbacks { 204 | iconv_unicode_mb_to_uc_fallback mb_to_uc_fallback; 205 | iconv_unicode_uc_to_mb_fallback uc_to_mb_fallback; 206 | iconv_wchar_mb_to_wc_fallback mb_to_wc_fallback; 207 | iconv_wchar_wc_to_mb_fallback wc_to_mb_fallback; 208 | void* data; 209 | }; 210 | 211 | /* Requests for iconvctl. */ 212 | #define ICONV_TRIVIALP 0 /* int *argument */ 213 | #define ICONV_GET_TRANSLITERATE 1 /* int *argument */ 214 | #define ICONV_SET_TRANSLITERATE 2 /* const int *argument */ 215 | #define ICONV_GET_DISCARD_ILSEQ 3 /* int *argument */ 216 | #define ICONV_SET_DISCARD_ILSEQ 4 /* const int *argument */ 217 | #define ICONV_SET_HOOKS 5 /* const struct iconv_hooks *argument */ 218 | #define ICONV_SET_FALLBACKS 6 /* const struct iconv_fallbacks *argument */ 219 | 220 | /* Listing of locale independent encodings. */ 221 | #define iconvlist libiconvlist 222 | extern LIBICONV_DLL_EXPORTED void iconvlist (int (*do_one) (unsigned int namescount, 223 | const char * const * names, 224 | void* data), 225 | void* data); 226 | 227 | /* Canonicalize an encoding name. 228 | The result is either a canonical encoding name, or name itself. */ 229 | extern LIBICONV_DLL_EXPORTED const char * iconv_canonicalize (const char * name); 230 | 231 | /* Support for relocatable packages. */ 232 | 233 | /* Sets the original and the current installation prefix of the package. 
234 | Relocation simply replaces a pathname starting with the original prefix 235 | by the corresponding pathname with the current prefix instead. Both 236 | prefixes should be directory names without trailing slash (i.e. use "" 237 | instead of "/"). */ 238 | extern LIBICONV_DLL_EXPORTED void libiconv_set_relocation_prefix (const char *orig_prefix, 239 | const char *curr_prefix); 240 | 241 | #ifdef __cplusplus 242 | } 243 | #endif 244 | 245 | #endif 246 | 247 | 248 | #endif /* _LIBICONV_H */ 249 | -------------------------------------------------------------------------------- /lib/iconv_include/iconv.h.in: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1999-2003, 2005-2006, 2008-2011 Free Software Foundation, Inc. 2 | This file is part of the GNU LIBICONV Library. 3 | 4 | The GNU LIBICONV Library is free software; you can redistribute it 5 | and/or modify it under the terms of the GNU Library General Public 6 | License as published by the Free Software Foundation; either version 2 7 | of the License, or (at your option) any later version. 8 | 9 | The GNU LIBICONV Library is distributed in the hope that it will be 10 | useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Library General Public License for more details. 13 | 14 | You should have received a copy of the GNU Library General Public 15 | License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 | If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | Fifth Floor, Boston, MA 02110-1301, USA. */ 18 | 19 | /* When installed, this file is called "iconv.h". 
*/ 20 | 21 | #ifndef _LIBICONV_H 22 | #define _LIBICONV_H 23 | 24 | #define _LIBICONV_VERSION 0x010E /* version number: (major<<8) + minor */ 25 | extern @DLL_VARIABLE@ int _libiconv_version; /* Likewise */ 26 | 27 | /* We would like to #include any system header file which could define 28 | iconv_t, 1. in order to eliminate the risk that the user gets compilation 29 | errors because some other system header file includes /usr/include/iconv.h 30 | which defines iconv_t or declares iconv after this file, 2. when compiling 31 | for LIBICONV_PLUG, we need the proper iconv_t type in order to produce 32 | binary compatible code. 33 | But gcc's #include_next is not portable. Thus, once libiconv's iconv.h 34 | has been installed in /usr/local/include, there is no way any more to 35 | include the original /usr/include/iconv.h. We simply have to get away 36 | without it. 37 | Ad 1. The risk that a system header file does 38 | #include "iconv.h" or #include_next "iconv.h" 39 | is small. They all do #include . 40 | Ad 2. The iconv_t type is a pointer type in all cases I have seen. (It 41 | has to be a scalar type because (iconv_t)(-1) is a possible return value 42 | from iconv_open().) */ 43 | 44 | /* Define iconv_t ourselves. */ 45 | #undef iconv_t 46 | #define iconv_t libiconv_t 47 | typedef void* iconv_t; 48 | 49 | /* Get size_t declaration. 50 | Get wchar_t declaration if it exists. */ 51 | #include 52 | 53 | /* Get errno declaration and values. */ 54 | #include 55 | /* Some systems, like SunOS 4, don't have EILSEQ. Some systems, like BSD/OS, 56 | have EILSEQ in a different header. On these systems, define EILSEQ 57 | ourselves. */ 58 | #ifndef EILSEQ 59 | #define EILSEQ @EILSEQ@ 60 | #endif 61 | 62 | 63 | #ifdef __cplusplus 64 | extern "C" { 65 | #endif 66 | 67 | 68 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 69 | encoding ‘tocode’. 
*/ 70 | #ifndef LIBICONV_PLUG 71 | #define iconv_open libiconv_open 72 | #endif 73 | extern iconv_t iconv_open (const char* tocode, const char* fromcode); 74 | 75 | /* Converts, using conversion descriptor ‘cd’, at most ‘*inbytesleft’ bytes 76 | starting at ‘*inbuf’, writing at most ‘*outbytesleft’ bytes starting at 77 | ‘*outbuf’. 78 | Decrements ‘*inbytesleft’ and increments ‘*inbuf’ by the same amount. 79 | Decrements ‘*outbytesleft’ and increments ‘*outbuf’ by the same amount. */ 80 | #ifndef LIBICONV_PLUG 81 | #define iconv libiconv 82 | #endif 83 | extern size_t iconv (iconv_t cd, @ICONV_CONST@ char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft); 84 | 85 | /* Frees resources allocated for conversion descriptor ‘cd’. */ 86 | #ifndef LIBICONV_PLUG 87 | #define iconv_close libiconv_close 88 | #endif 89 | extern int iconv_close (iconv_t cd); 90 | 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | 97 | #ifndef LIBICONV_PLUG 98 | 99 | /* Nonstandard extensions. */ 100 | 101 | #if @USE_MBSTATE_T@ 102 | #if @BROKEN_WCHAR_H@ 103 | /* Tru64 with Desktop Toolkit C has a bug: must be included before 104 | . 105 | BSD/OS 4.0.1 has a bug: , and must be 106 | included before . */ 107 | #include 108 | #include 109 | #include 110 | #endif 111 | #include 112 | #endif 113 | 114 | #ifdef __cplusplus 115 | extern "C" { 116 | #endif 117 | 118 | /* A type that holds all memory needed by a conversion descriptor. 119 | A pointer to such an object can be used as an iconv_t. */ 120 | typedef struct { 121 | void* dummy1[28]; 122 | #if @USE_MBSTATE_T@ 123 | mbstate_t dummy2; 124 | #endif 125 | } iconv_allocation_t; 126 | 127 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 128 | encoding ‘tocode’ into preallocated memory. Returns an error indicator 129 | (0 or -1 with errno set). 
*/ 130 | #define iconv_open_into libiconv_open_into 131 | extern int iconv_open_into (const char* tocode, const char* fromcode, 132 | iconv_allocation_t* resultp); 133 | 134 | /* Control of attributes. */ 135 | #define iconvctl libiconvctl 136 | extern int iconvctl (iconv_t cd, int request, void* argument); 137 | 138 | /* Hook performed after every successful conversion of a Unicode character. */ 139 | typedef void (*iconv_unicode_char_hook) (unsigned int uc, void* data); 140 | /* Hook performed after every successful conversion of a wide character. */ 141 | typedef void (*iconv_wide_char_hook) (wchar_t wc, void* data); 142 | /* Set of hooks. */ 143 | struct iconv_hooks { 144 | iconv_unicode_char_hook uc_hook; 145 | iconv_wide_char_hook wc_hook; 146 | void* data; 147 | }; 148 | 149 | /* Fallback function. Invoked when a small number of bytes could not be 150 | converted to a Unicode character. This function should process all 151 | bytes from inbuf and may produce replacement Unicode characters by calling 152 | the write_replacement callback repeatedly. */ 153 | typedef void (*iconv_unicode_mb_to_uc_fallback) 154 | (const char* inbuf, size_t inbufsize, 155 | void (*write_replacement) (const unsigned int *buf, size_t buflen, 156 | void* callback_arg), 157 | void* callback_arg, 158 | void* data); 159 | /* Fallback function. Invoked when a Unicode character could not be converted 160 | to the target encoding. This function should process the character and 161 | may produce replacement bytes (in the target encoding) by calling the 162 | write_replacement callback repeatedly. */ 163 | typedef void (*iconv_unicode_uc_to_mb_fallback) 164 | (unsigned int code, 165 | void (*write_replacement) (const char *buf, size_t buflen, 166 | void* callback_arg), 167 | void* callback_arg, 168 | void* data); 169 | #if @HAVE_WCHAR_T@ 170 | /* Fallback function. Invoked when a number of bytes could not be converted to 171 | a wide character. 
This function should process all bytes from inbuf and may 172 | produce replacement wide characters by calling the write_replacement 173 | callback repeatedly. */ 174 | typedef void (*iconv_wchar_mb_to_wc_fallback) 175 | (const char* inbuf, size_t inbufsize, 176 | void (*write_replacement) (const wchar_t *buf, size_t buflen, 177 | void* callback_arg), 178 | void* callback_arg, 179 | void* data); 180 | /* Fallback function. Invoked when a wide character could not be converted to 181 | the target encoding. This function should process the character and may 182 | produce replacement bytes (in the target encoding) by calling the 183 | write_replacement callback repeatedly. */ 184 | typedef void (*iconv_wchar_wc_to_mb_fallback) 185 | (wchar_t code, 186 | void (*write_replacement) (const char *buf, size_t buflen, 187 | void* callback_arg), 188 | void* callback_arg, 189 | void* data); 190 | #else 191 | /* If the wchar_t type does not exist, these two fallback functions are never 192 | invoked. Their argument list therefore does not matter. */ 193 | typedef void (*iconv_wchar_mb_to_wc_fallback) (); 194 | typedef void (*iconv_wchar_wc_to_mb_fallback) (); 195 | #endif 196 | /* Set of fallbacks. */ 197 | struct iconv_fallbacks { 198 | iconv_unicode_mb_to_uc_fallback mb_to_uc_fallback; 199 | iconv_unicode_uc_to_mb_fallback uc_to_mb_fallback; 200 | iconv_wchar_mb_to_wc_fallback mb_to_wc_fallback; 201 | iconv_wchar_wc_to_mb_fallback wc_to_mb_fallback; 202 | void* data; 203 | }; 204 | 205 | /* Requests for iconvctl. 
*/ 206 | #define ICONV_TRIVIALP 0 /* int *argument */ 207 | #define ICONV_GET_TRANSLITERATE 1 /* int *argument */ 208 | #define ICONV_SET_TRANSLITERATE 2 /* const int *argument */ 209 | #define ICONV_GET_DISCARD_ILSEQ 3 /* int *argument */ 210 | #define ICONV_SET_DISCARD_ILSEQ 4 /* const int *argument */ 211 | #define ICONV_SET_HOOKS 5 /* const struct iconv_hooks *argument */ 212 | #define ICONV_SET_FALLBACKS 6 /* const struct iconv_fallbacks *argument */ 213 | 214 | /* Listing of locale independent encodings. */ 215 | #define iconvlist libiconvlist 216 | extern void iconvlist (int (*do_one) (unsigned int namescount, 217 | const char * const * names, 218 | void* data), 219 | void* data); 220 | 221 | /* Canonicalize an encoding name. 222 | The result is either a canonical encoding name, or name itself. */ 223 | extern const char * iconv_canonicalize (const char * name); 224 | 225 | /* Support for relocatable packages. */ 226 | 227 | /* Sets the original and the current installation prefix of the package. 228 | Relocation simply replaces a pathname starting with the original prefix 229 | by the corresponding pathname with the current prefix instead. Both 230 | prefixes should be directory names without trailing slash (i.e. use "" 231 | instead of "/"). */ 232 | extern void libiconv_set_relocation_prefix (const char *orig_prefix, 233 | const char *curr_prefix); 234 | 235 | #ifdef __cplusplus 236 | } 237 | #endif 238 | 239 | #endif 240 | 241 | 242 | #endif /* _LIBICONV_H */ 243 | -------------------------------------------------------------------------------- /lib/iconv_include/iconv.h.inst: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1999-2003, 2005-2006, 2008-2011 Free Software Foundation, Inc. 2 | This file is part of the GNU LIBICONV Library. 
3 | 4 | The GNU LIBICONV Library is free software; you can redistribute it 5 | and/or modify it under the terms of the GNU Library General Public 6 | License as published by the Free Software Foundation; either version 2 7 | of the License, or (at your option) any later version. 8 | 9 | The GNU LIBICONV Library is distributed in the hope that it will be 10 | useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Library General Public License for more details. 13 | 14 | You should have received a copy of the GNU Library General Public 15 | License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 | If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 | Fifth Floor, Boston, MA 02110-1301, USA. */ 18 | 19 | /* When installed, this file is called "iconv.h". */ 20 | 21 | #ifndef _LIBICONV_H 22 | #define _LIBICONV_H 23 | 24 | #define _LIBICONV_VERSION 0x010E /* version number: (major<<8) + minor */ 25 | extern int _libiconv_version; /* Likewise */ 26 | 27 | /* We would like to #include any system header file which could define 28 | iconv_t, 1. in order to eliminate the risk that the user gets compilation 29 | errors because some other system header file includes /usr/include/iconv.h 30 | which defines iconv_t or declares iconv after this file, 2. when compiling 31 | for LIBICONV_PLUG, we need the proper iconv_t type in order to produce 32 | binary compatible code. 33 | But gcc's #include_next is not portable. Thus, once libiconv's iconv.h 34 | has been installed in /usr/local/include, there is no way any more to 35 | include the original /usr/include/iconv.h. We simply have to get away 36 | without it. 37 | Ad 1. The risk that a system header file does 38 | #include "iconv.h" or #include_next "iconv.h" 39 | is small. They all do #include . 40 | Ad 2. The iconv_t type is a pointer type in all cases I have seen. 
(It 41 | has to be a scalar type because (iconv_t)(-1) is a possible return value 42 | from iconv_open().) */ 43 | 44 | /* Define iconv_t ourselves. */ 45 | #undef iconv_t 46 | #define iconv_t libiconv_t 47 | typedef void* iconv_t; 48 | 49 | /* Get size_t declaration. 50 | Get wchar_t declaration if it exists. */ 51 | #include 52 | 53 | /* Get errno declaration and values. */ 54 | #include 55 | /* Some systems, like SunOS 4, don't have EILSEQ. Some systems, like BSD/OS, 56 | have EILSEQ in a different header. On these systems, define EILSEQ 57 | ourselves. */ 58 | #ifndef EILSEQ 59 | #define EILSEQ 60 | #endif 61 | 62 | 63 | #ifdef __cplusplus 64 | extern "C" { 65 | #endif 66 | 67 | 68 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 69 | encoding ‘tocode’. */ 70 | #ifndef LIBICONV_PLUG 71 | #define iconv_open libiconv_open 72 | #endif 73 | extern iconv_t iconv_open (const char* tocode, const char* fromcode); 74 | 75 | /* Converts, using conversion descriptor ‘cd’, at most ‘*inbytesleft’ bytes 76 | starting at ‘*inbuf’, writing at most ‘*outbytesleft’ bytes starting at 77 | ‘*outbuf’. 78 | Decrements ‘*inbytesleft’ and increments ‘*inbuf’ by the same amount. 79 | Decrements ‘*outbytesleft’ and increments ‘*outbuf’ by the same amount. */ 80 | #ifndef LIBICONV_PLUG 81 | #define iconv libiconv 82 | #endif 83 | extern size_t iconv (iconv_t cd, char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft); 84 | 85 | /* Frees resources allocated for conversion descriptor ‘cd’. */ 86 | #ifndef LIBICONV_PLUG 87 | #define iconv_close libiconv_close 88 | #endif 89 | extern int iconv_close (iconv_t cd); 90 | 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | 97 | #ifndef LIBICONV_PLUG 98 | 99 | /* Nonstandard extensions. */ 100 | 101 | #if 1 102 | #if 0 103 | /* Tru64 with Desktop Toolkit C has a bug: must be included before 104 | . 105 | BSD/OS 4.0.1 has a bug: , and must be 106 | included before . 
*/ 107 | #include 108 | #include 109 | #include 110 | #endif 111 | #include 112 | #endif 113 | 114 | #ifdef __cplusplus 115 | extern "C" { 116 | #endif 117 | 118 | /* A type that holds all memory needed by a conversion descriptor. 119 | A pointer to such an object can be used as an iconv_t. */ 120 | typedef struct { 121 | void* dummy1[28]; 122 | #if 1 123 | mbstate_t dummy2; 124 | #endif 125 | } iconv_allocation_t; 126 | 127 | /* Allocates descriptor for code conversion from encoding ‘fromcode’ to 128 | encoding ‘tocode’ into preallocated memory. Returns an error indicator 129 | (0 or -1 with errno set). */ 130 | #define iconv_open_into libiconv_open_into 131 | extern int iconv_open_into (const char* tocode, const char* fromcode, 132 | iconv_allocation_t* resultp); 133 | 134 | /* Control of attributes. */ 135 | #define iconvctl libiconvctl 136 | extern int iconvctl (iconv_t cd, int request, void* argument); 137 | 138 | /* Hook performed after every successful conversion of a Unicode character. */ 139 | typedef void (*iconv_unicode_char_hook) (unsigned int uc, void* data); 140 | /* Hook performed after every successful conversion of a wide character. */ 141 | typedef void (*iconv_wide_char_hook) (wchar_t wc, void* data); 142 | /* Set of hooks. */ 143 | struct iconv_hooks { 144 | iconv_unicode_char_hook uc_hook; 145 | iconv_wide_char_hook wc_hook; 146 | void* data; 147 | }; 148 | 149 | /* Fallback function. Invoked when a small number of bytes could not be 150 | converted to a Unicode character. This function should process all 151 | bytes from inbuf and may produce replacement Unicode characters by calling 152 | the write_replacement callback repeatedly. */ 153 | typedef void (*iconv_unicode_mb_to_uc_fallback) 154 | (const char* inbuf, size_t inbufsize, 155 | void (*write_replacement) (const unsigned int *buf, size_t buflen, 156 | void* callback_arg), 157 | void* callback_arg, 158 | void* data); 159 | /* Fallback function. 
Invoked when a Unicode character could not be converted 160 | to the target encoding. This function should process the character and 161 | may produce replacement bytes (in the target encoding) by calling the 162 | write_replacement callback repeatedly. */ 163 | typedef void (*iconv_unicode_uc_to_mb_fallback) 164 | (unsigned int code, 165 | void (*write_replacement) (const char *buf, size_t buflen, 166 | void* callback_arg), 167 | void* callback_arg, 168 | void* data); 169 | #if 1 170 | /* Fallback function. Invoked when a number of bytes could not be converted to 171 | a wide character. This function should process all bytes from inbuf and may 172 | produce replacement wide characters by calling the write_replacement 173 | callback repeatedly. */ 174 | typedef void (*iconv_wchar_mb_to_wc_fallback) 175 | (const char* inbuf, size_t inbufsize, 176 | void (*write_replacement) (const wchar_t *buf, size_t buflen, 177 | void* callback_arg), 178 | void* callback_arg, 179 | void* data); 180 | /* Fallback function. Invoked when a wide character could not be converted to 181 | the target encoding. This function should process the character and may 182 | produce replacement bytes (in the target encoding) by calling the 183 | write_replacement callback repeatedly. */ 184 | typedef void (*iconv_wchar_wc_to_mb_fallback) 185 | (wchar_t code, 186 | void (*write_replacement) (const char *buf, size_t buflen, 187 | void* callback_arg), 188 | void* callback_arg, 189 | void* data); 190 | #else 191 | /* If the wchar_t type does not exist, these two fallback functions are never 192 | invoked. Their argument list therefore does not matter. */ 193 | typedef void (*iconv_wchar_mb_to_wc_fallback) (); 194 | typedef void (*iconv_wchar_wc_to_mb_fallback) (); 195 | #endif 196 | /* Set of fallbacks. 
*/ 197 | struct iconv_fallbacks { 198 | iconv_unicode_mb_to_uc_fallback mb_to_uc_fallback; 199 | iconv_unicode_uc_to_mb_fallback uc_to_mb_fallback; 200 | iconv_wchar_mb_to_wc_fallback mb_to_wc_fallback; 201 | iconv_wchar_wc_to_mb_fallback wc_to_mb_fallback; 202 | void* data; 203 | }; 204 | 205 | /* Requests for iconvctl. */ 206 | #define ICONV_TRIVIALP 0 /* int *argument */ 207 | #define ICONV_GET_TRANSLITERATE 1 /* int *argument */ 208 | #define ICONV_SET_TRANSLITERATE 2 /* const int *argument */ 209 | #define ICONV_GET_DISCARD_ILSEQ 3 /* int *argument */ 210 | #define ICONV_SET_DISCARD_ILSEQ 4 /* const int *argument */ 211 | #define ICONV_SET_HOOKS 5 /* const struct iconv_hooks *argument */ 212 | #define ICONV_SET_FALLBACKS 6 /* const struct iconv_fallbacks *argument */ 213 | 214 | /* Listing of locale independent encodings. */ 215 | #define iconvlist libiconvlist 216 | extern void iconvlist (int (*do_one) (unsigned int namescount, 217 | const char * const * names, 218 | void* data), 219 | void* data); 220 | 221 | /* Canonicalize an encoding name. 222 | The result is either a canonical encoding name, or name itself. */ 223 | extern const char * iconv_canonicalize (const char * name); 224 | 225 | /* Support for relocatable packages. */ 226 | 227 | /* Sets the original and the current installation prefix of the package. 228 | Relocation simply replaces a pathname starting with the original prefix 229 | by the corresponding pathname with the current prefix instead. Both 230 | prefixes should be directory names without trailing slash (i.e. use "" 231 | instead of "/"). 
*/ 232 | extern void libiconv_set_relocation_prefix (const char *orig_prefix, 233 | const char *curr_prefix); 234 | 235 | #ifdef __cplusplus 236 | } 237 | #endif 238 | 239 | #endif 240 | 241 | 242 | #endif /* _LIBICONV_H */ 243 | -------------------------------------------------------------------------------- /lib/libcharsetdetect.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunxfancy/ExIconv/e790d7a86bcb59883d56a30450eed0d7633ca2a6/lib/libcharsetdetect.so -------------------------------------------------------------------------------- /lib/libiconv.so: -------------------------------------------------------------------------------- 1 | libiconv.so.2.5.1 -------------------------------------------------------------------------------- /lib/libiconv.so.2: -------------------------------------------------------------------------------- 1 | libiconv.so.2.5.1 -------------------------------------------------------------------------------- /lib/libiconv.so.2.5.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunxfancy/ExIconv/e790d7a86bcb59883d56a30450eed0d7633ca2a6/lib/libiconv.so.2.5.1
--------------------------------------------------------------------------------
/src/core.c:
--------------------------------------------------------------------------------
/*
 * @Author: sxf
 * @Date:   2015-11-04 15:17:26
 * @Last Modified by:   sxf
 * @Last Modified time: 2015-11-05 08:21:32
 */

#include "iconv.h"
#include "exiconv.h"
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Drive one complete conversion through an already opened descriptor.
 *
 * cd        open conversion descriptor (not closed here)
 * input     bytes to convert; only read, never written
 * in_left   number of input bytes
 * capacity  initial size guess for the output buffer, in bytes
 * produced  receives the number of converted bytes (terminator excluded)
 *
 * Returns a malloc'd buffer holding the converted bytes followed by
 * sizeof(echar_t) zero bytes, so the result can be handed to estrlen() or
 * printed with %s. Returns NULL on allocation or conversion failure; the
 * conversion error text is printed the same way the callers always did.
 */
static char*
convert_with_iconv (iconv_t cd, const char* input, size_t in_left,
                    size_t capacity, size_t* produced) {
    const size_t pad = sizeof(echar_t);
    char*  inptr   = (char*) input;  /* iconv() wants a non-const pointer */
    char*  buf;
    size_t used    = 0;
    int    flushed = 0;

    if (capacity < pad)
        capacity = pad;              /* never start with an empty buffer */
    buf = (char*) malloc (capacity + pad);
    if (buf == NULL)
        return NULL;

    while (!flushed) {
        char*  outptr   = buf + used;
        size_t out_left = capacity - used;
        size_t rc;
        char*  grown;

        if (in_left > 0) {
            rc = iconv (cd, &inptr, &in_left, &outptr, &out_left);
        } else {
            /* All input consumed: a NULL input asks iconv to emit any
               pending shift sequence of a stateful target encoding. */
            rc = iconv (cd, NULL, NULL, &outptr, &out_left);
            if (rc != (size_t)-1)
                flushed = 1;
        }
        used = capacity - out_left;
        if (rc != (size_t)-1)
            continue;

        if (errno != E2BIG) {
            /* Invalid or truncated input: report and release everything. */
            printf("%s\n", strerror(errno));
            free (buf);
            return NULL;
        }

        /* Output buffer exhausted: double it and resume where we stopped. */
        if (capacity > ((size_t)-1 - pad) / 2) {
            free (buf);
            return NULL;
        }
        capacity *= 2;
        grown = (char*) realloc (buf, capacity + pad);
        if (grown == NULL) {
            free (buf);
            return NULL;
        }
        buf = grown;
    }

    memset (buf + used, 0, pad);
    *produced = used;
    return buf;
}

/*
 * Convert a zero-terminated echar_t string to the encoding `encode`.
 *
 * On success returns a malloc'd, zero-terminated byte buffer and stores the
 * number of converted bytes (terminator excluded) in *outsize when outsize is
 * not NULL. Returns NULL, leaving *outsize untouched, when the encoding is
 * unknown, memory runs out, or the conversion fails. The caller frees the
 * result.
 *
 * NOTE(review): the source encoding is the generic "UTF-32" name, so the byte
 * order is whatever the iconv implementation assumes for it - confirm this
 * matches how echar_t values are produced.
 */
extern char*
echar2code (const echar_t* data, size_t* outsize, const char* encode) {
    assert (data != NULL);
    if (encode == NULL)
        return NULL;

    iconv_t cd = iconv_open (encode, "UTF-32"); /* tocode, fromcode */
    if ((iconv_t)-1 == cd) {
        printf("不能从编码 %s 转换到 %s!\n", "UTF-32", encode);
        return NULL;
    }

    size_t in_size  = sizeof(echar_t) * estrlen(data);
    size_t produced = 0;
    char*  outbuf   = convert_with_iconv (cd, (const char*) data, in_size,
                                          in_size, &produced);
    iconv_close (cd);               /* closed on every path, success or not */

    if (outbuf != NULL && outsize != NULL)
        *outsize = produced;
    return outbuf;
}


/*
 * Convert a NUL-terminated byte string in encoding `encode` to echar_t.
 *
 * On success returns a malloc'd, zero-terminated echar_t buffer and stores
 * the number of converted bytes (terminator excluded) in *outsize when
 * outsize is not NULL. Returns NULL, leaving *outsize untouched, on failure.
 * The caller frees the result.
 *
 * NOTE(review): the target is the generic "UTF-32" name; iconv may prepend a
 * byte-order mark and pick its own byte order - confirm that is intended.
 */
extern echar_t*
code2echar (const char* data, size_t* outsize, const char* encode) {
    assert (data != NULL);
    if (encode == NULL)
        return NULL;

    iconv_t cd = iconv_open ("UTF-32", encode); /* tocode, fromcode */
    if ((iconv_t)-1 == cd) {
        printf("不能从编码 %s 转换到 %s!\n", encode, "UTF-32");
        return NULL;
    }

    size_t in_size  = strlen(data);
    /* One output unit per input byte at most, plus one spare unit for a
       possible byte-order mark; the helper grows the buffer if needed. */
    size_t capacity = sizeof(echar_t) * (in_size + 1);
    size_t produced = 0;
    char*  outbuf   = convert_with_iconv (cd, data, in_size, capacity,
                                          &produced);
    iconv_close (cd);

    if (outbuf != NULL && outsize != NULL)
        *outsize = produced;
    return (echar_t*) outbuf;
}


/* Convert an echar_t string to UTF-8; see echar2code() for the contract. */
extern char*
conv2utf8 (const echar_t* data, size_t* outsize) {
    return echar2code (data, outsize, "UTF-8");
}


/* Convert a UTF-8 string to echar_t; see code2echar() for the contract. */
extern echar_t*
utf8conv2echar (const char* data, size_t* outsize) {
    return code2echar (data, outsize, "UTF-8");
}


/*
 * Read the rest of `f` (from its current position), detect its encoding and
 * convert it to echar_t.
 *
 * Returns a malloc'd echar_t string, or NULL when the stream is NULL or not
 * seekable, the encoding cannot be detected, reading fails, memory runs out,
 * or the conversion fails.
 *
 * Because code2echar() measures its input with strlen(), data after an
 * embedded NUL byte in the file is not converted.
 */
extern echar_t*
autoreadfile (FILE* f, size_t* outsize) {
    if (f == NULL)
        return NULL;

    long start = ftell(f);
    if (start < 0)
        return NULL;

    const char* encode = fileencodedetect(f);
    if (encode == NULL)
        return NULL;

    if (fseek(f, 0L, SEEK_END) != 0)
        return NULL;
    long end = ftell(f);
    /* Return to the starting offset explicitly instead of relying on a
       negated unsigned length. */
    if (end < start || fseek(f, start, SEEK_SET) != 0)
        return NULL;

    size_t file_len = (size_t) (end - start);
    char* inbuf = (char*) malloc (file_len + 1); /* +1 for the terminator */
    if (inbuf == NULL)
        return NULL;

    size_t got = fread(inbuf, 1, file_len, f);
    if (got != file_len && ferror(f)) {
        free (inbuf);
        return NULL;
    }
    inbuf[got] = '\0';              /* code2echar() relies on strlen() */

    echar_t* outbuf = code2echar(inbuf, outsize, encode);
    free (inbuf);
    return outbuf;
}

/*
 * Detect the encoding of a NUL-terminated byte string and convert it to
 * echar_t. Returns NULL when detection or conversion fails.
 */
extern echar_t*
autoreadchar (const char* data, size_t* outsize) {
    const char* encode = encodedetect(data);
    if (encode == NULL)
        return NULL;
    return code2echar(data, outsize, encode);
}


/* Number of echar_t units before the terminating zero unit. */
extern size_t
estrlen (const echar_t* str) {
    assert (str != NULL);
    size_t size = 0;
    while (str[size] != 0) {
        ++size;
    }
    return size;
}

/* Free *p and reset it to NULL; a NULL p or NULL *p is a no-op. */
extern void
free_str(void** p) {
    if (p != NULL && *p != NULL) {
        free(*p);
        *p = NULL;
    }
}
--------------------------------------------------------------------------------
/src/detect.c:
--------------------------------------------------------------------------------
/*
 * @Author: sxf
 * @Date:   2015-11-04 15:34:23
 * @Last Modified by:   sxf
 * @Last Modified time: 2015-11-04 20:23:24
 */

#include "exiconv.h"
#include "charsetdetect.h"
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

/*
 * Guess the encoding of a NUL-terminated byte string.
 *
 * Returns whatever csd_close() reports for the data, or NULL when the
 * detector cannot be opened or rejects the input.
 */
extern const char*
encodedetect (const char* data) {
    assert(data != NULL);
    csd_t csd = csd_open();
    if (csd == (csd_t)-1) {
        printf("csd_open failed\n");
        return NULL;
    }

    /* csd_consider() takes an int length; clamp very long inputs. */
    size_t len = strlen(data);
    int size = (len > (size_t) INT_MAX) ? INT_MAX : (int) len;
    int result = csd_consider(csd, data, size);

    /* Always close the detector so the handle is released on failure too. */
    const char* name = csd_close(csd);
    return (result < 0) ? NULL : name;
}


/*
 * Guess the encoding of the data in `f`, reading from the current position.
 *
 * The stream position is restored before returning, on success and on
 * failure. Returns whatever csd_close() reports, or NULL when the detector
 * cannot be opened or rejects the data.
 */
extern const char*
fileencodedetect (FILE* f) {
    enum { CHUNK_SIZE = 1024 };

    assert(f != NULL);
    csd_t csd = csd_open();
    if (csd == (csd_t)-1) {
        printf("csd_open failed\n");
        return NULL;
    }

    char   buf[CHUNK_SIZE];
    long   start    = ftell(f);
    long   consumed = 0;
    int    failed   = 0;
    size_t got;

    memset(buf, 0, sizeof(buf));
    while ((got = fread(buf, 1, sizeof(buf), f)) != 0) {
        consumed += (long) got;
        int result = csd_consider(csd, buf, (int) got);
        if (result < 0) {
            printf("csd_consider failed\n");
            failed = 1;
            break;
        } else if (result > 0) {
            /* Already have enough data */
            break;
        }
    }

    /* Put the stream back where the caller left it. */
    if (start >= 0)
        fseek(f, start, SEEK_SET);
    else
        fseek(f, -consumed, SEEK_CUR);

    const char* name = csd_close(csd);
    return failed ? NULL : name;
}
--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
/*
 * @Author: sxf
 * @Date:   2015-11-04 10:16:06
 * @Last Modified by:   sxf
 * @Last Modified time: 2015-11-04 22:06:49
 */
#include "charsetdetect.h"
#include <stdio.h>
#include <stdlib.h>
#include "exiconv.h"

/*
 * Demo driver: read the file named by argv[1], detect its encoding, and
 * print its length in echar_t units followed by its UTF-8 form.
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, const char * argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "iconv_test");
        return 1;
    }

    /* Binary mode keeps ftell()/fseek() offsets equal to byte counts. */
    FILE* f = fopen(argv[1], "rb");
    if (f == NULL) {
        perror(argv[1]);
        return 1;
    }

    size_t outsize = 0;
    echar_t* str = autoreadfile(f, &outsize);
    fclose(f);
    if (str == NULL) {
        fprintf(stderr, "failed to decode %s\n", argv[1]);
        return 1;
    }

    printf("echar_len = %zu\n", estrlen(str));

    int status = 1;
    char* utf8_str = conv2utf8(str, &outsize);
    if (utf8_str != NULL) {
        printf("utf8:%s\n", utf8_str);
        status = 0;
    }

    free(utf8_str);
    free(str);
    return status;
}
--------------------------------------------------------------------------------