├── configure ├── update-mozilla ├── nscore.h ├── .gitignore ├── charsetdetectPriv.h ├── charsetdetect.h ├── configure-iphone ├── configure-iphone-simulator ├── CMakeLists.txt ├── nspr-emu ├── README.md ├── prcpucfg.h ├── prcpucfg_mac.h ├── prmem.h ├── obsolete │ └── protypes.h ├── prcpucfg_win.h └── prcpucfg_freebsd.h ├── charsetdetect.cpp ├── cmake-toolchains ├── iphone.toolchain └── iphone-simulator.toolchain ├── mozilla └── extensions │ └── universalchardet │ └── src │ └── base │ ├── nsUTF8Prober.h │ ├── nsLatin1Prober.h │ ├── nsSBCSGroupProber.h │ ├── nsEscCharsetProber.h │ ├── nsBig5Prober.h │ ├── nsEUCKRProber.h │ ├── nsEUCTWProber.h │ ├── nsGB2312Prober.h │ ├── nsUTF8Prober.cpp │ ├── nsEUCJPProber.h │ ├── nsSJISProber.h │ ├── Makefile.in │ ├── nsBig5Prober.cpp │ ├── nsMBCSGroupProber.h │ ├── nsEUCKRProber.cpp │ ├── nsEUCTWProber.cpp │ ├── nsPkgInt.h │ ├── nsCharSetProber.h │ ├── nsGB2312Prober.cpp │ ├── nsUniversalDetector.h │ ├── nsSJISProber.cpp │ ├── nsEUCJPProber.cpp │ ├── nsEscCharsetProber.cpp │ ├── nsCodingStateMachine.h │ ├── CharDistribution.cpp │ ├── nsSBCharSetProber.cpp │ ├── nsCharSetProber.cpp │ ├── JpCntx.h │ ├── nsSBCharSetProber.h │ ├── nsMBCSGroupProber.cpp │ ├── nsLatin1Prober.cpp │ ├── nsSBCSGroupProber.cpp │ ├── nsHebrewProber.cpp │ ├── CharDistribution.h │ ├── nsHebrewProber.h │ └── nsUniversalDetector.cpp └── README.md /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cmake . 
-DBUILD_SHARED_LIBS=YES "$@" 4 | -------------------------------------------------------------------------------- /update-mozilla: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cvs -d :pserver:anonymous@cvs-mirror.mozilla.org:/cvsroot co mozilla/extensions/universalchardet/src/base 3 | -------------------------------------------------------------------------------- /nscore.h: -------------------------------------------------------------------------------- 1 | #include "prtypes.h" 2 | #define NS_OK 0 3 | #define nsnull 0L 4 | typedef PRUint32 nsresult; 5 | #define NS_ERROR_OUT_OF_MEMORY ((nsresult) 0x8007000eL) 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS junk 2 | .DS_Store 3 | Thumbs.db 4 | 5 | # XCode build artifacts 6 | build/ 7 | 8 | # XCode user configuration 9 | *.mode1v3 10 | *.pbxuser 11 | 12 | # XCode something or other.. 
not sure if it should be checked in 13 | charsetdetectProj.xcconfig 14 | charsetdetectTarget.xcconfig 15 | 16 | # CMake generated build system 17 | Makefile 18 | 19 | # CMake build artifacts 20 | lib/ 21 | 22 | # CMake junk 23 | CMakeCache.txt 24 | CMakeFiles/ 25 | cmake_install.cmake 26 | install_manifest.txt 27 | -------------------------------------------------------------------------------- /charsetdetectPriv.h: -------------------------------------------------------------------------------- 1 | /* The classes below are not exported */ 2 | #pragma GCC visibility push(hidden) 3 | 4 | #include 5 | #include 6 | #include "nscore.h" 7 | #include "nsUniversalDetector.h" 8 | 9 | class Detector : public nsUniversalDetector { 10 | public: 11 | Detector(PRUint32 aLanguageFilter) : nsUniversalDetector(aLanguageFilter) {}; 12 | int Consider(const char *data, int length); 13 | const char *Close(void); 14 | protected: 15 | void Report(const char* aCharset); 16 | const char *mDetectedCharset; 17 | }; 18 | 19 | #pragma GCC visibility pop 20 | -------------------------------------------------------------------------------- /charsetdetect.h: -------------------------------------------------------------------------------- 1 | #ifndef charsetdetect_ 2 | #define charsetdetect_ 3 | 4 | /* The classes below are exported */ 5 | #pragma GCC visibility push(default) 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | // Opaque type of character set detectors 12 | typedef void* csd_t; 13 | 14 | // Create a new character set detector. Must be freed by csd_close. 15 | // If creation fails, returns (csd_t)-1. 16 | csd_t csd_open(void); 17 | // Feeds some more data to the character set detector. Returns 0 if it 18 | // needs more data to come to a conclusion and a positive number if it has enough to say what 19 | // the character set is. Returns a negative number if there is an error. 
20 | int csd_consider(csd_t csd, const char *data, int length); 21 | // Closes the character set detector and returns the detected character set name as an ASCII string. 22 | // Returns NULL if detection failed. 23 | const char *csd_close(csd_t csd); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #pragma GCC visibility pop 30 | #endif 31 | -------------------------------------------------------------------------------- /configure-iphone: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | unset CPATH 3 | unset C_INCLUDE_PATH 4 | unset CPLUS_INCLUDE_PATH 5 | unset OBJC_INCLUDE_PATH 6 | unset LIBS 7 | unset DYLD_FALLBACK_LIBRARY_PATH 8 | unset DYLD_FALLBACK_FRAMEWORK_PATH 9 | 10 | if [ $# -lt 1 ]; then 11 | echo "Usage: $0 sdk_version" 12 | exit 1 13 | fi 14 | 15 | export SDKVER=$1 16 | shift 17 | 18 | export DEVROOT="/Developer/Platforms/iPhoneOS.platform/Developer" 19 | export SDKROOT="$DEVROOT/SDKs/iPhoneOS$SDKVER.sdk" 20 | export PKG_CONFIG_PATH="$SDKROOT/usr/lib/pkgconfig":"$SDKROOT/opt/iphone-$SDKVER/lib/pkgconfig":"$SDKROOT/usr/local/iphone-$SDKVER/lib/pkgconfig" 21 | export PKG_CONFIG_LIBDIR="$PKG_CONFIG_PATH" 22 | 23 | # OK, this is really weird... running CMake can fail the first few times. It looks like you need to run it a few times to get 24 | # the values that you SET to propagate around, because if you don't the linker flags etc that get used are outdated 25 | for i in 1 2 3; do 26 | cmake . -DSDKVER=$SDKVER -DCMAKE_TOOLCHAIN_FILE="cmake-toolchains/iphone.toolchain" -DCMAKE_INSTALL_PREFIX="/opt/iphone-$SDKVER" "$@" 27 | done 28 | -------------------------------------------------------------------------------- /configure-iphone-simulator: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | unset CPATH 3 | unset C_INCLUDE_PATH 4 | unset CPLUS_INCLUDE_PATH 5 | unset OBJC_INCLUDE_PATH 6 | unset LIBS 7 | unset DYLD_FALLBACK_LIBRARY_PATH 8 | unset DYLD_FALLBACK_FRAMEWORK_PATH 9 | 10 | if [ $# -lt 1 ]; then 11 | echo "Usage: $0 sdk_version" 12 | exit 1 13 | fi 14 | 15 | export SDKVER=$1 16 | shift 17 | 18 | export DEVROOT="/Developer/Platforms/iPhoneSimulator.platform/Developer" 19 | export SDKROOT="$DEVROOT/SDKs/iPhoneSimulator$SDKVER.sdk" 20 | export PKG_CONFIG_PATH="$SDKROOT/usr/lib/pkgconfig":"$SDKROOT/opt/iphone-simulator-$SDKVER/lib/pkgconfig":"$SDKROOT/usr/local/iphone-simulator-$SDKVER/lib/pkgconfig" 21 | export PKG_CONFIG_LIBDIR="$PKG_CONFIG_PATH" 22 | 23 | # OK, this is really weird... running CMake can fail the first few times. It looks like you need to run it a few times to get 24 | # the values that you SET to propagate around, because if you don't the linker flags etc that get used are outdated 25 | for i in 1 2 3; do 26 | cmake . -DSDKVER=$SDKVER -DCMAKE_TOOLCHAIN_FILE="cmake-toolchains/iphone-simulator.toolchain" -DCMAKE_INSTALL_PREFIX="/opt/iphone-simulator-$SDKVER" "$@" 27 | done 28 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6) 2 | 3 | 4 | # Add a distclean target to the generated makefile 5 | IF (UNIX) 6 | ADD_CUSTOM_TARGET (distclean @echo cleaning for source distribution) 7 | SET(DISTCLEANED 8 | cmake.depends 9 | cmake.check_depends 10 | CMakeCache.txt 11 | cmake.check_cache 12 | *.cmake 13 | Makefile 14 | core core.* 15 | gmon.out 16 | *~ 17 | ) 18 | 19 | ADD_CUSTOM_COMMAND( 20 | DEPENDS clean 21 | COMMENT "distribution clean" 22 | COMMAND rm 23 | ARGS -Rf CMakeTmp ${DISTCLEANED} 24 | TARGET distclean 25 | ) 26 | ENDIF(UNIX) 27 | 28 | 29 | PROJECT(charsetdetect) 30 | 31 | INCLUDE_DIRECTORIES(. 
nspr-emu mozilla/extensions/universalchardet/src/base/) 32 | 33 | FILE(GLOB charsetdetect_lib_src mozilla/extensions/universalchardet/src/base/*.cpp) 34 | SET(charsetdetect_lib_src ${charsetdetect_lib_src} charsetdetect.cpp) 35 | ADD_LIBRARY(charsetdetect ${charsetdetect_lib_src}) 36 | 37 | # Do not output the shared library into the root 38 | SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build) 39 | 40 | INSTALL (TARGETS charsetdetect DESTINATION lib) 41 | INSTALL (FILES charsetdetect.h DESTINATION include) 42 | -------------------------------------------------------------------------------- /nspr-emu/README.md: -------------------------------------------------------------------------------- 1 | # NSPR emulation library 2 | 3 | Author: Max Bolingbroke () 4 | 5 | This library was created by pulling together the minimal pieces of NSPR required to get the Universal Character Set 6 | Detector (UCSD) to compile. It turns out that the UCSD uses only very minimal parts of NSPR, so we can get away with just a few 7 | header files and totally forget about linking to the library. 8 | 9 | Precisely, we used these files from [NSPR v4.8.6](https://ftp.mozilla.org/pub/mozilla.org/nspr/releases/v4.8.6/): 10 | 11 | * prtypes.h 12 | * prmem.h 13 | * obsolete/protypes.h 14 | 15 | I then patched some prototypes in prmem.h to use C library functions directly. The prototypes were replaced with #define directives 16 | that ensure that callers of PR_Malloc, PR_Calloc, PR_Realloc, PR_Free just use their C library counterparts directly. 17 | 18 | To finish off, I needed to provide prcpucfg.h. Normally this is generated by the nspr build process, but to simplify things I used 19 | the version from [Google's Chrome source tree](http://src.chromium.org/viewvc/chrome/trunk/src/base/third_party/nspr/prcpucfg.h?view=markup) 20 | that just uses the preprocessor to choose between several pre-generated versions. 
21 | 22 | I had to patch the Google-provided prcpucfg.h so that the #included paths pointed to the right directory. 23 | 24 | ## Licensing 25 | 26 | Chromium (and thus presumably their contribution) is licensed under the BSD license. 27 | 28 | NSPR is licensed under the MPL/GPL/LGPL tri-license. 29 | 30 | Thus, this emulation library is licensed under the LGPL (I think). -------------------------------------------------------------------------------- /charsetdetect.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "charsetdetect.h" 3 | #include "charsetdetectPriv.h" 4 | 5 | 6 | // 7 | // C++ API to the character set detector (not exported) 8 | // 9 | 10 | void Detector::Report(const char* aCharset) { 11 | // mDone has to be set true here because the original code 12 | // does not always set it, and I am trying to avoid modifying 13 | // the original code. 14 | mDone = PR_TRUE; 15 | 16 | mDetectedCharset = aCharset; 17 | } 18 | 19 | int Detector::Consider(const char *data, int length) { 20 | if (HandleData(data, length) == NS_ERROR_OUT_OF_MEMORY) { 21 | // Error, signal with a negative number 22 | return -1; 23 | } 24 | 25 | if (mDone) { 26 | // Detected early 27 | return 0; 28 | } 29 | 30 | // Need more data! 
31 | return 1; 32 | } 33 | 34 | const char *Detector::Close(void) { 35 | DataEnd(); 36 | 37 | if (!mDone) { 38 | if (mInputState == eEscAscii) { 39 | return "ibm850"; 40 | } 41 | else if (mInputState == ePureAscii) { 42 | return "ASCII"; 43 | } 44 | 45 | return NULL; 46 | } 47 | 48 | return mDetectedCharset; 49 | } 50 | 51 | 52 | // 53 | // C API to the character set detector (we actually export this) 54 | // 55 | 56 | csd_t csd_open(void) { 57 | // TODO: capture exceptions thrown by "new" and return -1 in that case 58 | // TODO: provide C-land with access to the language filter constructor argument 59 | return new Detector(NS_FILTER_ALL); 60 | } 61 | 62 | int csd_consider(csd_t csd, const char *data, int length) { 63 | return ((Detector*)csd)->Consider(data, length); 64 | } 65 | 66 | const char *csd_close(csd_t csd) { 67 | const char *result = ((Detector*)csd)->Close(); 68 | delete ((Detector*)csd); 69 | return result; 70 | } 71 | -------------------------------------------------------------------------------- /nspr-emu/prcpucfg.h: -------------------------------------------------------------------------------- 1 | // Copyright 2008, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 
17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #ifndef BASE_THIRD_PARTY_NSPR_PRCPUCFG_H__ 31 | #define BASE_THIRD_PARTY_NSPR_PRCPUCFG_H__ 32 | 33 | #if defined(WIN32) 34 | #include "prcpucfg_win.h" 35 | #elif defined(__APPLE__) 36 | #include "prcpucfg_mac.h" 37 | #elif defined(__linux__) 38 | #include "prcpucfg_linux.h" 39 | #elif defined(__FreeBSD__) 40 | #include "prcpucfg_freebsd.h" 41 | #elif defined(__OpenBSD__) 42 | #include "prcpucfg_openbsd.h" 43 | #elif defined(__sun) 44 | #include "prcpucfg_solaris.h" 45 | #else 46 | #error Provide a prcpucfg.h appropriate for your platform 47 | #endif 48 | 49 | #endif // BASE_THIRD_PARTY_NSPR_PRCPUCFG_H__ 50 | -------------------------------------------------------------------------------- /cmake-toolchains/iphone.toolchain: -------------------------------------------------------------------------------- 1 | # Michael Aaron Safyan (michaelsafyan@gmail.com). Copyright (C) 2009-2010. Simplified BSD License. 2 | 3 | # Grr.. 
variables set by -D seem to be invisible when processing the toolchain file 4 | SET (SDKVER $ENV{SDKVER}) 5 | 6 | SET (CMAKE_SYSTEM_NAME Generic) 7 | SET (CMAKE_SYSTEM_VERSION 1) 8 | SET (CMAKE_SYSTEM_PROCESSOR arm) 9 | SET_PROPERTY(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE) 10 | 11 | SET (DEVROOT "/Developer/Platforms/iPhoneOS.platform/Developer") 12 | SET (SDKROOT "${DEVROOT}/SDKs/iPhoneOS${SDKVER}.sdk") 13 | SET (CMAKE_OSX_SYSROOT "${SDKROOT}") 14 | SET (CMAKE_OSX_ARCHITECTURES "armv6" "armv7") 15 | 16 | SET (CMAKE_C_COMPILER "${DEVROOT}/usr/bin/gcc-4.2") 17 | SET (CMAKE_CXX_COMPILER "${DEVROOT}/usr/bin/g++-4.2") 18 | 19 | # NB: these flags are passed to both the linker and the compiler 20 | 21 | SET (LINKER_COMPILER_COMMON_FLAGS "-arch armv6 -arch armv7") 22 | 23 | SET (CMAKE_C_FLAGS "${LINKER_COMPILER_COMMON_FLAGS} -std=c99") 24 | SET (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -DDEBUG=1 -ggdb") 25 | SET (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS} -DNDEBUG=1") 26 | SET (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -DNDEBUG=1 -ggdb") 27 | 28 | SET (CMAKE_CXX_FLAGS ${LINKER_COMPILER_COMMON_FLAGS}) 29 | SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -DDEBUG=1 -ggdb") 30 | SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG=1") 31 | SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG=1 -ggdb") 32 | 33 | # NB: these flags are passed only to the compiler 34 | 35 | # ADD_DEFINITIONS("-x objective-c") # FIXME: should be c++ for CXX.. 
but can't add to CXX/C_FLAGS because they get passed to linker >_> 36 | ADD_DEFINITIONS("-pipe") 37 | ADD_DEFINITIONS("-no-cpp-precomp") 38 | ADD_DEFINITIONS("--sysroot=${SDKROOT}") 39 | ADD_DEFINITIONS("-miphoneos-version-min=${SDKVER}") 40 | 41 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/usr/include") 42 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/opt/iphone-${SDKVER}/include") 43 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/usr/local/iphone-${SDKVER}/include") 44 | 45 | LINK_DIRECTORIES("${SDKROOT}/usr/lib") 46 | LINK_DIRECTORIES("${SDKROOT}/opt/iphone-${SDKVER}/lib") 47 | LINK_DIRECTORIES("${SDKROOT}/usr/local/iphone-${SDKVER}/lib") 48 | 49 | SET (CMAKE_FIND_ROOT_PATH "${SDKROOT}" "${SDKROOT}/opt/iphone-${SDKVER}/" "${SDKROOT}/usr/local/iphone-${SDKVER}/") 50 | SET (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) 51 | SET (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 52 | SET (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 53 | 54 | SET (iPhone 1) 55 | SET (iPhoneOS 1) 56 | SET (iPhoneOS_VERSION ${SDKVER}) 57 | -------------------------------------------------------------------------------- /cmake-toolchains/iphone-simulator.toolchain: -------------------------------------------------------------------------------- 1 | # Michael Aaron Safyan (michaelsafyan@gmail.com). Copyright (C) 2009-2010. Simplified BSD License. 2 | 3 | # Grr.. 
variables set by -D seem to be invisible when processing the toolchain file 4 | SET (SDKVER $ENV{SDKVER}) 5 | 6 | SET (CMAKE_SYSTEM_NAME Generic) 7 | SET (CMAKE_SYSTEM_VERSION 1) 8 | SET (CMAKE_SYSTEM_PROCESSOR i686) 9 | SET_PROPERTY(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE) 10 | 11 | SET (DEVROOT "/Developer/Platforms/iPhoneSimulator.platform/Developer") 12 | SET (SDKROOT "${DEVROOT}/SDKs/iPhoneSimulator${SDKVER}.sdk") 13 | SET (CMAKE_OSX_SYSROOT "${SDKROOT}") 14 | SET (CMAKE_OSX_ARCHITECTURES "i386") 15 | 16 | SET (CMAKE_C_COMPILER "${DEVROOT}/usr/bin/gcc-4.2") 17 | SET (CMAKE_CXX_COMPILER "${DEVROOT}/usr/bin/g++-4.2") 18 | 19 | # NB: these flags are passed to both the linker and the compiler 20 | 21 | SET (LINKER_COMPILER_COMMON_FLAGS "-arch i386") 22 | 23 | SET (CMAKE_C_FLAGS "${LINKER_COMPILER_COMMON_FLAGS} -std=c99") 24 | SET (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -DDEBUG=1 -ggdb") 25 | SET (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS} -DNDEBUG=1") 26 | SET (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -DNDEBUG=1 -ggdb") 27 | 28 | SET (CMAKE_CXX_FLAGS ${LINKER_COMPILER_COMMON_FLAGS}) 29 | SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -DDEBUG=1 -ggdb") 30 | SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG=1") 31 | SET (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG=1 -ggdb") 32 | 33 | # NB: these flags are passed only to the compiler 34 | 35 | # ADD_DEFINITIONS("-x objective-c") # FIXME: should be c++ for CXX.. 
but can't add to CXX/C_FLAGS because they get passed to linker >_> 36 | ADD_DEFINITIONS("-pipe") 37 | ADD_DEFINITIONS("-no-cpp-precomp") 38 | ADD_DEFINITIONS("--sysroot=${SDKROOT}") 39 | ADD_DEFINITIONS("-miphoneos-version-min=${SDKVER}") 40 | 41 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/usr/include") 42 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/opt/iphone-simulator-${SDKVER}/include") 43 | INCLUDE_DIRECTORIES(SYSTEM "${SDKROOT}/usr/local/iphone-simulator-${SDKVER}/include") 44 | 45 | LINK_DIRECTORIES("${SDKROOT}/usr/lib") 46 | LINK_DIRECTORIES("${SDKROOT}/opt/iphone-simulator-${SDKVER}/lib") 47 | LINK_DIRECTORIES("${SDKROOT}/usr/local/iphone-simulator-${SDKVER}/lib") 48 | 49 | SET (CMAKE_FIND_ROOT_PATH "${SDKROOT}" "${SDKROOT}/opt/iphone-simulator-${SDKVER}/" "${SDKROOT}/usr/local/iphone-simulator-${SDKVER}/") 50 | SET (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) 51 | SET (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 52 | SET (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 53 | 54 | SET (iPhone 1) 55 | SET (iPhoneSimulator 1) 56 | SET (iPhoneSimulator_VERSION ${SDKVER}) 57 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsUTF8Prober.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 
14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsUTF8Prober_h__ 39 | #define nsUTF8Prober_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | 44 | class nsUTF8Prober: public nsCharSetProber { 45 | public: 46 | nsUTF8Prober(){mNumOfMBChar = 0; 47 | mCodingSM = new nsCodingStateMachine(&UTF8SMModel); 48 | Reset(); } 49 | virtual ~nsUTF8Prober(){delete mCodingSM;} 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return "UTF-8";} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | protected: 58 | nsCodingStateMachine* mCodingSM; 59 | nsProbingState mState; 60 | PRUint32 mNumOfMBChar; 61 | }; 62 | 63 | #endif /* nsUTF8Prober_h__ */ 64 | 65 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsLatin1Prober.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 
19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #ifndef nsLatin1Prober_h__ 40 | #define nsLatin1Prober_h__ 41 | 42 | #include "nsCharSetProber.h" 43 | 44 | #define FREQ_CAT_NUM 4 45 | 46 | class nsLatin1Prober: public nsCharSetProber { 47 | public: 48 | nsLatin1Prober(void){Reset();} 49 | virtual ~nsLatin1Prober(void){} 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return "windows-1252";} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | #ifdef DEBUG_chardet 58 | virtual void DumpStatus(); 59 | #endif 60 | 61 | protected: 62 | 63 | nsProbingState mState; 64 | char mLastCharClass; 65 | PRUint32 mFreqCounter[FREQ_CAT_NUM]; 66 | }; 67 | 68 | 69 | #endif /* nsLatin1Prober_h__ */ 70 | 71 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsSBCSGroupProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 
19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #ifndef nsSBCSGroupProber_h__ 40 | #define nsSBCSGroupProber_h__ 41 | 42 | 43 | #define NUM_OF_SBCS_PROBERS 13 44 | 45 | class nsCharSetProber; 46 | class nsSBCSGroupProber: public nsCharSetProber { 47 | public: 48 | nsSBCSGroupProber(); 49 | virtual ~nsSBCSGroupProber(); 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName(); 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | #ifdef DEBUG_chardet 58 | void DumpStatus(); 59 | #endif 60 | 61 | protected: 62 | nsProbingState mState; 63 | nsCharSetProber* mProbers[NUM_OF_SBCS_PROBERS]; 64 | PRBool mIsActive[NUM_OF_SBCS_PROBERS]; 65 | PRInt32 mBestGuess; 66 | PRUint32 mActiveNum; 67 | }; 68 | 69 | #endif /* nsSBCSGroupProber_h__ */ 70 | 71 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEscCharsetProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 
19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsEscCharSetProber_h__ 39 | #define nsEscCharSetProber_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | 44 | #define NUM_OF_ESC_CHARSETS 4 45 | 46 | class nsEscCharSetProber: public nsCharSetProber { 47 | public: 48 | nsEscCharSetProber(PRUint32 aLanguageFilter); 49 | virtual ~nsEscCharSetProber(void); 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return mDetectedCharset;} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void){return (float)0.99;} 55 | void SetOpion() {} 56 | 57 | protected: 58 | void GetDistribution(PRUint32 aCharLen, const char* aStr); 59 | 60 | nsCodingStateMachine* mCodingSM[NUM_OF_ESC_CHARSETS] ; 61 | PRUint32 mActiveSM; 62 | nsProbingState mState; 63 | const char * mDetectedCharset; 64 | }; 65 | 66 | #endif /* nsEscCharSetProber_h__ */ 67 | 68 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsBig5Prober.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 
16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsBig5Prober_h__ 39 | #define nsBig5Prober_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | #include "CharDistribution.h" 44 | 45 | class nsBig5Prober: public nsCharSetProber { 46 | public: 47 | nsBig5Prober(void){mCodingSM = new nsCodingStateMachine(&Big5SMModel); 48 | Reset();} 49 | virtual ~nsBig5Prober(void){delete mCodingSM;} 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return "Big5";} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | protected: 58 | void GetDistribution(PRUint32 aCharLen, const char* aStr); 59 | 60 | nsCodingStateMachine* mCodingSM; 61 | nsProbingState mState; 62 | 63 | //Big5ContextAnalysis mContextAnalyser; 64 | Big5DistributionAnalysis mDistributionAnalyser; 65 | char mLastChar[2]; 66 | 67 | }; 68 | 69 | 70 | #endif /* nsBig5Prober_h__ */ 71 | 72 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCKRProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 
16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsEUCKRProber_h__ 39 | #define nsEUCKRProber_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | #include "CharDistribution.h" 44 | 45 | class nsEUCKRProber: public nsCharSetProber { 46 | public: 47 | nsEUCKRProber(void){mCodingSM = new nsCodingStateMachine(&EUCKRSMModel); 48 | Reset();} 49 | virtual ~nsEUCKRProber(void){delete mCodingSM;} 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return "EUC-KR";} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | protected: 58 | void GetDistribution(PRUint32 aCharLen, const char* aStr); 59 | 60 | nsCodingStateMachine* mCodingSM; 61 | nsProbingState mState; 62 | 63 | //EUCKRContextAnalysis mContextAnalyser; 64 | EUCKRDistributionAnalysis mDistributionAnalyser; 65 | char mLastChar[2]; 66 | 67 | }; 68 | 69 | 70 | #endif /* nsEUCKRProber_h__ */ 71 | 72 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCTWProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 
14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsEUCTWProber_h__ 39 | #define nsEUCTWProber_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | #include "CharDistribution.h" 44 | 45 | class nsEUCTWProber: public nsCharSetProber { 46 | public: 47 | nsEUCTWProber(void){mCodingSM = new nsCodingStateMachine(&EUCTWSMModel); 48 | Reset();} 49 | virtual ~nsEUCTWProber(void){delete mCodingSM;} 50 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 51 | const char* GetCharSetName() {return "x-euc-tw";} 52 | nsProbingState GetState(void) {return mState;} 53 | void Reset(void); 54 | float GetConfidence(void); 55 | void SetOpion() {} 56 | 57 | protected: 58 | void GetDistribution(PRUint32 aCharLen, const char* aStr); 59 | 60 | nsCodingStateMachine* mCodingSM; 61 | nsProbingState mState; 62 | 63 | //EUCTWContextAnalysis mContextAnalyser; 64 | EUCTWDistributionAnalysis mDistributionAnalyser; 65 | char mLastChar[2]; 66 | 67 | }; 68 | 69 | 70 | #endif /* nsEUCTWProber_h__ */ 71 | 72 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsGB2312Prober.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 
14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsGB2312Prober_h__ 39 | #define nsGB2312Prober_h__ 40 | 41 | #include "nsCharSetProber.h" 42 | #include "nsCodingStateMachine.h" 43 | #include "CharDistribution.h" 44 | 45 | // We use gb18030 to replace gb2312, because 18030 is a superset. 
46 | 47 | class nsGB18030Prober: public nsCharSetProber { 48 | public: 49 | nsGB18030Prober(void){mCodingSM = new nsCodingStateMachine(&GB18030SMModel); 50 | Reset();} 51 | virtual ~nsGB18030Prober(void){delete mCodingSM;} 52 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 53 | const char* GetCharSetName() {return "gb18030";} 54 | nsProbingState GetState(void) {return mState;} 55 | void Reset(void); 56 | float GetConfidence(void); 57 | void SetOpion() {} 58 | 59 | protected: 60 | void GetDistribution(PRUint32 aCharLen, const char* aStr); 61 | 62 | nsCodingStateMachine* mCodingSM; 63 | nsProbingState mState; 64 | 65 | //GB2312ContextAnalysis mContextAnalyser; 66 | GB2312DistributionAnalysis mDistributionAnalyser; 67 | char mLastChar[2]; 68 | 69 | }; 70 | 71 | 72 | #endif /* nsGB2312Prober_h__ */ 73 | 74 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsUTF8Prober.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. 
All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "nsUTF8Prober.h" 39 | 40 | void nsUTF8Prober::Reset(void) 41 | { 42 | mCodingSM->Reset(); 43 | mNumOfMBChar = 0; 44 | mState = eDetecting; 45 | } 46 | 47 | nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen) 48 | { 49 | nsSMState codingState; 50 | 51 | for (PRUint32 i = 0; i < aLen; i++) 52 | { 53 | codingState = mCodingSM->NextState(aBuf[i]); 54 | if (codingState == eItsMe) 55 | { 56 | mState = eFoundIt; 57 | break; 58 | } 59 | if (codingState == eStart) 60 | { 61 | if (mCodingSM->GetCurrentCharLen() >= 2) 62 | mNumOfMBChar++; 63 | } 64 | } 65 | 66 | if (mState == eDetecting) 67 | if (GetConfidence() > SHORTCUT_THRESHOLD) 68 | mState = eFoundIt; 69 | return mState; 70 | } 71 | 72 | #define ONE_CHAR_PROB (float)0.50 73 | 74 | float nsUTF8Prober::GetConfidence(void) 75 | { 76 | float unlike = (float)0.99; 77 | 78 | if (mNumOfMBChar < 6) 79 | { 80 | for (PRUint32 i = 0; i < mNumOfMBChar; i++) 81 | unlike *= ONE_CHAR_PROB; 82 | return (float)1.0 - unlike; 83 | } 84 | else 
85 | return (float)0.99; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCJPProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | // for S-JIS encoding, obeserve characteristic: 39 | // 1, kana character (or hankaku?) often have hight frequency of appereance 40 | // 2, kana character often exist in group 41 | // 3, certain combination of kana is never used in japanese language 42 | 43 | #ifndef nsEUCJPProber_h__ 44 | #define nsEUCJPProber_h__ 45 | 46 | #include "nsCharSetProber.h" 47 | #include "nsCodingStateMachine.h" 48 | #include "JpCntx.h" 49 | #include "CharDistribution.h" 50 | 51 | class nsEUCJPProber: public nsCharSetProber { 52 | public: 53 | nsEUCJPProber(void){mCodingSM = new nsCodingStateMachine(&EUCJPSMModel); 54 | Reset();} 55 | virtual ~nsEUCJPProber(void){delete mCodingSM;} 56 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 57 | const char* GetCharSetName() {return "EUC-JP";} 58 | nsProbingState GetState(void) {return mState;} 59 | void Reset(void); 60 | float GetConfidence(void); 61 | void SetOpion() {} 62 | 63 | protected: 64 | nsCodingStateMachine* mCodingSM; 65 | nsProbingState mState; 66 | 67 | EUCJPContextAnalysis mContextAnalyser; 68 | EUCJPDistributionAnalysis mDistributionAnalyser; 69 | 70 | char mLastChar[2]; 71 | }; 72 | 73 | 74 | #endif /* nsEUCJPProber_h__ */ 75 | 76 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsSJISProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | // for S-JIS encoding, obeserve characteristic: 39 | // 1, kana character (or hankaku?) 
often have hight frequency of appereance 40 | // 2, kana character often exist in group 41 | // 3, certain combination of kana is never used in japanese language 42 | 43 | #ifndef nsSJISProber_h__ 44 | #define nsSJISProber_h__ 45 | 46 | #include "nsCharSetProber.h" 47 | #include "nsCodingStateMachine.h" 48 | #include "JpCntx.h" 49 | #include "CharDistribution.h" 50 | 51 | 52 | class nsSJISProber: public nsCharSetProber { 53 | public: 54 | nsSJISProber(void){mCodingSM = new nsCodingStateMachine(&SJISSMModel); 55 | Reset();} 56 | virtual ~nsSJISProber(void){delete mCodingSM;} 57 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 58 | const char* GetCharSetName() {return "Shift_JIS";} 59 | nsProbingState GetState(void) {return mState;} 60 | void Reset(void); 61 | float GetConfidence(void); 62 | void SetOpion() {} 63 | 64 | protected: 65 | nsCodingStateMachine* mCodingSM; 66 | nsProbingState mState; 67 | 68 | SJISContextAnalysis mContextAnalyser; 69 | SJISDistributionAnalysis mDistributionAnalyser; 70 | 71 | char mLastChar[2]; 72 | 73 | }; 74 | 75 | 76 | #endif /* nsSJISProber_h__ */ 77 | 78 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/Makefile.in: -------------------------------------------------------------------------------- 1 | # 2 | # ***** BEGIN LICENSE BLOCK ***** 3 | # Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | # 5 | # The contents of this file are subject to the Mozilla Public License Version 6 | # 1.1 (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # http://www.mozilla.org/MPL/ 9 | # 10 | # Software distributed under the License is distributed on an "AS IS" basis, 11 | # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | # for the specific language governing rights and limitations under the 13 | # License. 
14 | # 15 | # The Original Code is Mozilla Universal charset detector code. 16 | # 17 | # The Initial Developer of the Original Code is 18 | # Netscape Communications Corporation. 19 | # Portions created by the Initial Developer are Copyright (C) 1998 20 | # the Initial Developer. All Rights Reserved. 21 | # 22 | # Contributor(s): 23 | # Simon Montagu 24 | # Shy Shalom 25 | # Proofpoint, Inc. 26 | # 27 | # Alternatively, the contents of this file may be used under the terms of 28 | # either the GNU General Public License Version 2 or later (the "GPL"), or 29 | # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 30 | # in which case the provisions of the GPL or the LGPL are applicable instead 31 | # of those above. If you wish to allow use of your version of this file only 32 | # under the terms of either the GPL or the LGPL, and not to allow others to 33 | # use your version of this file under the terms of the MPL, indicate your 34 | # decision by deleting the provisions above and replace them with the notice 35 | # and other provisions required by the GPL or the LGPL. If you do not delete 36 | # the provisions above, a recipient may use your version of this file under 37 | # the terms of any one of the MPL, the GPL or the LGPL. 38 | # 39 | # ***** END LICENSE BLOCK ***** 40 | 41 | DEPTH = ../../../.. 
42 | topsrcdir = @top_srcdir@ 43 | srcdir = @srcdir@ 44 | VPATH = @srcdir@ 45 | 46 | include $(DEPTH)/config/autoconf.mk 47 | 48 | MODULE = universalchardet_s 49 | LIBRARY_NAME = universalchardet_s 50 | 51 | CPPSRCS = \ 52 | CharDistribution.cpp \ 53 | JpCntx.cpp \ 54 | LangBulgarianModel.cpp \ 55 | LangCyrillicModel.cpp \ 56 | LangGreekModel.cpp \ 57 | LangHungarianModel.cpp \ 58 | LangHebrewModel.cpp \ 59 | nsHebrewProber.cpp \ 60 | nsCharSetProber.cpp \ 61 | nsBig5Prober.cpp \ 62 | nsEUCJPProber.cpp \ 63 | nsEUCKRProber.cpp \ 64 | nsEUCTWProber.cpp \ 65 | nsEscCharsetProber.cpp \ 66 | nsEscSM.cpp \ 67 | nsGB2312Prober.cpp \ 68 | nsMBCSGroupProber.cpp \ 69 | nsMBCSSM.cpp \ 70 | nsSBCSGroupProber.cpp \ 71 | nsSBCharSetProber.cpp \ 72 | nsSJISProber.cpp \ 73 | nsUTF8Prober.cpp \ 74 | nsLatin1Prober.cpp \ 75 | nsUniversalDetector.cpp \ 76 | $(NULL) 77 | 78 | # Not built on win32. Not sure why not 79 | ifneq ($(OS_ARCH),WINNT) 80 | CPPSRCS += LangThaiModel.cpp 81 | endif 82 | 83 | FORCE_STATIC_LIB = 1 84 | # This library is used by other shared libs in a static build 85 | FORCE_USE_PIC = 1 86 | 87 | include $(topsrcdir)/config/rules.mk 88 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsBig5Prober.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "nsBig5Prober.h" 39 | 40 | void nsBig5Prober::Reset(void) 41 | { 42 | mCodingSM->Reset(); 43 | mState = eDetecting; 44 | mDistributionAnalyser.Reset(); 45 | } 46 | 47 | nsProbingState nsBig5Prober::HandleData(const char* aBuf, PRUint32 aLen) 48 | { 49 | nsSMState codingState; 50 | 51 | for (PRUint32 i = 0; i < aLen; i++) 52 | { 53 | codingState = mCodingSM->NextState(aBuf[i]); 54 | if (codingState == eItsMe) 55 | { 56 | mState = eFoundIt; 57 | break; 58 | } 59 | if (codingState == eStart) 60 | { 61 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 62 | 63 | if (i == 0) 64 | { 65 | mLastChar[1] = aBuf[0]; 66 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 67 | } 68 | else 69 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 70 | } 71 | } 72 | 73 | mLastChar[0] = aBuf[aLen-1]; 74 | 75 | if (mState == eDetecting) 76 | if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 77 | mState = eFoundIt; 78 | 79 | return mState; 80 | } 81 | 82 | float nsBig5Prober::GetConfidence(void) 83 | { 84 | float distribCf = mDistributionAnalyser.GetConfidence(); 85 | 86 | return (float)distribCf; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsMBCSGroupProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Proofpoint, Inc. 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #ifndef nsMBCSGroupProber_h__ 40 | #define nsMBCSGroupProber_h__ 41 | 42 | #include "nsSJISProber.h" 43 | #include "nsUTF8Prober.h" 44 | #include "nsEUCJPProber.h" 45 | #include "nsGB2312Prober.h" 46 | #include "nsEUCKRProber.h" 47 | #include "nsBig5Prober.h" 48 | #include "nsEUCTWProber.h" 49 | /* Size of the mProbers and mIsActive arrays; equals the number of prober headers included above. */ 50 | #define NUM_OF_PROBERS 7 51 | /* Group prober: holds up to NUM_OF_PROBERS child probers in mProbers, with a parallel per-slot flag array mIsActive. How the children are created and polled is defined in the .cpp, not here. */ 52 | class nsMBCSGroupProber: public nsCharSetProber { 53 | public: 54 | nsMBCSGroupProber(PRUint32 aLanguageFilter); 55 | virtual ~nsMBCSGroupProber(); 56 | nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 57 | const char* GetCharSetName(); 58 | nsProbingState GetState(void) {return mState;} 59 | void Reset(void); 60 | float GetConfidence(void); 61 | /* No-op for this prober. */ void SetOpion() {} 62 | /* The two hooks below are compiled only when the corresponding DEBUG_* macro is defined. */ 63 | #ifdef DEBUG_chardet 64 | void DumpStatus(); 65 | #endif 66 | #ifdef DEBUG_jgmyers 67 | void GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset); 68 | #endif 69 | 70 | protected: 71 | /* Value returned by GetState(). */ nsProbingState mState; 72 | /* Child probers, one per slot. */ nsCharSetProber* mProbers[NUM_OF_PROBERS]; 73 | /* Per-slot flag parallel to mProbers. */ PRBool mIsActive[NUM_OF_PROBERS]; 74 | /* NOTE(review): presumably the mProbers index of the current best candidate -- confirm in the .cpp. */ PRInt32 mBestGuess; 75 | /* NOTE(review): presumably the count of slots still active -- confirm in the .cpp. */ PRUint32 mActiveNum; 76 | /* NOTE(review): meaning not visible in this header -- see the .cpp. */ PRUint32 mKeepNext; 77 | }; 78 | 79 | #endif /* nsMBCSGroupProber_h__ */ 80 | 81 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCKRProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "nsEUCKRProber.h" 39 | 40 | void nsEUCKRProber::Reset(void) 41 | { 42 | mCodingSM->Reset(); 43 | mState = eDetecting; 44 | mDistributionAnalyser.Reset(); 45 | //mContextAnalyser.Reset(); 46 | } 47 | 48 | nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen) 49 | { 50 | nsSMState codingState; 51 | 52 | for (PRUint32 i = 0; i < aLen; i++) 53 | { 54 | codingState = mCodingSM->NextState(aBuf[i]); 55 | if (codingState == eItsMe) 56 | { 57 | mState = eFoundIt; 58 | break; 59 | } 60 | if (codingState == eStart) 61 | { 62 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 63 | 64 | if (i == 0) 65 | { 66 | mLastChar[1] = aBuf[0]; 67 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 68 | } 69 | else 70 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 71 | } 72 | } 73 | 74 | mLastChar[0] = aBuf[aLen-1]; 75 | 76 | if (mState == eDetecting) 77 | if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 78 | mState = eFoundIt; 79 | // else 80 | // mDistributionAnalyser.HandleData(aBuf, aLen); 81 | 82 | return mState; 83 | } 84 | 85 | float nsEUCKRProber::GetConfidence(void) 86 | { 87 | float distribCf = mDistributionAnalyser.GetConfidence(); 88 | 89 | return (float)distribCf; 90 | } 91 | 92 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCTWProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "nsEUCTWProber.h" 39 | 40 | void nsEUCTWProber::Reset(void) 41 | { 42 | mCodingSM->Reset(); 43 | mState = eDetecting; 44 | mDistributionAnalyser.Reset(); 45 | //mContextAnalyser.Reset(); 46 | } 47 | 48 | nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen) 49 | { 50 | nsSMState codingState; 51 | 52 | for (PRUint32 i = 0; i < aLen; i++) 53 | { 54 | codingState = mCodingSM->NextState(aBuf[i]); 55 | if (codingState == eItsMe) 56 | { 57 | mState = eFoundIt; 58 | break; 59 | } 60 | if (codingState == eStart) 61 | { 62 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 63 | 64 | if (i == 0) 65 | { 66 | mLastChar[1] = aBuf[0]; 67 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 68 | } 69 | else 70 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 71 | } 72 | } 73 | 74 | mLastChar[0] = aBuf[aLen-1]; 75 | 76 | if (mState == eDetecting) 77 | if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 78 | mState = eFoundIt; 79 | // else 80 | // mDistributionAnalyser.HandleData(aBuf, aLen); 81 | 82 | return mState; 83 | } 84 | 85 | float nsEUCTWProber::GetConfidence(void) 86 | { 87 | float distribCf = mDistributionAnalyser.GetConfidence(); 88 | 89 | return (float)distribCf; 90 | } 91 | 92 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsPkgInt.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
 *
 * ***** END LICENSE BLOCK ***** */

#ifndef nsPkgInt_h__
#define nsPkgInt_h__
#include "nscore.h"

/* Packed-integer tables: several small unsigned values ("units") of 4, 8 or
 * 16 bits are stored side by side in each PRUint32 word of a data array.
 * The four enums below hold, per unit width, the constants GETFROMPCK needs
 * to locate and extract unit number i. */

/* Right shift applied to a unit index to get the index of its data word
 * (i >> 3 for 4-bit units, i >> 2 for 8-bit, i >> 1 for 16-bit). */
typedef enum {
  eIdxSft4bits  = 3,
  eIdxSft8bits  = 2,
  eIdxSft16bits = 1
} nsIdxSft;

/* Mask applied to a unit index to get the unit's position inside its word. */
typedef enum {
  eSftMsk4bits  = 7,
  eSftMsk8bits  = 3,
  eSftMsk16bits = 1
} nsSftMsk;

/* Left shift that turns a position inside a word into a bit offset
 * (position * 4, * 8 or * 16). */
typedef enum {
  eBitSft4bits  = 2,
  eBitSft8bits  = 3,
  eBitSft16bits = 4
} nsBitSft;

/* Mask that keeps exactly one unit once it has been shifted down to bit 0. */
typedef enum {
  eUnitMsk4bits  = 0x0000000FL,
  eUnitMsk8bits  = 0x000000FFL,
  eUnitMsk16bits = 0x0000FFFFL
} nsUnitMsk;

/* Descriptor of one packed table: the four extraction constants plus the
 * word array that holds the packed units. */
typedef struct nsPkgInt {
  nsIdxSft  idxsft;
  nsSftMsk  sftmsk;
  nsBitSft  bitsft;
  nsUnitMsk unitmsk;
  PRUint32  *data;
} nsPkgInt;


/* Packs two 16-bit units into one word; `a` occupies the low-order bits. */
#define PCK16BITS(a,b)            ((PRUint32)(((b) << 16) | (a)))

/* Packs four 8-bit units into one word; `a` occupies the low-order bits. */
#define PCK8BITS(a,b,c,d)         PCK16BITS( ((PRUint32)(((b) << 8) | (a))),  \
                                             ((PRUint32)(((d) << 8) | (c))))

/* Packs eight 4-bit units into one word; `a` occupies the low-order bits. */
#define PCK4BITS(a,b,c,d,e,f,g,h) PCK8BITS(  ((PRUint32)(((b) << 4) | (a))), \
                                             ((PRUint32)(((d) << 4) | (c))), \
                                             ((PRUint32)(((f) << 4) | (e))), \
                                             ((PRUint32)(((h) << 4) | (g))) )

/* Extracts unit number `i` from the packed table `c` (an nsPkgInt):
 * select the word, shift the wanted unit down to bit 0, then mask it.
 * Both arguments are evaluated more than once, so they must be free of
 * side effects. */
#define GETFROMPCK(i, c) \
 (((((c).data)[(i)>>(c).idxsft])>>(((i)&(c).sftmsk)<<(c).bitsft))&(c).unitmsk)

#endif /* nsPkgInt_h__ */


--------------------------------------------------------------------------------
/mozilla/extensions/universalchardet/src/base/nsCharSetProber.h:
--------------------------------------------------------------------------------
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License.
You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Universal charset detector code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *          Shy Shalom
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifndef nsCharSetProber_h__
#define nsCharSetProber_h__

#include "nscore.h"

//#define DEBUG_chardet // Uncomment this for debug dump.

// Verdict a prober reports about the data it has seen so far.
typedef enum {
  eDetecting = 0, // Still detecting: no sure answer yet, but the caller can ask for a confidence.
  eFoundIt = 1,   // Positive answer.
  eNotMe = 2      // Negative answer.
} nsProbingState;

// Confidence above which the probers in this directory stop early and
// report eFoundIt.
#define SHORTCUT_THRESHOLD      (float)0.95

// Abstract interface implemented by every charset prober.
class nsCharSetProber {
public:
  virtual ~nsCharSetProber() {}
  // Name of the charset this prober stands for.
  virtual const char* GetCharSetName() = 0;
  // Consumes aLen bytes starting at aBuf and returns the resulting state.
  virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen) = 0;
  // Current probing state.
  virtual nsProbingState GetState(void) = 0;
  // Returns the prober to its initial state.
  virtual void      Reset(void)  = 0;
  // Current confidence; compared against SHORTCUT_THRESHOLD by the probers.
  virtual float     GetConfidence(void) = 0;
  // NOTE(review): presumably meant "SetOption"; the spelling is part of the
  // interface, so implementers must use it as is.
  virtual void      SetOpion() = 0;

#ifdef DEBUG_chardet
  virtual void  DumpStatus() {};
#endif

  // Helper functions used in the Latin1 and Group probers.
  // Both functions allocate a new buffer for newBuf. This buffer should be
  // freed by the caller using PR_FREEIF.
  // Both functions return PR_FALSE in case of memory allocation failure.
  static PRBool FilterWithoutEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen);
  static PRBool FilterWithEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen);

};

#endif /* nsCharSetProber_h__ */

--------------------------------------------------------------------------------
/mozilla/extensions/universalchardet/src/base/nsGB2312Prober.cpp:
--------------------------------------------------------------------------------
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied.
See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | // for S-JIS encoding, obeserve characteristic: 39 | // 1, kana character (or hankaku?) 
often have hight frequency of appereance 40 | // 2, kana character often exist in group 41 | // 3, certain combination of kana is never used in japanese language 42 | 43 | #include "nsGB2312Prober.h" 44 | 45 | void nsGB18030Prober::Reset(void) 46 | { 47 | mCodingSM->Reset(); 48 | mState = eDetecting; 49 | mDistributionAnalyser.Reset(); 50 | //mContextAnalyser.Reset(); 51 | } 52 | 53 | nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen) 54 | { 55 | nsSMState codingState; 56 | 57 | for (PRUint32 i = 0; i < aLen; i++) 58 | { 59 | codingState = mCodingSM->NextState(aBuf[i]); 60 | if (codingState == eItsMe) 61 | { 62 | mState = eFoundIt; 63 | break; 64 | } 65 | if (codingState == eStart) 66 | { 67 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 68 | 69 | if (i == 0) 70 | { 71 | mLastChar[1] = aBuf[0]; 72 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 73 | } 74 | else 75 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 76 | } 77 | } 78 | 79 | mLastChar[0] = aBuf[aLen-1]; 80 | 81 | if (mState == eDetecting) 82 | if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 83 | mState = eFoundIt; 84 | // else 85 | // mDistributionAnalyser.HandleData(aBuf, aLen); 86 | 87 | return mState; 88 | } 89 | 90 | float nsGB18030Prober::GetConfidence(void) 91 | { 92 | float distribCf = mDistributionAnalyser.GetConfidence(); 93 | 94 | return (float)distribCf; 95 | } 96 | 97 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsUniversalDetector.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in 
compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Communicator client code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
 *
 * ***** END LICENSE BLOCK ***** */

#ifndef nsUniversalDetector_h__
#define nsUniversalDetector_h__

class nsCharSetProber;

// Size of the mCharSetProbers array below.
#define NUM_OF_CHARSET_PROBERS  3

// Coarse classification of the input seen so far.
// NOTE(review): meanings inferred from the names -- only ASCII seen, an
// escape sequence seen, a high-bit byte seen; confirm in the .cpp.
typedef enum {
  ePureAscii = 0,
  eEscAscii  = 1,
  eHighbyte  = 2
} nsInputState;

// Language filter bits passed to the detector and prober constructors.
// NS_FILTER_ALL (0x1F) is the union of the five single-bit filters.
#define NS_FILTER_CHINESE_SIMPLIFIED  0x01
#define NS_FILTER_CHINESE_TRADITIONAL 0x02
#define NS_FILTER_JAPANESE            0x04
#define NS_FILTER_KOREAN              0x08
#define NS_FILTER_NON_CJK             0x10
#define NS_FILTER_ALL                 0x1F
#define NS_FILTER_CHINESE (NS_FILTER_CHINESE_SIMPLIFIED | \
                           NS_FILTER_CHINESE_TRADITIONAL)
#define NS_FILTER_CJK (NS_FILTER_CHINESE_SIMPLIFIED | \
                       NS_FILTER_CHINESE_TRADITIONAL | \
                       NS_FILTER_JAPANESE | \
                       NS_FILTER_KOREAN)

// Top-level charset detector. Abstract: a subclass must implement Report().
// NOTE(review): this header uses PRUint32 / PRBool / nsresult without
// including their definitions, so it relies on the includer -- confirm.
class nsUniversalDetector {
public:
   nsUniversalDetector(PRUint32 aLanguageFilter);
   virtual ~nsUniversalDetector();
   // Consumes aLen bytes starting at aBuf.
   virtual nsresult HandleData(const char* aBuf, PRUint32 aLen);
   // Presumably signals that no more data will follow -- confirm in the .cpp.
   virtual void DataEnd(void);

protected:
   // Implemented by subclasses; presumably called with the detected charset
   // name -- confirm in the .cpp.
   virtual void Report(const char* aCharset) = 0;
   virtual void Reset();
   nsInputState  mInputState;
   PRBool  mDone;
   PRBool  mInTag;
   PRBool  mStart;
   PRBool  mGotData;
   char    mLastChar;
   const char *  mDetectedCharset;
   PRInt32 mBestGuess;
   PRUint32 mLanguageFilter;

   nsCharSetProber  *mCharSetProbers[NUM_OF_CHARSET_PROBERS];
   nsCharSetProber  *mEscCharSetProber;
};

#endif


--------------------------------------------------------------------------------
/mozilla/extensions/universalchardet/src/base/nsSJISProber.cpp:
--------------------------------------------------------------------------------
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the
"License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | // for S-JIS encoding, obeserve characteristic: 39 | // 1, kana character (or hankaku?) 
often have hight frequency of appereance 40 | // 2, kana character often exist in group 41 | // 3, certain combination of kana is never used in japanese language 42 | 43 | #include "nsSJISProber.h" 44 | 45 | void nsSJISProber::Reset(void) 46 | { 47 | mCodingSM->Reset(); 48 | mState = eDetecting; 49 | mContextAnalyser.Reset(); 50 | mDistributionAnalyser.Reset(); 51 | } 52 | 53 | nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen) 54 | { 55 | nsSMState codingState; 56 | 57 | for (PRUint32 i = 0; i < aLen; i++) 58 | { 59 | codingState = mCodingSM->NextState(aBuf[i]); 60 | if (codingState == eItsMe) 61 | { 62 | mState = eFoundIt; 63 | break; 64 | } 65 | if (codingState == eStart) 66 | { 67 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 68 | if (i == 0) 69 | { 70 | mLastChar[1] = aBuf[0]; 71 | mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen); 72 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 73 | } 74 | else 75 | { 76 | mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen); 77 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 78 | } 79 | } 80 | } 81 | 82 | mLastChar[0] = aBuf[aLen-1]; 83 | 84 | if (mState == eDetecting) 85 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 86 | mState = eFoundIt; 87 | 88 | return mState; 89 | } 90 | 91 | float nsSJISProber::GetConfidence(void) 92 | { 93 | float contxtCf = mContextAnalyser.GetConfidence(); 94 | float distribCf = mDistributionAnalyser.GetConfidence(); 95 | 96 | return (contxtCf > distribCf ? 
contxtCf : distribCf); 97 | } 98 | 99 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEUCJPProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | // for japanese encoding, obeserve characteristic: 39 | // 1, kana character (or hankaku?) often have hight frequency of appereance 40 | // 2, kana character often exist in group 41 | // 3, certain combination of kana is never used in japanese language 42 | 43 | #include "nsEUCJPProber.h" 44 | 45 | void nsEUCJPProber::Reset(void) 46 | { 47 | mCodingSM->Reset(); 48 | mState = eDetecting; 49 | mContextAnalyser.Reset(); 50 | mDistributionAnalyser.Reset(); 51 | } 52 | 53 | nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen) 54 | { 55 | nsSMState codingState; 56 | 57 | for (PRUint32 i = 0; i < aLen; i++) 58 | { 59 | codingState = mCodingSM->NextState(aBuf[i]); 60 | if (codingState == eItsMe) 61 | { 62 | mState = eFoundIt; 63 | break; 64 | } 65 | if (codingState == eStart) 66 | { 67 | PRUint32 charLen = mCodingSM->GetCurrentCharLen(); 68 | 69 | if (i == 0) 70 | { 71 | mLastChar[1] = aBuf[0]; 72 | mContextAnalyser.HandleOneChar(mLastChar, charLen); 73 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 74 | } 75 | else 76 | { 77 | mContextAnalyser.HandleOneChar(aBuf+i-1, charLen); 78 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 79 | } 80 | } 81 | } 82 | 83 | mLastChar[0] = aBuf[aLen-1]; 84 | 85 | if (mState == eDetecting) 86 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 87 | mState = eFoundIt; 88 | 89 | return mState; 90 | } 91 | 92 | float nsEUCJPProber::GetConfidence(void) 93 | { 94 | float contxtCf = mContextAnalyser.GetConfidence(); 95 | float distribCf = mDistributionAnalyser.GetConfidence(); 96 | 97 | return (contxtCf > distribCf ? 
contxtCf : distribCf); 98 | } 99 | 100 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsEscCharsetProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | 39 | #include "nsEscCharsetProber.h" 40 | #include "nsUniversalDetector.h" 41 | 42 | nsEscCharSetProber::nsEscCharSetProber(PRUint32 aLanguageFilter) 43 | { 44 | for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++) 45 | mCodingSM[i] = nsnull; 46 | if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED) 47 | { 48 | mCodingSM[0] = new nsCodingStateMachine(&HZSMModel); 49 | mCodingSM[1] = new nsCodingStateMachine(&ISO2022CNSMModel); 50 | } 51 | if (aLanguageFilter & NS_FILTER_JAPANESE) 52 | mCodingSM[2] = new nsCodingStateMachine(&ISO2022JPSMModel); 53 | if (aLanguageFilter & NS_FILTER_KOREAN) 54 | mCodingSM[3] = new nsCodingStateMachine(&ISO2022KRSMModel); 55 | mActiveSM = NUM_OF_ESC_CHARSETS; 56 | mState = eDetecting; 57 | mDetectedCharset = nsnull; 58 | } 59 | 60 | nsEscCharSetProber::~nsEscCharSetProber(void) 61 | { 62 | for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++) 63 | delete mCodingSM[i]; 64 | } 65 | 66 | void nsEscCharSetProber::Reset(void) 67 | { 68 | mState = eDetecting; 69 | for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++) 70 | if (mCodingSM[i]) 71 | mCodingSM[i]->Reset(); 72 | mActiveSM = NUM_OF_ESC_CHARSETS; 73 | mDetectedCharset = nsnull; 74 | } 75 | 76 | nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen) 77 | { 78 | nsSMState codingState; 79 | PRInt32 j; 80 | PRUint32 i; 81 | 82 | for ( i = 0; i < aLen && mState == eDetecting; i++) 83 | { 84 | for (j = mActiveSM-1; j>= 0; j--) 85 | { 86 | if (mCodingSM[j]) 87 | { 88 | codingState = mCodingSM[j]->NextState(aBuf[i]); 89 | if (codingState == eItsMe) 90 | { 91 | mState = eFoundIt; 92 | mDetectedCharset = mCodingSM[j]->GetCodingStateMachine(); 93 | return mState; 94 | } 95 | } 96 | } 97 | } 98 | 99 | return mState; 100 | } 101 | 102 | 
-------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsCodingStateMachine.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is mozilla.org code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | #ifndef nsCodingStateMachine_h__ 38 | #define nsCodingStateMachine_h__ 39 | 40 | #include "nsPkgInt.h" 41 | 42 | typedef enum { 43 | eStart = 0, 44 | eError = 1, 45 | eItsMe = 2 46 | } nsSMState; 47 | 48 | #define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable) 49 | 50 | //state machine model 51 | typedef struct 52 | { 53 | nsPkgInt classTable; 54 | PRUint32 classFactor; 55 | nsPkgInt stateTable; 56 | const PRUint32* charLenTable; 57 | const char* name; 58 | } SMModel; 59 | 60 | class nsCodingStateMachine { 61 | public: 62 | nsCodingStateMachine(SMModel* sm){ 63 | mCurrentState = eStart; 64 | mModel = sm; 65 | } 66 | nsSMState NextState(char c){ 67 | //for each byte we get its class , if it is first byte, we also get byte length 68 | PRUint32 byteCls = GETCLASS(c); 69 | if (mCurrentState == eStart) 70 | { 71 | mCurrentBytePos = 0; 72 | mCurrentCharLen = mModel->charLenTable[byteCls]; 73 | } 74 | //from byte's class and stateTable, we get its next state 75 | mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls, 76 | mModel->stateTable); 77 | mCurrentBytePos++; 78 | return mCurrentState; 79 | } 80 | PRUint32 GetCurrentCharLen(void) {return mCurrentCharLen;} 81 | void Reset(void) {mCurrentState = eStart;} 82 | const char * GetCodingStateMachine() {return mModel->name;} 83 | 84 | protected: 85 | nsSMState mCurrentState; 86 | PRUint32 mCurrentCharLen; 87 | PRUint32 mCurrentBytePos; 88 | 89 | SMModel *mModel; 90 | }; 91 | 92 | extern SMModel UTF8SMModel; 93 | extern SMModel Big5SMModel; 94 | extern SMModel EUCJPSMModel; 95 | extern SMModel EUCKRSMModel; 96 | extern SMModel EUCTWSMModel; 97 | extern SMModel GB18030SMModel; 98 | extern SMModel SJISSMModel; 99 | extern SMModel UCS2BESMModel; 100 | 101 | 102 | extern 
SMModel HZSMModel; 103 | extern SMModel ISO2022CNSMModel; 104 | extern SMModel ISO2022JPSMModel; 105 | extern SMModel ISO2022KRSMModel; 106 | 107 | #endif /* nsCodingStateMachine_h__ */ 108 | 109 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/CharDistribution.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Communicator client code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. 
If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "CharDistribution.h" 39 | 40 | #include "JISFreq.tab" 41 | #include "Big5Freq.tab" 42 | #include "EUCKRFreq.tab" 43 | #include "EUCTWFreq.tab" 44 | #include "GB2312Freq.tab" 45 | 46 | #define SURE_YES 0.99f 47 | #define SURE_NO 0.01f 48 | 49 | #define MINIMUM_DATA_THRESHOLD 4 50 | 51 | //return confidence base on received data 52 | float CharDistributionAnalysis::GetConfidence() 53 | { 54 | //if we didn't receive any character in our consideration range, or the 55 | // number of frequent characters is below the minimum threshold, return 56 | // negative answer 57 | if (mTotalChars <= 0 || mFreqChars <= MINIMUM_DATA_THRESHOLD) 58 | return SURE_NO; 59 | 60 | if (mTotalChars != mFreqChars) { 61 | float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); 62 | 63 | if (r < SURE_YES) 64 | return r; 65 | } 66 | //normalize confidence, (we don't want to be 100% sure) 67 | return SURE_YES; 68 | } 69 | 70 | EUCTWDistributionAnalysis::EUCTWDistributionAnalysis() 71 | { 72 | mCharToFreqOrder = EUCTWCharToFreqOrder; 73 | mTableSize = EUCTW_TABLE_SIZE; 74 | mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO; 75 | } 76 | 77 | EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() 78 | { 79 | mCharToFreqOrder = EUCKRCharToFreqOrder; 80 | mTableSize = EUCKR_TABLE_SIZE; 81 | mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; 82 | } 83 | 84 | 
GB2312DistributionAnalysis::GB2312DistributionAnalysis() 85 | { 86 | mCharToFreqOrder = GB2312CharToFreqOrder; 87 | mTableSize = GB2312_TABLE_SIZE; 88 | mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; 89 | } 90 | 91 | Big5DistributionAnalysis::Big5DistributionAnalysis() 92 | { 93 | mCharToFreqOrder = Big5CharToFreqOrder; 94 | mTableSize = BIG5_TABLE_SIZE; 95 | mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; 96 | } 97 | 98 | SJISDistributionAnalysis::SJISDistributionAnalysis() 99 | { 100 | mCharToFreqOrder = JISCharToFreqOrder; 101 | mTableSize = JIS_TABLE_SIZE; 102 | mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 103 | } 104 | 105 | EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() 106 | { 107 | mCharToFreqOrder = JISCharToFreqOrder; 108 | mTableSize = JIS_TABLE_SIZE; 109 | mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 110 | } 111 | 112 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsSBCharSetProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 
16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | #include 39 | #include "nsSBCharSetProber.h" 40 | 41 | nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 aLen) 42 | { 43 | unsigned char order; 44 | 45 | for (PRUint32 i = 0; i < aLen; i++) 46 | { 47 | order = mModel->charToOrderMap[(unsigned char)aBuf[i]]; 48 | 49 | if (order < SYMBOL_CAT_ORDER) 50 | mTotalChar++; 51 | if (order < SAMPLE_SIZE) 52 | { 53 | mFreqChar++; 54 | 55 | if (mLastOrder < SAMPLE_SIZE) 56 | { 57 | mTotalSeqs++; 58 | if (!mReversed) 59 | ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]); 60 | else // reverse the order of the letters in the lookup 61 | ++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]); 62 | } 63 | } 64 | mLastOrder = order; 65 | } 66 | 67 | if (mState == eDetecting) 68 | if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD) 69 | { 70 | float cf = GetConfidence(); 71 | if (cf > POSITIVE_SHORTCUT_THRESHOLD) 72 | mState = eFoundIt; 73 | else if (cf < NEGATIVE_SHORTCUT_THRESHOLD) 74 | mState = eNotMe; 75 | } 76 | 77 | return mState; 78 | } 79 | 80 | void nsSingleByteCharSetProber::Reset(void) 81 | { 82 | mState = eDetecting; 83 | mLastOrder = 255; 84 | for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++) 85 | mSeqCounters[i] = 0; 86 | mTotalSeqs = 0; 87 | mTotalChar = 0; 88 | mFreqChar = 0; 89 | } 90 | 91 | //#define NEGATIVE_APPROACH 1 92 | 93 | float nsSingleByteCharSetProber::GetConfidence(void) 94 | { 95 | #ifdef NEGATIVE_APPROACH 96 | if (mTotalSeqs > 0) 97 | if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 ) 98 | return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar; 99 | return (float)0.01; 100 | #else //POSITIVE_APPROACH 101 | float r; 102 | 103 | if (mTotalSeqs > 0) { 104 | r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio; 105 | r = r*mFreqChar/mTotalChar; 106 | if (r >= (float)1.00) 107 | r = (float)0.99; 108 | return r; 109 | } 
110 | return (float)0.01; 111 | #endif 112 | } 113 | 114 | const char* nsSingleByteCharSetProber::GetCharSetName() 115 | { 116 | if (!mNameProber) 117 | return mModel->charsetName; 118 | return mNameProber->GetCharSetName(); 119 | } 120 | 121 | #ifdef DEBUG_chardet 122 | void nsSingleByteCharSetProber::DumpStatus() 123 | { 124 | printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName()); 125 | } 126 | #endif 127 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsCharSetProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 
21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 36 | * 37 | * ***** END LICENSE BLOCK ***** */

#include "nsCharSetProber.h"
#include "prmem.h"

//This filter applies to all scripts which do not use English characters
//
// Splits aBuf into segments at every 7-bit byte that is not an ASCII letter
// and copies into a newly allocated buffer only the segments that contain at
// least one byte with the high bit set, each followed by a single space in
// place of its delimiter.
//   aBuf, aLen - input bytes (not modified)
//   newBuf     - receives the buffer allocated with PR_Malloc(aLen)
//                (presumably released by the caller - confirm at call sites)
//   newLen     - receives the number of bytes written to *newBuf
// Returns PR_FALSE when the allocation fails, PR_TRUE otherwise.
PRBool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen)
{
  char *newptr;
  char *prevPtr, *curPtr;

  // set once the current segment has shown a byte with the high bit set
  PRBool meetMSB = PR_FALSE;
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  // prevPtr marks the start of the current segment, curPtr scans ahead
  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z')
    {
      //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      if (meetMSB && curPtr > prevPtr)
      //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;
        prevPtr++;          // step over the delimiter itself
        *newptr++ = ' ';    // and emit a space in its place
        meetMSB = PR_FALSE;
      }
      else //ignore current segment. (either because it is just a symbol or just an English word)
        prevPtr = curPtr+1;
    }
  }
  // flush the trailing segment if it qualified and was not closed by a delimiter
  if (meetMSB && curPtr > prevPtr)
    while (prevPtr < curPtr) *newptr++ = *prevPtr++;

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

//This filter applies to all scripts which contain both English characters and upper ASCII characters.
//
// Splits aBuf into segments at every 7-bit byte that is not an ASCII letter
// and copies the non-empty segments into a newly allocated buffer, each
// followed by a single space, except for segments that end while inside a
// '<' ... '>' tag, which are dropped.
//   aBuf, aLen - input bytes (not modified)
//   newBuf     - receives the buffer allocated with PR_Malloc(aLen)
//                (presumably released by the caller - confirm at call sites)
//   newLen     - receives the number of bytes written to *newBuf
// Returns PR_FALSE when the allocation fails, PR_TRUE otherwise.
PRBool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen)
{
  //do filtering to reduce load to probers
  char *newptr;
  char *prevPtr, *curPtr;
  PRBool isInTag = PR_FALSE;

  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  // prevPtr marks the start of the current segment, curPtr scans ahead
  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    // the tag flag is updated before the delimiter test, so the closing '>'
    // is already seen as outside the tag when the segment before it is judged
    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol
                                        // and it is not inside a tag, keep it.
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;
        prevPtr++;          // step over the delimiter itself
        *newptr++ = ' ';    // and emit a space in its place
      }
      else
        prevPtr = curPtr+1;
    }
  }

  // If the current segment contains more than just a symbol
  // and it is not inside a tag then keep it.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;

  newLen = newptr - *newBuf;

  return PR_TRUE;
}
-------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/JpCntx.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Communicator client code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above.
If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */

#ifndef __JPCNTX_H__
#define __JPCNTX_H__

// number of sequence categories, i.e. the size of mRelSample
#define NUM_OF_CATEGORY 6

#include "nscore.h"

// GotEnoughData() turns true once more than this many sequences were counted
#define ENOUGH_REL_THRESHOLD  100
// HandleOneChar() stops counting once more than this many sequences were counted
#define MAX_REL_THRESHOLD     1000

//hiragana frequency category table
extern char jp2CharContext[83][83];

// Counts how often pairs of consecutive hiragana fall into each frequency
// category. Encoding-specific subclasses supply GetOrder(), which maps a
// character to its index into jp2CharContext or -1 when it is not of interest.
class JapaneseContextAnalysis
{
public:
  JapaneseContextAnalysis() {Reset();}

  void HandleData(const char* aBuf, PRUint32 aLen);

  // Accounts for one character of aCharLen bytes starting at aStr.
  void HandleOneChar(const char* aStr, PRUint32 aCharLen)
  {
    PRInt32 order;

    //if we received enough data, stop here
    if (mTotalRel > MAX_REL_THRESHOLD)   mDone = PR_TRUE;
    if (mDone)       return;

    //Only 2-bytes characters are of our interest
    order = (aCharLen == 2) ? GetOrder(aStr) : -1;
    // a sequence is only counted when both this and the previous character
    // produced a valid order
    if (order != -1 && mLastCharOrder != -1)
    {
      mTotalRel++;
      //count this sequence to its category counter
      mRelSample[jp2CharContext[mLastCharOrder][order]]++;
    }
    mLastCharOrder = order;
  }

  float GetConfidence();
  void      Reset(void);
  void      SetOpion(){}
  PRBool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}

protected:
  // Order of the character at str, or -1. The two-argument overload also
  // reports a length through charLen; its definition is not in this header.
  virtual PRInt32 GetOrder(const char* str, PRUint32 *charLen) = 0;
  virtual PRInt32 GetOrder(const char* str) = 0;

  //category counters, each integer counts sequences in its category
  PRUint32 mRelSample[NUM_OF_CATEGORY];

  //total sequence received
  PRUint32 mTotalRel;

  //The order of previous char
  PRInt32  mLastCharOrder;

  //if last byte in current buffer is not the last byte of a character, we
  //need to know how many byte to skip in next buffer.
  PRUint32 mNeedToSkipCharNum;

  //If this flag is set to PR_TRUE, detection is done and conclusion has been made
  PRBool   mDone;
};


// Shift_JIS flavour: hiragana are the two-byte characters whose first byte is
// '\202' and whose second byte lies in 0x9f..0xf1, giving orders 0..82.
class SJISContextAnalysis : public JapaneseContextAnalysis
{
  //SJISContextAnalysis(){};
protected:
  PRInt32 GetOrder(const char* str, PRUint32 *charLen);

  PRInt32 GetOrder(const char* str)
  {
    //We only interested in Hiragana, so first byte is '\202'
    if (*str == '\202' &&
          (unsigned char)*(str+1) >= (unsigned char)0x9f &&
          (unsigned char)*(str+1) <= (unsigned char)0xf1)
      return (unsigned char)*(str+1) - (unsigned char)0x9f;
    return -1;
  }
};

// EUC-JP flavour: hiragana are the two-byte characters whose first byte is
// '\244' and whose second byte lies in 0xa1..0xf3, giving orders 0..82.
class EUCJPContextAnalysis : public JapaneseContextAnalysis
{
protected:
  PRInt32 GetOrder(const char* str, PRUint32 *charLen);
  PRInt32 GetOrder(const char* str)
    //We only interested in Hiragana, so first byte is '\244'
  {
    if (*str == '\244' &&
          (unsigned char)*(str+1) >= (unsigned char)0xa1 &&
          (unsigned char)*(str+1) <= (unsigned char)0xf3)
      return (unsigned char)*(str+1) - (unsigned char)0xa1;
    return -1;
  }
};

#endif /* __JPCNTX_H__ */

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Universal Character Set Detector (UCSD) 2 | 3 | A library exposing a C interface and dependency-free interface to the Mozilla C++ UCSD library. 4 | 5 | This library provides a highly accurate set of heuristics that attempt to determine the character set used to encode some input text. 6 | This is extremely useful when your program has to handle an input file which is supplied without any encoding metadata. 7 | 8 | Pulls together: 9 | 10 | * A NSPR emulation library (see `nspr-emu/README.md`) 11 | * Code written by Colin Snover to provide a command line interface to the library 12 | * The UCSD library itself from the Mozilla seamonkey source tree 13 | 14 | The UCSD version provided is that present in the Mozilla public repo as of 31/10/2010. 15 | 16 | ## Building 17 | 18 | We have a build system based on CMake, so you will need that installed. That done, simply do this incantation: 19 | 20 | ./configure 21 | make 22 | sudo make install 23 | 24 | This will install the header file `charsetdetect.h` and the UCSD shared library, which you should link against in your compiler. 25 | 26 | ## API documentation 27 | 28 | The library provides an opaque type of character set detectors: 29 | 30 | typedef void* csd_t; 31 | 32 | The first thing a client should do is create one of these: 33 | 34 | csd_t csd_open(void); 35 | 36 | A `csd_t` created in this fashion must be freed by `csd_close`. If creation fails, `csd_open` returns `(csd_t)-1`.
37 | 38 | Now you need to feed some data to the detector: 39 | 40 | int csd_consider(csd_t csd, const char *data, int length); 41 | 42 | The meaning of the return code is as follows: 43 | 44 | * Returns 0 if more data is needed to come to a conclusion 45 | * Returns a positive number if enough data has been received to detect the character set 46 | * Returns a negative number if there is an error 47 | 48 | Finally, close the detector to find out what the character set is: 49 | 50 | const char *csd_close(csd_t csd); 51 | 52 | The detected character set name is returned as an ASCII string. This function returns `NULL` if detection failed because there was not 53 | enough data. It is safe to call `csd_close` at any point from creation by `csd_open` to the first call of `csd_close` on that character 54 | set detector. 55 | 56 | ## Full example 57 | 58 | This is a complete C program that shows how the library can be used to build a simple command-line character set detector: 59 | 60 | #include "charsetdetect.h" 61 | #include "stdio.h" 62 | 63 | #define BUFFER_SIZE 4096 64 | 65 | int main(int argc, const char * argv[]) { 66 | csd_t csd = csd_open(); 67 | if (csd == (csd_t)-1) { 68 | printf("csd_open failed\n"); 69 | return 1; 70 | } 71 | 72 | int size; 73 | char buf[BUFFER_SIZE] = {0}; 74 | 75 | while ((size = fread(buf, 1, sizeof(buf), stdin)) != 0) { 76 | int result = csd_consider(csd, buf, size); 77 | if (result < 0) { 78 | printf("csd_consider failed\n"); 79 | return 3; 80 | } else if (result > 0) { 81 | // Already have enough data 82 | break; 83 | } 84 | } 85 | 86 | const char *result = csd_close(csd); 87 | if (result == NULL) { 88 | printf("Unknown character set\n"); 89 | return 2; 90 | } else { 91 | printf("%s\n", result); 92 | return 0; 93 | } 94 | } 95 | 96 | You can compile it and try it (on platforms with GCC) as follows: 97 | 98 | gcc example.c -lcharsetdetect 99 | ./a.out < my_test_file.txt 100 | 101 | ## Known character sets 102 | 103 | The list of possible 
character sets that can be returned from the library as of the most recent update is: 104 | 105 | Big5 106 | EUC-JP 107 | EUC-KR 108 | GB18030 109 | gb18030 110 | HZ-GB-2312 111 | IBM855 112 | IBM866 113 | ISO-2022-CN 114 | ISO-2022-JP 115 | ISO-2022-KR 116 | ISO-8859-2 117 | ISO-8859-5 118 | ISO-8859-7 119 | ISO-8859-8 120 | KOI8-R 121 | Shift_JIS 122 | TIS-620 123 | UTF-8 124 | UTF-16BE 125 | UTF-16LE 126 | UTF-32BE 127 | UTF-32LE 128 | windows-1250 129 | windows-1251 130 | windows-1252 131 | windows-1253 132 | windows-1255 133 | x-euc-tw 134 | X-ISO-10646-UCS-4-2143 135 | X-ISO-10646-UCS-4-3412 136 | x-mac-cyrillic 137 | 138 | We believe this list to be exhaustive. Future updates to the UCSD library may add more alternatives, but we will endeavour to keep 139 | this list current. 140 | 141 | Notice that you may get both capitalisations of `GB18030`. For this reason (and to be future-proof against any future behaviour 142 | like this for newly-added character sets) we recommend that you compare character set names case insensitively. 143 | 144 | ## Licensing 145 | 146 | The files `charsetdetect.{cpp,h}` are (c) 2010 Colin Snover and released under an MIT license. 147 | 148 | The UCSD is (c) mozilla.org and tri-licensed under MPL 1.1/GPL 2.0/LGPL 2.1. 149 | 150 | We incorporate header files from the NSPR emulation library, which is LGPL licensed. 151 | 152 | Thus the resulting artifact is LGPL licensed (I think).
-------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsSBCharSetProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 36 | * 37 | * ***** END LICENSE BLOCK ***** */
#ifndef nsSingleByteCharSetProber_h__
#define nsSingleByteCharSetProber_h__

#include "nsCharSetProber.h"

// orders below SAMPLE_SIZE take part in the sequence statistics
#define SAMPLE_SIZE 64
// number of sequences needed before HandleData() may short-circuit
#define SB_ENOUGH_REL_THRESHOLD  1024
// confidence above/below which HandleData() decides eFoundIt / eNotMe
#define POSITIVE_SHORTCUT_THRESHOLD  (float)0.95
#define NEGATIVE_SHORTCUT_THRESHOLD  (float)0.05
// orders below SYMBOL_CAT_ORDER are counted in mTotalChar
#define SYMBOL_CAT_ORDER  250
// number of sequence categories, i.e. the size of mSeqCounters
#define NUMBER_OF_SEQ_CAT 4
#define POSITIVE_CAT   (NUMBER_OF_SEQ_CAT-1)
#define NEGATIVE_CAT   0

// Per-charset language model consumed by nsSingleByteCharSetProber.
typedef struct
{
  unsigned char *charToOrderMap;    // [256] table use to find a char's order
  char  *precedenceMatrix;          // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
  float  mTypicalPositiveRatio;     // = freqSeqs / totalSeqs
  PRBool keepEnglishLetter;         // says if this script contains English characters (not implemented)
  const char* charsetName;
} SequenceModel;


// Prober for one single-byte charset, driven by a SequenceModel.
class nsSingleByteCharSetProber : public nsCharSetProber{
public:
  // Plain prober: pairs are looked up in reading order and the charset name
  // comes from the model.
  nsSingleByteCharSetProber(SequenceModel *model)
    :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
  // reversed   - look every pair up in swapped order
  // nameProber - when non-null, GetCharSetName() is delegated to it
  nsSingleByteCharSetProber(SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
    :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }

  virtual const char* GetCharSetName();
  virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
  virtual nsProbingState GetState(void) {return mState;}
  virtual void      Reset(void);
  virtual float     GetConfidence(void);
  virtual void      SetOpion() {}

  // This feature is not implemented yet. any current language model
  // contain this parameter as PR_FALSE. No one is looking at this
  // parameter or calling this method.
  // Moreover, the nsSBCSGroupProber which calls the HandleData of this
  // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
  // of the English letters.
  PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)

#ifdef DEBUG_chardet
  virtual void  DumpStatus();
#endif

protected:
  nsProbingState mState;
  const SequenceModel *mModel;
  const PRBool mReversed; // PR_TRUE if we need to reverse every pair in the model lookup

  //char order of last character
  unsigned char mLastOrder;

  // number of counted sequences, in total and per category
  PRUint32 mTotalSeqs;
  PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT];

  // characters whose order is below SYMBOL_CAT_ORDER
  PRUint32 mTotalChar;
  //characters that fall in our sampling range
  PRUint32 mFreqChar;

  // Optional auxiliary prober for name decision. created and destroyed by the GroupProber
  nsCharSetProber* mNameProber;

};


extern SequenceModel Koi8rModel;
extern SequenceModel Win1251Model;
extern SequenceModel Latin5Model;
extern SequenceModel MacCyrillicModel;
extern SequenceModel Ibm866Model;
extern SequenceModel Ibm855Model;
extern SequenceModel Latin7Model;
extern SequenceModel Win1253Model;
extern SequenceModel Latin5BulgarianModel;
extern SequenceModel Win1251BulgarianModel;
extern SequenceModel Latin2HungarianModel;
extern SequenceModel Win1250HungarianModel;
extern SequenceModel Win1255Model;

#endif /* nsSingleByteCharSetProber_h__ */

-------------------------------------------------------------------------------- /nspr-emu/prcpucfg_mac.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the
Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is the Netscape Portable Runtime (NSPR). 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998-2000 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nspr_cpucfg___ 39 | #define nspr_cpucfg___ 40 | 41 | #ifndef XP_UNIX 42 | #define XP_UNIX 43 | #endif 44 | 45 | #define PR_AF_INET6 30 /* same as AF_INET6 */ 46 | 47 | #if defined(i386) 48 | #undef IS_BIG_ENDIAN 49 | #define IS_LITTLE_ENDIAN 1 50 | #else 51 | #undef IS_LITTLE_ENDIAN 52 | #define IS_BIG_ENDIAN 1 53 | #endif 54 | 55 | #define HAVE_LONG_LONG 56 | #undef HAVE_ALIGNED_DOUBLES 57 | #define HAVE_ALIGNED_LONGLONGS 1 58 | 59 | #define PR_BYTES_PER_BYTE 1 60 | #define PR_BYTES_PER_SHORT 2 61 | #define PR_BYTES_PER_INT 4 62 | #define PR_BYTES_PER_INT64 8 63 | #define PR_BYTES_PER_LONG 4 64 | #define PR_BYTES_PER_FLOAT 4 65 | #define PR_BYTES_PER_DOUBLE 8 66 | #define PR_BYTES_PER_WORD 4 67 | #define PR_BYTES_PER_DWORD 8 68 | #define PR_BYTES_PER_WORD_LOG2 2 69 | #define PR_BYTES_PER_DWORD_LOG2 3 70 | 71 | #define PR_BITS_PER_BYTE 8 72 | #define PR_BITS_PER_SHORT 16 73 | #define PR_BITS_PER_INT 32 74 | #define PR_BITS_PER_INT64 64 75 | #define PR_BITS_PER_LONG 32 76 | #define PR_BITS_PER_FLOAT 32 77 | #define PR_BITS_PER_DOUBLE 64 78 | #define PR_BITS_PER_WORD 32 79 | #define PR_BITS_PER_DWORD 64 80 | 81 | #define PR_BITS_PER_BYTE_LOG2 3 82 | #define PR_BITS_PER_SHORT_LOG2 4 83 | #define PR_BITS_PER_INT_LOG2 5 84 | #define PR_BITS_PER_INT64_LOG2 6 85 | #define PR_BITS_PER_LONG_LOG2 5 86 | #define PR_BITS_PER_FLOAT_LOG2 5 87 | #define PR_BITS_PER_DOUBLE_LOG2 6 88 | #define PR_BITS_PER_WORD_LOG2 5 89 | 90 | #define PR_ALIGN_OF_SHORT 2 91 | #define PR_ALIGN_OF_INT 4 92 | #define PR_ALIGN_OF_LONG 4 93 | #define PR_ALIGN_OF_INT64 4 94 | #define PR_ALIGN_OF_FLOAT 4 95 | #define PR_ALIGN_OF_DOUBLE 4 96 | #define PR_ALIGN_OF_POINTER 4 97 | #define PR_ALIGN_OF_WORD 4 98 | 99 | #ifndef NO_NSPR_10_SUPPORT 100 | 101 | #define BYTES_PER_BYTE PR_BYTES_PER_BYTE 102 | #define BYTES_PER_SHORT PR_BYTES_PER_SHORT 103 | #define BYTES_PER_INT PR_BYTES_PER_INT 104 | #define BYTES_PER_INT64 PR_BYTES_PER_INT64 105 | 
#define BYTES_PER_LONG PR_BYTES_PER_LONG 106 | #define BYTES_PER_FLOAT PR_BYTES_PER_FLOAT 107 | #define BYTES_PER_DOUBLE PR_BYTES_PER_DOUBLE 108 | #define BYTES_PER_WORD PR_BYTES_PER_WORD 109 | #define BYTES_PER_DWORD PR_BYTES_PER_DWORD 110 | 111 | #define BITS_PER_BYTE PR_BITS_PER_BYTE 112 | #define BITS_PER_SHORT PR_BITS_PER_SHORT 113 | #define BITS_PER_INT PR_BITS_PER_INT 114 | #define BITS_PER_INT64 PR_BITS_PER_INT64 115 | #define BITS_PER_LONG PR_BITS_PER_LONG 116 | #define BITS_PER_FLOAT PR_BITS_PER_FLOAT 117 | #define BITS_PER_DOUBLE PR_BITS_PER_DOUBLE 118 | #define BITS_PER_WORD PR_BITS_PER_WORD 119 | 120 | #define BITS_PER_BYTE_LOG2 PR_BITS_PER_BYTE_LOG2 121 | #define BITS_PER_SHORT_LOG2 PR_BITS_PER_SHORT_LOG2 122 | #define BITS_PER_INT_LOG2 PR_BITS_PER_INT_LOG2 123 | #define BITS_PER_INT64_LOG2 PR_BITS_PER_INT64_LOG2 124 | #define BITS_PER_LONG_LOG2 PR_BITS_PER_LONG_LOG2 125 | #define BITS_PER_FLOAT_LOG2 PR_BITS_PER_FLOAT_LOG2 126 | #define BITS_PER_DOUBLE_LOG2 PR_BITS_PER_DOUBLE_LOG2 127 | #define BITS_PER_WORD_LOG2 PR_BITS_PER_WORD_LOG2 128 | 129 | #define ALIGN_OF_SHORT PR_ALIGN_OF_SHORT 130 | #define ALIGN_OF_INT PR_ALIGN_OF_INT 131 | #define ALIGN_OF_LONG PR_ALIGN_OF_LONG 132 | #define ALIGN_OF_INT64 PR_ALIGN_OF_INT64 133 | #define ALIGN_OF_FLOAT PR_ALIGN_OF_FLOAT 134 | #define ALIGN_OF_DOUBLE PR_ALIGN_OF_DOUBLE 135 | #define ALIGN_OF_POINTER PR_ALIGN_OF_POINTER 136 | #define ALIGN_OF_WORD PR_ALIGN_OF_WORD 137 | 138 | #define BYTES_PER_WORD_LOG2 PR_BYTES_PER_WORD_LOG2 139 | #define BYTES_PER_DWORD_LOG2 PR_BYTES_PER_DWORD_LOG2 140 | #define WORDS_PER_DWORD_LOG2 PR_WORDS_PER_DWORD_LOG2 141 | 142 | #endif /* NO_NSPR_10_SUPPORT */ 143 | 144 | #endif /* nspr_cpucfg___ */ 145 | 146 | -------------------------------------------------------------------------------- /nspr-emu/prmem.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** 
BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is the Netscape Portable Runtime (NSPR). 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998-2000 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | /* 39 | ** File: prmem.h 40 | ** Description: API to NSPR memory management functions 41 | ** 42 | */ 43 | #ifndef prmem_h___ 44 | #define prmem_h___ 45 | 46 | #include "prtypes.h" 47 | #include 48 | 49 | PR_BEGIN_EXTERN_C 50 | 51 | /* 52 | ** Thread safe memory allocation. 53 | ** 54 | ** NOTE: pr wraps up malloc, free, calloc, realloc so they are already 55 | ** thread safe (and are not declared here - look in stdlib.h). 56 | */ 57 | 58 | /* 59 | ** PR_Malloc, PR_Calloc, PR_Realloc, and PR_Free have the same signatures 60 | ** as their libc equivalent malloc, calloc, realloc, and free, and have 61 | ** the same semantics. (Note that the argument type size_t is replaced 62 | ** by PRUint32.) Memory allocated by PR_Malloc, PR_Calloc, or PR_Realloc 63 | ** must be freed by PR_Free. 64 | */ 65 | 66 | #define PR_Malloc malloc 67 | #define PR_Calloc calloc 68 | #define PR_Realloc realloc 69 | #define PR_Free free 70 | 71 | /* 72 | ** The following are some convenience macros defined in terms of 73 | ** PR_Malloc, PR_Calloc, PR_Realloc, and PR_Free. 74 | */ 75 | 76 | /*********************************************************************** 77 | ** FUNCTION: PR_MALLOC() 78 | ** DESCRIPTION: 79 | ** PR_NEW() allocates an untyped item of size _size from the heap. 80 | ** INPUTS: _size: size in bytes of item to be allocated 81 | ** OUTPUTS: untyped pointer to the node allocated 82 | ** RETURN: pointer to node or error returned from malloc(). 83 | ***********************************************************************/ 84 | #define PR_MALLOC(_bytes) (PR_Malloc((_bytes))) 85 | 86 | /*********************************************************************** 87 | ** FUNCTION: PR_NEW() 88 | ** DESCRIPTION: 89 | ** PR_NEW() allocates an item of type _struct from the heap. 90 | ** INPUTS: _struct: a data type 91 | ** OUTPUTS: pointer to _struct 92 | ** RETURN: pointer to _struct or error returns from malloc(). 
93 | ***********************************************************************/ 94 | #define PR_NEW(_struct) ((_struct *) PR_MALLOC(sizeof(_struct))) 95 | 96 | /*********************************************************************** 97 | ** FUNCTION: PR_REALLOC() 98 | ** DESCRIPTION: 99 | ** PR_REALLOC() re-allocates _ptr bytes from the heap as a _size 100 | ** untyped item. 101 | ** INPUTS: _ptr: pointer to node to reallocate 102 | ** _size: size of node to allocate 103 | ** OUTPUTS: pointer to node allocated 104 | ** RETURN: pointer to node allocated 105 | ***********************************************************************/ 106 | #define PR_REALLOC(_ptr, _size) (PR_Realloc((_ptr), (_size))) 107 | 108 | /*********************************************************************** 109 | ** FUNCTION: PR_CALLOC() 110 | ** DESCRIPTION: 111 | ** PR_CALLOC() allocates a _size bytes untyped item from the heap 112 | ** and sets the allocated memory to all 0x00. 113 | ** INPUTS: _size: size of node to allocate 114 | ** OUTPUTS: pointer to node allocated 115 | ** RETURN: pointer to node allocated 116 | ***********************************************************************/ 117 | #define PR_CALLOC(_size) (PR_Calloc(1, (_size))) 118 | 119 | /*********************************************************************** 120 | ** FUNCTION: PR_NEWZAP() 121 | ** DESCRIPTION: 122 | ** PR_NEWZAP() allocates an item of type _struct from the heap 123 | ** and sets the allocated memory to all 0x00. 
124 | ** INPUTS: _struct: a data type 125 | ** OUTPUTS: pointer to _struct 126 | ** RETURN: pointer to _struct 127 | ***********************************************************************/ 128 | #define PR_NEWZAP(_struct) ((_struct*)PR_Calloc(1, sizeof(_struct))) 129 | 130 | /*********************************************************************** 131 | ** FUNCTION: PR_DELETE() 132 | ** DESCRIPTION: 133 | ** PR_DELETE() unallocates an object previosly allocated via PR_NEW() 134 | ** or PR_NEWZAP() to the heap. 135 | ** INPUTS: pointer to previously allocated object 136 | ** OUTPUTS: the referenced object is returned to the heap 137 | ** RETURN: void 138 | ***********************************************************************/ 139 | #define PR_DELETE(_ptr) { PR_Free(_ptr); (_ptr) = NULL; } 140 | 141 | /*********************************************************************** 142 | ** FUNCTION: PR_FREEIF() 143 | ** DESCRIPTION: 144 | ** PR_FREEIF() conditionally unallocates an object previously allocated 145 | ** vial PR_NEW() or PR_NEWZAP(). If the pointer to the object is 146 | ** equal to zero (0), the object is not released. 
147 | ** INPUTS: pointer to previously allocated object 148 | ** OUTPUTS: the referenced object is conditionally returned to the heap 149 | ** RETURN: void 150 | ***********************************************************************/ 151 | #define PR_FREEIF(_ptr) if (_ptr) PR_DELETE(_ptr) 152 | 153 | PR_END_EXTERN_C 154 | 155 | #endif /* prmem_h___ */ 156 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsMBCSGroupProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * Proofpoint, Inc. 25 | * 26 | * Alternatively, the contents of this file may be used under the terms of 27 | * either the GNU General Public License Version 2 or later (the "GPL"), or 28 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 29 | * in which case the provisions of the GPL or the LGPL are applicable instead 30 | * of those above. 
If you wish to allow use of your version of this file only 31 | * under the terms of either the GPL or the LGPL, and not to allow others to 32 | * use your version of this file under the terms of the MPL, indicate your 33 | * decision by deleting the provisions above and replace them with the notice 34 | * and other provisions required by the GPL or the LGPL. If you do not delete 35 | * the provisions above, a recipient may use your version of this file under 36 | * the terms of any one of the MPL, the GPL or the LGPL. 37 | * 38 | * ***** END LICENSE BLOCK ***** */ 39 | #include 40 | 41 | #include "nsMBCSGroupProber.h" 42 | #include "nsUniversalDetector.h" 43 | 44 | #if defined(DEBUG_chardet) || defined(DEBUG_jgmyers) 45 | const char *ProberName[] = 46 | { 47 | "UTF8", 48 | "SJIS", 49 | "EUCJP", 50 | "GB18030", 51 | "EUCKR", 52 | "Big5", 53 | "EUCTW", 54 | }; 55 | 56 | #endif 57 | 58 | nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter) 59 | { 60 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) 61 | mProbers[i] = nsnull; 62 | 63 | mProbers[0] = new nsUTF8Prober(); 64 | if (aLanguageFilter & NS_FILTER_JAPANESE) 65 | { 66 | mProbers[1] = new nsSJISProber(); 67 | mProbers[2] = new nsEUCJPProber(); 68 | } 69 | if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED) 70 | mProbers[3] = new nsGB18030Prober(); 71 | if (aLanguageFilter & NS_FILTER_KOREAN) 72 | mProbers[4] = new nsEUCKRProber(); 73 | if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL) 74 | { 75 | mProbers[5] = new nsBig5Prober(); 76 | mProbers[6] = new nsEUCTWProber(); 77 | } 78 | Reset(); 79 | } 80 | 81 | nsMBCSGroupProber::~nsMBCSGroupProber() 82 | { 83 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) 84 | { 85 | delete mProbers[i]; 86 | } 87 | } 88 | 89 | const char* nsMBCSGroupProber::GetCharSetName() 90 | { 91 | if (mBestGuess == -1) 92 | { 93 | GetConfidence(); 94 | if (mBestGuess == -1) 95 | mBestGuess = 0; 96 | } 97 | return mProbers[mBestGuess]->GetCharSetName(); 98 | } 99 | 100 | void 
nsMBCSGroupProber::Reset(void) 101 | { 102 | mActiveNum = 0; 103 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) 104 | { 105 | if (mProbers[i]) 106 | { 107 | mProbers[i]->Reset(); 108 | mIsActive[i] = PR_TRUE; 109 | ++mActiveNum; 110 | } 111 | else 112 | mIsActive[i] = PR_FALSE; 113 | } 114 | mBestGuess = -1; 115 | mState = eDetecting; 116 | mKeepNext = 0; 117 | } 118 | 119 | nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen) 120 | { 121 | nsProbingState st; 122 | PRUint32 start = 0; 123 | PRUint32 keepNext = mKeepNext; 124 | 125 | //do filtering to reduce load to probers 126 | for (PRUint32 pos = 0; pos < aLen; ++pos) 127 | { 128 | if (aBuf[pos] & 0x80) 129 | { 130 | if (!keepNext) 131 | start = pos; 132 | keepNext = 2; 133 | } 134 | else if (keepNext) 135 | { 136 | if (--keepNext == 0) 137 | { 138 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) 139 | { 140 | if (!mIsActive[i]) 141 | continue; 142 | st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start); 143 | if (st == eFoundIt) 144 | { 145 | mBestGuess = i; 146 | mState = eFoundIt; 147 | return mState; 148 | } 149 | } 150 | } 151 | } 152 | } 153 | 154 | if (keepNext) { 155 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) 156 | { 157 | if (!mIsActive[i]) 158 | continue; 159 | st = mProbers[i]->HandleData(aBuf + start, aLen - start); 160 | if (st == eFoundIt) 161 | { 162 | mBestGuess = i; 163 | mState = eFoundIt; 164 | return mState; 165 | } 166 | } 167 | } 168 | mKeepNext = keepNext; 169 | 170 | return mState; 171 | } 172 | 173 | float nsMBCSGroupProber::GetConfidence(void) 174 | { 175 | PRUint32 i; 176 | float bestConf = 0.0, cf; 177 | 178 | switch (mState) 179 | { 180 | case eFoundIt: 181 | return (float)0.99; 182 | case eNotMe: 183 | return (float)0.01; 184 | default: 185 | for (i = 0; i < NUM_OF_PROBERS; i++) 186 | { 187 | if (!mIsActive[i]) 188 | continue; 189 | cf = mProbers[i]->GetConfidence(); 190 | if (bestConf < cf) 191 | { 192 | bestConf = cf; 193 | mBestGuess = i; 194 | 
} 195 | } 196 | } 197 | return bestConf; 198 | } 199 | 200 | #ifdef DEBUG_chardet 201 | void nsMBCSGroupProber::DumpStatus() 202 | { 203 | PRUint32 i; 204 | float cf; 205 | 206 | GetConfidence(); 207 | for (i = 0; i < NUM_OF_PROBERS; i++) 208 | { 209 | if (!mIsActive[i]) 210 | printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); 211 | else 212 | { 213 | cf = mProbers[i]->GetConfidence(); 214 | printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); 215 | } 216 | } 217 | } 218 | #endif 219 | 220 | #ifdef DEBUG_jgmyers 221 | void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset) 222 | { 223 | for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) { 224 | states[offset].name = ProberName[i]; 225 | states[offset].isActive = mIsActive[i]; 226 | states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0; 227 | ++offset; 228 | } 229 | } 230 | #endif /* DEBUG_jgmyers */ 231 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsLatin1Prober.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 
16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #include "nsLatin1Prober.h" 40 | #include "prmem.h" 41 | #include 42 | 43 | #define UDF 0 // undefined 44 | #define OTH 1 //other 45 | #define ASC 2 // ascii capital letter 46 | #define ASS 3 // ascii small letter 47 | #define ACV 4 // accent capital vowel 48 | #define ACO 5 // accent capital other 49 | #define ASV 6 // accent small vowel 50 | #define ASO 7 // accent small other 51 | #define CLASS_NUM 8 // total classes 52 | 53 | static unsigned char Latin1_CharToClass[] = 54 | { 55 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07 56 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F 57 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17 58 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F 59 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27 60 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F 61 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37 62 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F 63 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47 64 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F 65 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57 66 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F 67 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67 68 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F 69 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77 70 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F 71 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87 72 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F 73 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97 74 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F 75 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7 76 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF 77 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7 78 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF 79 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7 80 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF 
81 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7 82 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF 83 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7 84 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF 85 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7 86 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF 87 | }; 88 | 89 | 90 | /* 0 : illegal 91 | 1 : very unlikely 92 | 2 : normal 93 | 3 : very likely 94 | */ 95 | static unsigned char Latin1ClassModel[] = 96 | { 97 | /* UDF OTH ASC ASS ACV ACO ASV ASO */ 98 | /*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0, 99 | /*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3, 100 | /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3, 101 | /*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3, 102 | /*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2, 103 | /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3, 104 | /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3, 105 | /*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3, 106 | }; 107 | 108 | void nsLatin1Prober::Reset(void) 109 | { 110 | mState = eDetecting; 111 | mLastCharClass = OTH; 112 | for (int i = 0; i < FREQ_CAT_NUM; i++) 113 | mFreqCounter[i] = 0; 114 | } 115 | 116 | 117 | nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen) 118 | { 119 | char *newBuf1 = 0; 120 | PRUint32 newLen1 = 0; 121 | 122 | if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) { 123 | newBuf1 = (char*)aBuf; 124 | newLen1 = aLen; 125 | } 126 | 127 | unsigned char charClass; 128 | unsigned char freq; 129 | for (PRUint32 i = 0; i < newLen1; i++) 130 | { 131 | charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]]; 132 | freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass]; 133 | if (freq == 0) { 134 | mState = eNotMe; 135 | break; 136 | } 137 | mFreqCounter[freq]++; 138 | mLastCharClass = charClass; 139 | } 140 | 141 | if (newBuf1 != aBuf) 142 | PR_FREEIF(newBuf1); 143 | 144 | return mState; 145 | } 146 | 147 | float nsLatin1Prober::GetConfidence(void) 148 | { 149 | if (mState == eNotMe) 150 | return 0.01f; 151 | 152 | float confidence; 153 | PRUint32 total = 0; 154 | for (PRInt32 i = 0; 
i < FREQ_CAT_NUM; i++) 155 | total += mFreqCounter[i]; 156 | 157 | if(!total) 158 | confidence = 0.0f; 159 | else 160 | { 161 | confidence = mFreqCounter[3]*1.0f / total; 162 | confidence -= mFreqCounter[1]*20.0f/total; 163 | } 164 | 165 | if (confidence < 0.0f) 166 | confidence = 0.0f; 167 | 168 | // lower the confidence of latin1 so that other more accurate detector 169 | // can take priority. 170 | confidence *= 0.50f; 171 | 172 | return confidence; 173 | } 174 | 175 | #ifdef DEBUG_chardet 176 | void nsLatin1Prober::DumpStatus() 177 | { 178 | printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName()); 179 | } 180 | #endif 181 | 182 | 183 | -------------------------------------------------------------------------------- /nspr-emu/obsolete/protypes.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is the Netscape Portable Runtime (NSPR). 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998-2000 20 | * the Initial Developer. All Rights Reserved. 
21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | /* 39 | * This header typedefs the old 'native' types to the new PRs. 40 | * These definitions are scheduled to be eliminated at the earliest 41 | * possible time. The NSPR API is implemented and documented using 42 | * the new definitions. 43 | */ 44 | 45 | #if !defined(PROTYPES_H) 46 | #define PROTYPES_H 47 | 48 | typedef PRUintn uintn; 49 | #ifndef _XP_Core_ 50 | typedef PRIntn intn; 51 | #endif 52 | 53 | /* 54 | * It is trickier to define uint, int8, uint8, int16, uint16, 55 | * int32, uint32, int64, and uint64 because some of these int 56 | * types are defined by standard header files on some platforms. 57 | * Our strategy here is to include all such standard headers 58 | * first, and then define these int types only if they are not 59 | * defined by those standard headers. 60 | */ 61 | 62 | /* 63 | * BeOS defines all the int types below in its standard header 64 | * file SupportDefs.h. 65 | */ 66 | #ifdef XP_BEOS 67 | #include 68 | #endif 69 | 70 | /* 71 | * SVR4 typedef of uint is commonly found on UNIX machines. 
72 | * 73 | * On AIX 4.3, sys/inttypes.h (which is included by sys/types.h) 74 | * defines the types int8, int16, int32, and int64. 75 | * 76 | * On OS/2, sys/types.h defines uint. 77 | */ 78 | #if defined(XP_UNIX) || defined(XP_OS2) 79 | #include 80 | #endif 81 | 82 | /* model.h on HP-UX defines int8, int16, and int32. */ 83 | #ifdef HPUX 84 | #include 85 | #endif 86 | 87 | /* 88 | * uint 89 | */ 90 | 91 | #if !defined(XP_BEOS) && !defined(XP_OS2) && !defined(XP_UNIX) || defined(NTO) 92 | typedef PRUintn uint; 93 | #endif 94 | 95 | /* 96 | * uint64 97 | */ 98 | 99 | #if !defined(XP_BEOS) 100 | typedef PRUint64 uint64; 101 | #endif 102 | 103 | /* 104 | * uint32 105 | */ 106 | 107 | #if !defined(XP_BEOS) 108 | #if !defined(_WIN32) && !defined(XP_OS2) && !defined(NTO) 109 | typedef PRUint32 uint32; 110 | #else 111 | typedef unsigned long uint32; 112 | #endif 113 | #endif 114 | 115 | /* 116 | * uint16 117 | */ 118 | 119 | #if !defined(XP_BEOS) 120 | typedef PRUint16 uint16; 121 | #endif 122 | 123 | /* 124 | * uint8 125 | */ 126 | 127 | #if !defined(XP_BEOS) 128 | typedef PRUint8 uint8; 129 | #endif 130 | 131 | /* 132 | * int64 133 | */ 134 | 135 | #if !defined(XP_BEOS) && !defined(_PR_AIX_HAVE_BSD_INT_TYPES) 136 | typedef PRInt64 int64; 137 | #endif 138 | 139 | /* 140 | * int32 141 | */ 142 | 143 | #if !defined(XP_BEOS) && !defined(_PR_AIX_HAVE_BSD_INT_TYPES) \ 144 | && !defined(HPUX) 145 | #if !defined(_WIN32) && !defined(XP_OS2) && !defined(NTO) 146 | typedef PRInt32 int32; 147 | #else 148 | typedef long int32; 149 | #endif 150 | #endif 151 | 152 | /* 153 | * int16 154 | */ 155 | 156 | #if !defined(XP_BEOS) && !defined(_PR_AIX_HAVE_BSD_INT_TYPES) \ 157 | && !defined(HPUX) 158 | typedef PRInt16 int16; 159 | #endif 160 | 161 | /* 162 | * int8 163 | */ 164 | 165 | #if !defined(XP_BEOS) && !defined(_PR_AIX_HAVE_BSD_INT_TYPES) \ 166 | && !defined(HPUX) 167 | typedef PRInt8 int8; 168 | #endif 169 | 170 | typedef PRFloat64 float64; 171 | typedef PRUptrdiff uptrdiff_t; 172 
| typedef PRUword uprword_t; 173 | typedef PRWord prword_t; 174 | 175 | 176 | /* Re: prbit.h */ 177 | #define TEST_BIT PR_TEST_BIT 178 | #define SET_BIT PR_SET_BIT 179 | #define CLEAR_BIT PR_CLEAR_BIT 180 | 181 | /* Re: prarena.h->plarena.h */ 182 | #define PRArena PLArena 183 | #define PRArenaPool PLArenaPool 184 | #define PRArenaStats PLArenaStats 185 | #define PR_ARENA_ALIGN PL_ARENA_ALIGN 186 | #define PR_INIT_ARENA_POOL PL_INIT_ARENA_POOL 187 | #define PR_ARENA_ALLOCATE PL_ARENA_ALLOCATE 188 | #define PR_ARENA_GROW PL_ARENA_GROW 189 | #define PR_ARENA_MARK PL_ARENA_MARK 190 | #define PR_CLEAR_UNUSED PL_CLEAR_UNUSED 191 | #define PR_CLEAR_ARENA PL_CLEAR_ARENA 192 | #define PR_ARENA_RELEASE PL_ARENA_RELEASE 193 | #define PR_COUNT_ARENA PL_COUNT_ARENA 194 | #define PR_ARENA_DESTROY PL_ARENA_DESTROY 195 | #define PR_InitArenaPool PL_InitArenaPool 196 | #define PR_FreeArenaPool PL_FreeArenaPool 197 | #define PR_FinishArenaPool PL_FinishArenaPool 198 | #define PR_CompactArenaPool PL_CompactArenaPool 199 | #define PR_ArenaFinish PL_ArenaFinish 200 | #define PR_ArenaAllocate PL_ArenaAllocate 201 | #define PR_ArenaGrow PL_ArenaGrow 202 | #define PR_ArenaRelease PL_ArenaRelease 203 | #define PR_ArenaCountAllocation PL_ArenaCountAllocation 204 | #define PR_ArenaCountInplaceGrowth PL_ArenaCountInplaceGrowth 205 | #define PR_ArenaCountGrowth PL_ArenaCountGrowth 206 | #define PR_ArenaCountRelease PL_ArenaCountRelease 207 | #define PR_ArenaCountRetract PL_ArenaCountRetract 208 | 209 | /* Re: prhash.h->plhash.h */ 210 | #define PRHashEntry PLHashEntry 211 | #define PRHashTable PLHashTable 212 | #define PRHashNumber PLHashNumber 213 | #define PRHashFunction PLHashFunction 214 | #define PRHashComparator PLHashComparator 215 | #define PRHashEnumerator PLHashEnumerator 216 | #define PRHashAllocOps PLHashAllocOps 217 | #define PR_NewHashTable PL_NewHashTable 218 | #define PR_HashTableDestroy PL_HashTableDestroy 219 | #define PR_HashTableRawLookup PL_HashTableRawLookup 220 | 
#define PR_HashTableRawAdd PL_HashTableRawAdd 221 | #define PR_HashTableRawRemove PL_HashTableRawRemove 222 | #define PR_HashTableAdd PL_HashTableAdd 223 | #define PR_HashTableRemove PL_HashTableRemove 224 | #define PR_HashTableEnumerateEntries PL_HashTableEnumerateEntries 225 | #define PR_HashTableLookup PL_HashTableLookup 226 | #define PR_HashTableDump PL_HashTableDump 227 | #define PR_HashString PL_HashString 228 | #define PR_CompareStrings PL_CompareStrings 229 | #define PR_CompareValues PL_CompareValues 230 | 231 | #endif /* !defined(PROTYPES_H) */ 232 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsSBCSGroupProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 
21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #include 40 | #include "prmem.h" 41 | 42 | #include "nsSBCharSetProber.h" 43 | #include "nsSBCSGroupProber.h" 44 | 45 | #include "nsHebrewProber.h" 46 | 47 | nsSBCSGroupProber::nsSBCSGroupProber() 48 | { 49 | mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); 50 | mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); 51 | mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); 52 | mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); 53 | mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); 54 | mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); 55 | mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); 56 | mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); 57 | mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); 58 | mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); 59 | 60 | nsHebrewProber *hebprober = new nsHebrewProber(); 61 | // Notice: Any change in these indexes - 10,11,12 must be reflected 62 | // in the 
code below as well. 63 | mProbers[10] = hebprober; 64 | mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew 65 | mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew 66 | // Tell the Hebrew prober about the logical and visual probers 67 | if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null 68 | { 69 | hebprober->SetModelProbers(mProbers[11], mProbers[12]); 70 | } 71 | else // One or more is null. avoid any Hebrew probing, null them all 72 | { 73 | for (PRUint32 i = 10; i <= 12; ++i) 74 | { 75 | delete mProbers[i]; 76 | mProbers[i] = 0; 77 | } 78 | } 79 | 80 | // disable latin2 before latin1 is available, otherwise all latin1 81 | // will be detected as latin2 because of their similarity. 82 | //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); 83 | //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); 84 | 85 | Reset(); 86 | } 87 | 88 | nsSBCSGroupProber::~nsSBCSGroupProber() 89 | { 90 | for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++) 91 | { 92 | delete mProbers[i]; 93 | } 94 | } 95 | 96 | 97 | const char* nsSBCSGroupProber::GetCharSetName() 98 | { 99 | //if we have no answer yet 100 | if (mBestGuess == -1) 101 | { 102 | GetConfidence(); 103 | //no charset seems positive 104 | if (mBestGuess == -1) 105 | //we will use default. 
106 | mBestGuess = 0; 107 | } 108 | return mProbers[mBestGuess]->GetCharSetName(); 109 | } 110 | 111 | void nsSBCSGroupProber::Reset(void) 112 | { 113 | mActiveNum = 0; 114 | for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++) 115 | { 116 | if (mProbers[i]) // not null 117 | { 118 | mProbers[i]->Reset(); 119 | mIsActive[i] = PR_TRUE; 120 | ++mActiveNum; 121 | } 122 | else 123 | mIsActive[i] = PR_FALSE; 124 | } 125 | mBestGuess = -1; 126 | mState = eDetecting; 127 | } 128 | 129 | 130 | nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen) 131 | { 132 | nsProbingState st; 133 | PRUint32 i; 134 | char *newBuf1 = 0; 135 | PRUint32 newLen1 = 0; 136 | 137 | //apply filter to original buffer, and we got new buffer back 138 | //depend on what script it is, we will feed them the new buffer 139 | //we got after applying proper filter 140 | //this is done without any consideration to KeepEnglishLetters 141 | //of each prober since as of now, there are no probers here which 142 | //recognize languages with English characters. 143 | if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) 144 | goto done; 145 | 146 | if (newLen1 == 0) 147 | goto done; // Nothing to see here, move on. 
148 | 149 | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) 150 | { 151 | if (!mIsActive[i]) 152 | continue; 153 | st = mProbers[i]->HandleData(newBuf1, newLen1); 154 | if (st == eFoundIt) 155 | { 156 | mBestGuess = i; 157 | mState = eFoundIt; 158 | break; 159 | } 160 | else if (st == eNotMe) 161 | { 162 | mIsActive[i] = PR_FALSE; 163 | mActiveNum--; 164 | if (mActiveNum <= 0) 165 | { 166 | mState = eNotMe; 167 | break; 168 | } 169 | } 170 | } 171 | 172 | done: 173 | PR_FREEIF(newBuf1); 174 | 175 | return mState; 176 | } 177 | 178 | float nsSBCSGroupProber::GetConfidence(void) 179 | { 180 | PRUint32 i; 181 | float bestConf = 0.0, cf; 182 | 183 | switch (mState) 184 | { 185 | case eFoundIt: 186 | return (float)0.99; //sure yes 187 | case eNotMe: 188 | return (float)0.01; //sure no 189 | default: 190 | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) 191 | { 192 | if (!mIsActive[i]) 193 | continue; 194 | cf = mProbers[i]->GetConfidence(); 195 | if (bestConf < cf) 196 | { 197 | bestConf = cf; 198 | mBestGuess = i; 199 | } 200 | } 201 | } 202 | return bestConf; 203 | } 204 | 205 | #ifdef DEBUG_chardet 206 | void nsSBCSGroupProber::DumpStatus() 207 | { 208 | PRUint32 i; 209 | float cf; 210 | 211 | cf = GetConfidence(); 212 | printf(" SBCS Group Prober --------begin status \r\n"); 213 | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) 214 | { 215 | if (!mIsActive[i]) 216 | printf(" inactive: [%s] (i.e. 
confidence is too low).\r\n", mProbers[i]->GetCharSetName()); 217 | else 218 | mProbers[i]->DumpStatus(); 219 | } 220 | printf(" SBCS Group found best match [%s] confidence %f.\r\n", 221 | mProbers[mBestGuess]->GetCharSetName(), cf); 222 | } 223 | #endif 224 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsHebrewProber.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Shy Shalom 19 | * Portions created by the Initial Developer are Copyright (C) 2005 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. 
If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #include "nsHebrewProber.h" 39 | #include 40 | 41 | // windows-1255 / ISO-8859-8 code points of interest 42 | #define FINAL_KAF ('\xea') 43 | #define NORMAL_KAF ('\xeb') 44 | #define FINAL_MEM ('\xed') 45 | #define NORMAL_MEM ('\xee') 46 | #define FINAL_NUN ('\xef') 47 | #define NORMAL_NUN ('\xf0') 48 | #define FINAL_PE ('\xf3') 49 | #define NORMAL_PE ('\xf4') 50 | #define FINAL_TSADI ('\xf5') 51 | #define NORMAL_TSADI ('\xf6') 52 | 53 | // Minimum Visual vs Logical final letter score difference. 54 | // If the difference is below this, don't rely solely on the final letter score distance. 55 | #define MIN_FINAL_CHAR_DISTANCE (5) 56 | 57 | // Minimum Visual vs Logical model score difference. 58 | // If the difference is below this, don't rely at all on the model score distance. 59 | #define MIN_MODEL_DISTANCE (0.01) 60 | 61 | #define VISUAL_HEBREW_NAME ("ISO-8859-8") 62 | #define LOGICAL_HEBREW_NAME ("windows-1255") 63 | 64 | PRBool nsHebrewProber::isFinal(char c) 65 | { 66 | return ((c == FINAL_KAF) || (c == FINAL_MEM) || (c == FINAL_NUN) || (c == FINAL_PE) || (c == FINAL_TSADI)); 67 | } 68 | 69 | PRBool nsHebrewProber::isNonFinal(char c) 70 | { 71 | return ((c == NORMAL_KAF) || (c == NORMAL_MEM) || (c == NORMAL_NUN) || (c == NORMAL_PE)); 72 | // The normal Tsadi is not a good Non-Final letter due to words like 73 | // 'lechotet' (to chat) containing an apostrophe after the tsadi. 
This 74 | // apostrophe is converted to a space in FilterWithoutEnglishLetters causing 75 | // the Non-Final tsadi to appear at an end of a word even though this is not 76 | // the case in the original text. 77 | // The letters Pe and Kaf rarely display a related behavior of not being a 78 | // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for 79 | // example legally end with a Non-Final Pe or Kaf. However, the benefit of 80 | // these letters as Non-Final letters outweighs the damage since these words 81 | // are quite rare. 82 | } 83 | 84 | /** HandleData 85 | * Final letter analysis for logical-visual decision. 86 | * Look for evidence that the received buffer is either logical Hebrew or 87 | * visual Hebrew. 88 | * The following cases are checked: 89 | * 1) A word longer than 1 letter, ending with a final letter. This is an 90 | * indication that the text is laid out "naturally" since the final letter 91 | * really appears at the end. +1 for logical score. 92 | * 2) A word longer than 1 letter, ending with a Non-Final letter. In normal 93 | * Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with 94 | * the Non-Final form of that letter. Exceptions to this rule are mentioned 95 | * above in isNonFinal(). This is an indication that the text is laid out 96 | * backwards. +1 for visual score 97 | * 3) A word longer than 1 letter, starting with a final letter. Final letters 98 | * should not appear at the beginning of a word. This is an indication that 99 | * the text is laid out backwards. +1 for visual score. 100 | * 101 | * The visual score and logical score are accumulated throughout the text and 102 | * are finally checked against each other in GetCharSetName(). 103 | * No checking for final letters in the middle of words is done since that case 104 | * is not an indication for either Logical or Visual text. 
105 | * 106 | * The input buffer should not contain any white spaces that are not (' ') 107 | * or any low-ascii punctuation marks. 108 | */ 109 | nsProbingState nsHebrewProber::HandleData(const char* aBuf, PRUint32 aLen) 110 | { 111 | // Both model probers say it's not them. No reason to continue. 112 | if (GetState() == eNotMe) 113 | return eNotMe; 114 | 115 | const char *curPtr, *endPtr = aBuf+aLen; 116 | char cur; 117 | 118 | for (curPtr = (char*)aBuf; curPtr < endPtr; ++curPtr) 119 | { 120 | cur = *curPtr; 121 | if (cur == ' ') // We stand on a space - a word just ended 122 | { 123 | if (mBeforePrev != ' ') // *(curPtr-2) was not a space so prev is not a 1 letter word 124 | { 125 | if (isFinal(mPrev)) // case (1) [-2:not space][-1:final letter][cur:space] 126 | ++mFinalCharLogicalScore; 127 | else if (isNonFinal(mPrev)) // case (2) [-2:not space][-1:Non-Final letter][cur:space] 128 | ++mFinalCharVisualScore; 129 | } 130 | } 131 | else // Not standing on a space 132 | { 133 | if ((mBeforePrev == ' ') && (isFinal(mPrev)) && (cur != ' ')) // case (3) [-2:space][-1:final letter][cur:not space] 134 | ++mFinalCharVisualScore; 135 | } 136 | mBeforePrev = mPrev; 137 | mPrev = cur; 138 | } 139 | 140 | // Forever detecting, till the end or until both model probers return eNotMe (handled above). 141 | return eDetecting; 142 | } 143 | 144 | // Make the decision: is it Logical or Visual? 145 | const char* nsHebrewProber::GetCharSetName() 146 | { 147 | // If the final letter score distance is dominant enough, rely on it. 148 | PRInt32 finalsub = mFinalCharLogicalScore - mFinalCharVisualScore; 149 | if (finalsub >= MIN_FINAL_CHAR_DISTANCE) 150 | return LOGICAL_HEBREW_NAME; 151 | if (finalsub <= -(MIN_FINAL_CHAR_DISTANCE)) 152 | return VISUAL_HEBREW_NAME; 153 | 154 | // It's not dominant enough, try to rely on the model scores instead. 
155 | float modelsub = mLogicalProb->GetConfidence() - mVisualProb->GetConfidence(); 156 | if (modelsub > MIN_MODEL_DISTANCE) 157 | return LOGICAL_HEBREW_NAME; 158 | if (modelsub < -(MIN_MODEL_DISTANCE)) 159 | return VISUAL_HEBREW_NAME; 160 | 161 | // Still no good, back to final letter distance, maybe it'll save the day. 162 | if (finalsub < 0) 163 | return VISUAL_HEBREW_NAME; 164 | 165 | // (finalsub > 0 - Logical) or (don't know what to do) default to Logical. 166 | return LOGICAL_HEBREW_NAME; 167 | } 168 | 169 | 170 | void nsHebrewProber::Reset(void) 171 | { 172 | mFinalCharLogicalScore = 0; 173 | mFinalCharVisualScore = 0; 174 | 175 | // mPrev and mBeforePrev are initialized to space in order to simulate a word 176 | // delimiter at the beginning of the data 177 | mPrev = ' '; 178 | mBeforePrev = ' '; 179 | } 180 | 181 | nsProbingState nsHebrewProber::GetState(void) 182 | { 183 | // Remain active as long as any of the model probers are active. 184 | if ((mLogicalProb->GetState() == eNotMe) && (mVisualProb->GetState() == eNotMe)) 185 | return eNotMe; 186 | return eDetecting; 187 | } 188 | 189 | #ifdef DEBUG_chardet 190 | void nsHebrewProber::DumpStatus() 191 | { 192 | printf(" HEB: %d - %d [Logical-Visual score]\r\n", mFinalCharLogicalScore, mFinalCharVisualScore); 193 | } 194 | #endif 195 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/CharDistribution.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Communicator client code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef CharDistribution_h__ 39 | #define CharDistribution_h__ 40 | 41 | #include "nscore.h" 42 | 43 | #define ENOUGH_DATA_THRESHOLD 1024 44 | 45 | class CharDistributionAnalysis 46 | { 47 | public: 48 | CharDistributionAnalysis() {Reset();} 49 | 50 | //feed a block of data and do distribution analysis 51 | void HandleData(const char* aBuf, PRUint32 aLen) {} 52 | 53 | //Feed a character with known length 54 | void HandleOneChar(const char* aStr, PRUint32 aCharLen) 55 | { 56 | PRInt32 order; 57 | 58 | //we only care about 2-bytes character in our distribution analysis 59 | order = (aCharLen == 2) ? GetOrder(aStr) : -1; 60 | 61 | if (order >= 0) 62 | { 63 | mTotalChars++; 64 | //order is valid 65 | if ((PRUint32)order < mTableSize) 66 | { 67 | if (512 > mCharToFreqOrder[order]) 68 | mFreqChars++; 69 | } 70 | } 71 | } 72 | 73 | //return confidence base on existing data 74 | float GetConfidence(); 75 | 76 | //Reset analyser, clear any state 77 | void Reset(void) 78 | { 79 | mDone = PR_FALSE; 80 | mTotalChars = 0; 81 | mFreqChars = 0; 82 | } 83 | 84 | //This function is for future extension. Caller can use this function to control 85 | //analyser's behavior 86 | void SetOpion(){} 87 | 88 | //It is not necessary to receive all data to draw conclusion. For charset detection, 89 | // certain amount of data is enough 90 | PRBool GotEnoughData() {return mTotalChars > ENOUGH_DATA_THRESHOLD;} 91 | 92 | protected: 93 | //we do not handle character base on its original encoding string, but 94 | //convert this encoding string to a number, here called order. 
95 | //This allow multiple encoding of a language to share one frequency table 96 | virtual PRInt32 GetOrder(const char* str) {return -1;} 97 | 98 | //If this flag is set to PR_TRUE, detection is done and conclusion has been made 99 | PRBool mDone; 100 | 101 | //The number of characters whose frequency order is less than 512 102 | PRUint32 mFreqChars; 103 | 104 | //Total character encounted. 105 | PRUint32 mTotalChars; 106 | 107 | //Mapping table to get frequency order from char order (get from GetOrder()) 108 | const PRInt16 *mCharToFreqOrder; 109 | 110 | //Size of above table 111 | PRUint32 mTableSize; 112 | 113 | //This is a constant value varies from language to language, it is used in 114 | //calculating confidence. See my paper for further detail. 115 | float mTypicalDistributionRatio; 116 | }; 117 | 118 | 119 | class EUCTWDistributionAnalysis: public CharDistributionAnalysis 120 | { 121 | public: 122 | EUCTWDistributionAnalysis(); 123 | protected: 124 | 125 | //for euc-TW encoding, we are interested 126 | // first byte range: 0xc4 -- 0xfe 127 | // second byte range: 0xa1 -- 0xfe 128 | //no validation needed here. State machine has done that 129 | PRInt32 GetOrder(const char* str) 130 | { if ((unsigned char)*str >= (unsigned char)0xc4) 131 | return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1; 132 | else 133 | return -1; 134 | } 135 | }; 136 | 137 | 138 | class EUCKRDistributionAnalysis : public CharDistributionAnalysis 139 | { 140 | public: 141 | EUCKRDistributionAnalysis(); 142 | protected: 143 | //for euc-KR encoding, we are interested 144 | // first byte range: 0xb0 -- 0xfe 145 | // second byte range: 0xa1 -- 0xfe 146 | //no validation needed here. 
State machine has done that 147 | PRInt32 GetOrder(const char* str) 148 | { if ((unsigned char)*str >= (unsigned char)0xb0) 149 | return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1; 150 | else 151 | return -1; 152 | } 153 | }; 154 | 155 | class GB2312DistributionAnalysis : public CharDistributionAnalysis 156 | { 157 | public: 158 | GB2312DistributionAnalysis(); 159 | protected: 160 | //for GB2312 encoding, we are interested 161 | // first byte range: 0xb0 -- 0xfe 162 | // second byte range: 0xa1 -- 0xfe 163 | //no validation needed here. State machine has done that 164 | PRInt32 GetOrder(const char* str) 165 | { if ((unsigned char)*str >= (unsigned char)0xb0 && (unsigned char)str[1] >= (unsigned char)0xa1) 166 | return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1; 167 | else 168 | return -1; 169 | } 170 | }; 171 | 172 | 173 | class Big5DistributionAnalysis : public CharDistributionAnalysis 174 | { 175 | public: 176 | Big5DistributionAnalysis(); 177 | protected: 178 | //for big5 encoding, we are interested 179 | // first byte range: 0xa4 -- 0xfe 180 | // second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe 181 | //no validation needed here. 
State machine has done that 182 | PRInt32 GetOrder(const char* str) 183 | { if ((unsigned char)*str >= (unsigned char)0xa4) 184 | if ((unsigned char)str[1] >= (unsigned char)0xa1) 185 | return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0xa1 +63; 186 | else 187 | return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0x40; 188 | else 189 | return -1; 190 | } 191 | }; 192 | 193 | class SJISDistributionAnalysis : public CharDistributionAnalysis 194 | { 195 | public: 196 | SJISDistributionAnalysis(); 197 | protected: 198 | //for sjis encoding, we are interested 199 | // first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe 200 | // second byte range: 0x40 -- 0x7e, 0x81 -- oxfe 201 | //no validation needed here. State machine has done that 202 | PRInt32 GetOrder(const char* str) 203 | { 204 | PRInt32 order; 205 | if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f) 206 | order = 188 * ((unsigned char)str[0]-(unsigned char)0x81); 207 | else if ((unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xef) 208 | order = 188 * ((unsigned char)str[0]-(unsigned char)0xe0 + 31); 209 | else 210 | return -1; 211 | order += (unsigned char)*(str+1) - 0x40; 212 | if ((unsigned char)str[1] > (unsigned char)0x7f) 213 | order--; 214 | return order; 215 | } 216 | }; 217 | 218 | class EUCJPDistributionAnalysis : public CharDistributionAnalysis 219 | { 220 | public: 221 | EUCJPDistributionAnalysis(); 222 | protected: 223 | //for euc-JP encoding, we are interested 224 | // first byte range: 0xa0 -- 0xfe 225 | // second byte range: 0xa1 -- 0xfe 226 | //no validation needed here. 
State machine has done that 227 | PRInt32 GetOrder(const char* str) 228 | { if ((unsigned char)*str >= (unsigned char)0xa0) 229 | return 94*((unsigned char)str[0]-(unsigned char)0xa1) + (unsigned char)str[1] - (unsigned char)0xa1; 230 | else 231 | return -1; 232 | } 233 | }; 234 | 235 | #endif //CharDistribution_h__ 236 | 237 | -------------------------------------------------------------------------------- /nspr-emu/prcpucfg_win.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is the Netscape Portable Runtime (NSPR). 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998-2000 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. 
If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nspr_cpucfg___ 39 | #define nspr_cpucfg___ 40 | 41 | #ifndef XP_PC 42 | #define XP_PC 43 | #endif 44 | 45 | #ifndef WIN32 46 | #define WIN32 47 | #endif 48 | 49 | #ifndef WIN95 50 | #define WIN95 51 | #endif 52 | 53 | #define PR_AF_INET6 23 /* same as AF_INET6 */ 54 | 55 | #if defined(_M_IX86) || defined(_X86_) 56 | 57 | #define IS_LITTLE_ENDIAN 1 58 | #undef IS_BIG_ENDIAN 59 | 60 | #define PR_BYTES_PER_BYTE 1 61 | #define PR_BYTES_PER_SHORT 2 62 | #define PR_BYTES_PER_INT 4 63 | #define PR_BYTES_PER_INT64 8 64 | #define PR_BYTES_PER_LONG 4 65 | #define PR_BYTES_PER_FLOAT 4 66 | #define PR_BYTES_PER_WORD 4 67 | #define PR_BYTES_PER_DWORD 8 68 | #define PR_BYTES_PER_DOUBLE 8 69 | 70 | #define PR_BITS_PER_BYTE 8 71 | #define PR_BITS_PER_SHORT 16 72 | #define PR_BITS_PER_INT 32 73 | #define PR_BITS_PER_INT64 64 74 | #define PR_BITS_PER_LONG 32 75 | #define PR_BITS_PER_FLOAT 32 76 | #define PR_BITS_PER_WORD 32 77 | #define PR_BITS_PER_DWORD 64 78 | #define PR_BITS_PER_DOUBLE 64 79 | 80 | #define PR_BITS_PER_BYTE_LOG2 3 81 | #define PR_BITS_PER_SHORT_LOG2 4 82 | #define PR_BITS_PER_INT_LOG2 5 83 | #define PR_BITS_PER_INT64_LOG2 6 84 | #define PR_BITS_PER_LONG_LOG2 5 85 | #define PR_BITS_PER_FLOAT_LOG2 5 86 | #define PR_BITS_PER_WORD_LOG2 5 87 | #define PR_BITS_PER_DWORD_LOG2 6 88 | #define PR_BITS_PER_DOUBLE_LOG2 6 89 | 90 | #define PR_ALIGN_OF_SHORT 2 91 | #define PR_ALIGN_OF_INT 4 92 | 
#define PR_ALIGN_OF_LONG 4 93 | #define PR_ALIGN_OF_INT64 8 94 | #define PR_ALIGN_OF_FLOAT 4 95 | #define PR_ALIGN_OF_WORD 4 96 | #define PR_ALIGN_OF_DWORD 8 97 | #define PR_ALIGN_OF_DOUBLE 4 98 | #define PR_ALIGN_OF_POINTER 4 99 | 100 | #define PR_BYTES_PER_WORD_LOG2 2 101 | #define PR_BYTES_PER_DWORD_LOG2 3 /* log2(PR_BYTES_PER_DWORD): a DWORD is 8 bytes on x86 too */ 102 | 103 | #elif defined(_M_X64) || defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64) 104 | 105 | #define IS_LITTLE_ENDIAN 1 106 | #undef IS_BIG_ENDIAN 107 | #define IS_64 108 | 109 | #define PR_BYTES_PER_BYTE 1 110 | #define PR_BYTES_PER_SHORT 2 111 | #define PR_BYTES_PER_INT 4 112 | #define PR_BYTES_PER_INT64 8 113 | #define PR_BYTES_PER_LONG 4 114 | #define PR_BYTES_PER_FLOAT 4 115 | #define PR_BYTES_PER_WORD 8 116 | #define PR_BYTES_PER_DWORD 8 117 | #define PR_BYTES_PER_DOUBLE 8 118 | 119 | #define PR_BITS_PER_BYTE 8 120 | #define PR_BITS_PER_SHORT 16 121 | #define PR_BITS_PER_INT 32 122 | #define PR_BITS_PER_INT64 64 123 | #define PR_BITS_PER_LONG 32 124 | #define PR_BITS_PER_FLOAT 32 125 | #define PR_BITS_PER_WORD 64 126 | #define PR_BITS_PER_DWORD 64 127 | #define PR_BITS_PER_DOUBLE 64 128 | 129 | #define PR_BITS_PER_BYTE_LOG2 3 130 | #define PR_BITS_PER_SHORT_LOG2 4 131 | #define PR_BITS_PER_INT_LOG2 5 132 | #define PR_BITS_PER_INT64_LOG2 6 133 | #define PR_BITS_PER_LONG_LOG2 5 134 | #define PR_BITS_PER_FLOAT_LOG2 5 135 | #define PR_BITS_PER_WORD_LOG2 6 136 | #define PR_BITS_PER_DWORD_LOG2 6 137 | #define PR_BITS_PER_DOUBLE_LOG2 6 138 | 139 | #define PR_ALIGN_OF_SHORT 2 140 | #define PR_ALIGN_OF_INT 4 141 | #define PR_ALIGN_OF_LONG 4 142 | #define PR_ALIGN_OF_INT64 8 143 | #define PR_ALIGN_OF_FLOAT 4 144 | #define PR_ALIGN_OF_WORD 8 145 | #define PR_ALIGN_OF_DWORD 8 146 | #define PR_ALIGN_OF_DOUBLE 8 147 | #define PR_ALIGN_OF_POINTER 8 148 | 149 | #define PR_BYTES_PER_WORD_LOG2 3 150 | #define PR_BYTES_PER_DWORD_LOG2 3 151 | 152 | #elif defined(_M_IA64) || defined(_IA64_) 153 | 154 | #define IS_LITTLE_ENDIAN 1 155 | #undef IS_BIG_ENDIAN 156
| #define IS_64 157 | 158 | #define PR_BYTES_PER_BYTE 1 159 | #define PR_BYTES_PER_SHORT 2 160 | #define PR_BYTES_PER_INT 4 161 | #define PR_BYTES_PER_INT64 8 162 | #define PR_BYTES_PER_LONG 4 163 | #define PR_BYTES_PER_FLOAT 4 164 | #define PR_BYTES_PER_WORD 8 165 | #define PR_BYTES_PER_DWORD 8 166 | #define PR_BYTES_PER_DOUBLE 8 167 | 168 | #define PR_BITS_PER_BYTE 8 169 | #define PR_BITS_PER_SHORT 16 170 | #define PR_BITS_PER_INT 32 171 | #define PR_BITS_PER_INT64 64 172 | #define PR_BITS_PER_LONG 32 173 | #define PR_BITS_PER_FLOAT 32 174 | #define PR_BITS_PER_WORD 64 175 | #define PR_BITS_PER_DWORD 64 176 | #define PR_BITS_PER_DOUBLE 64 177 | 178 | #define PR_BITS_PER_BYTE_LOG2 3 179 | #define PR_BITS_PER_SHORT_LOG2 4 180 | #define PR_BITS_PER_INT_LOG2 5 181 | #define PR_BITS_PER_INT64_LOG2 6 182 | #define PR_BITS_PER_LONG_LOG2 5 183 | #define PR_BITS_PER_FLOAT_LOG2 5 184 | #define PR_BITS_PER_WORD_LOG2 6 185 | #define PR_BITS_PER_DWORD_LOG2 6 186 | #define PR_BITS_PER_DOUBLE_LOG2 6 187 | 188 | #define PR_ALIGN_OF_SHORT 2 189 | #define PR_ALIGN_OF_INT 4 190 | #define PR_ALIGN_OF_LONG 4 191 | #define PR_ALIGN_OF_INT64 8 192 | #define PR_ALIGN_OF_FLOAT 4 193 | #define PR_ALIGN_OF_WORD 8 194 | #define PR_ALIGN_OF_DWORD 8 195 | #define PR_ALIGN_OF_DOUBLE 8 196 | #define PR_ALIGN_OF_POINTER 8 197 | 198 | #define PR_BYTES_PER_WORD_LOG2 3 199 | #define PR_BYTES_PER_DWORD_LOG2 3 200 | 201 | #else /* defined(_M_IX86) || defined(_X86_) */ 202 | 203 | #error unknown processor architecture 204 | 205 | #endif /* defined(_M_IX86) || defined(_X86_) */ 206 | 207 | #ifndef HAVE_LONG_LONG 208 | #define HAVE_LONG_LONG 209 | #endif 210 | 211 | #ifndef NO_NSPR_10_SUPPORT 212 | 213 | #define BYTES_PER_BYTE PR_BYTES_PER_BYTE 214 | #define BYTES_PER_SHORT PR_BYTES_PER_SHORT 215 | #define BYTES_PER_INT PR_BYTES_PER_INT 216 | #define BYTES_PER_INT64 PR_BYTES_PER_INT64 217 | #define BYTES_PER_LONG PR_BYTES_PER_LONG 218 | #define BYTES_PER_FLOAT PR_BYTES_PER_FLOAT 219 | #define 
BYTES_PER_DOUBLE PR_BYTES_PER_DOUBLE 220 | #define BYTES_PER_WORD PR_BYTES_PER_WORD 221 | #define BYTES_PER_DWORD PR_BYTES_PER_DWORD 222 | 223 | #define BITS_PER_BYTE PR_BITS_PER_BYTE 224 | #define BITS_PER_SHORT PR_BITS_PER_SHORT 225 | #define BITS_PER_INT PR_BITS_PER_INT 226 | #define BITS_PER_INT64 PR_BITS_PER_INT64 227 | #define BITS_PER_LONG PR_BITS_PER_LONG 228 | #define BITS_PER_FLOAT PR_BITS_PER_FLOAT 229 | #define BITS_PER_DOUBLE PR_BITS_PER_DOUBLE 230 | #define BITS_PER_WORD PR_BITS_PER_WORD 231 | 232 | #define BITS_PER_BYTE_LOG2 PR_BITS_PER_BYTE_LOG2 233 | #define BITS_PER_SHORT_LOG2 PR_BITS_PER_SHORT_LOG2 234 | #define BITS_PER_INT_LOG2 PR_BITS_PER_INT_LOG2 235 | #define BITS_PER_INT64_LOG2 PR_BITS_PER_INT64_LOG2 236 | #define BITS_PER_LONG_LOG2 PR_BITS_PER_LONG_LOG2 237 | #define BITS_PER_FLOAT_LOG2 PR_BITS_PER_FLOAT_LOG2 238 | #define BITS_PER_DOUBLE_LOG2 PR_BITS_PER_DOUBLE_LOG2 239 | #define BITS_PER_WORD_LOG2 PR_BITS_PER_WORD_LOG2 240 | 241 | #define ALIGN_OF_SHORT PR_ALIGN_OF_SHORT 242 | #define ALIGN_OF_INT PR_ALIGN_OF_INT 243 | #define ALIGN_OF_LONG PR_ALIGN_OF_LONG 244 | #define ALIGN_OF_INT64 PR_ALIGN_OF_INT64 245 | #define ALIGN_OF_FLOAT PR_ALIGN_OF_FLOAT 246 | #define ALIGN_OF_DOUBLE PR_ALIGN_OF_DOUBLE 247 | #define ALIGN_OF_POINTER PR_ALIGN_OF_POINTER 248 | #define ALIGN_OF_WORD PR_ALIGN_OF_WORD 249 | 250 | #define BYTES_PER_WORD_LOG2 PR_BYTES_PER_WORD_LOG2 251 | #define BYTES_PER_DWORD_LOG2 PR_BYTES_PER_DWORD_LOG2 252 | #define WORDS_PER_DWORD_LOG2 PR_WORDS_PER_DWORD_LOG2 253 | 254 | #endif /* NO_NSPR_10_SUPPORT */ 255 | 256 | #endif /* nspr_cpucfg___ */ 257 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsHebrewProber.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 
4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Shy Shalom 19 | * Portions created by the Initial Developer are Copyright (C) 2005 20 | * the Initial Developer: All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nsHebrewProber_h__ 39 | #define nsHebrewProber_h__ 40 | 41 | #include "nsSBCharSetProber.h" 42 | 43 | // This prober doesn't actually recognize a language or a charset. 
44 | // It is a helper prober for the use of the Hebrew model probers 45 | class nsHebrewProber: public nsCharSetProber 46 | { 47 | public: 48 | nsHebrewProber(void) :mLogicalProb(0), mVisualProb(0) { Reset(); } // both model-prober pointers start out null 49 | 50 | virtual ~nsHebrewProber(void) {} 51 | virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen); 52 | virtual const char* GetCharSetName(); 53 | virtual void Reset(void); 54 | 55 | virtual nsProbingState GetState(void); 56 | 57 | virtual float GetConfidence(void) { return (float)0.0; } // constant 0: this helper never scores a charset by itself 58 | virtual void SetOpion() {} // no-op in this class 59 | 60 | void SetModelProbers(nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb) // stores both pointers as given; nothing is copied 61 | { mLogicalProb = logicalPrb; mVisualProb = visualPrb; } 62 | 63 | #ifdef DEBUG_chardet 64 | virtual void DumpStatus(); 65 | #endif 66 | 67 | protected: 68 | static PRBool isFinal(char c); 69 | static PRBool isNonFinal(char c); 70 | 71 | PRInt32 mFinalCharLogicalScore, mFinalCharVisualScore; 72 | 73 | // The two last characters seen in the previous buffer. 74 | char mPrev, mBeforePrev; 75 | 76 | // These probers are owned by the group prober. 77 | nsCharSetProber *mLogicalProb, *mVisualProb; 78 | }; 79 | 80 | /** 81 | * ** General ideas of the Hebrew charset recognition ** 82 | * 83 | * Four main charsets exist in Hebrew: 84 | * "ISO-8859-8" - Visual Hebrew 85 | * "windows-1255" - Logical Hebrew 86 | * "ISO-8859-8-I" - Logical Hebrew 87 | * "x-mac-hebrew" - ?? Logical Hebrew ?? 88 | * 89 | * Both "ISO" charsets use a completely identical set of code points, whereas 90 | * "windows-1255" and "x-mac-hebrew" are two different proper supersets of 91 | * these code points. windows-1255 defines additional characters in the range 92 | * 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific 93 | * diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. 94 | * x-mac-hebrew defines similar additional code points but with a different 95 | * mapping.
96 | * 97 | * As far as an average Hebrew text with no diacritics is concerned, all four 98 | * charsets are identical with respect to code points. Meaning that for the 99 | * main Hebrew alphabet, all four map the same values to all 27 Hebrew letters 100 | * (including final letters). 101 | * 102 | * The dominant difference between these charsets is their directionality. 103 | * "Visual" directionality means that the text is ordered as if the renderer is 104 | * not aware of a BIDI rendering algorithm. The renderer sees the text and 105 | * draws it from left to right. The text itself when ordered naturally is read 106 | * backwards. A buffer of Visual Hebrew generally looks like so: 107 | * "[last word of first line spelled backwards] [whole line ordered backwards 108 | * and spelled backwards] [first word of first line spelled backwards] 109 | * [end of line] [last word of second line] ... etc' " 110 | * adding punctuation marks, numbers and English text to visual text is 111 | * naturally also "visual" and from left to right. 112 | * 113 | * "Logical" directionality means the text is ordered "naturally" according to 114 | * the order it is read. It is the responsibility of the renderer to display 115 | * the text from right to left. A BIDI algorithm is used to place general 116 | * punctuation marks, numbers and English text in the text. 117 | * 118 | * Texts in x-mac-hebrew are almost impossible to find on the Internet. From 119 | * what little evidence I could find, it seems that its general directionality 120 | * is Logical. 121 | * 122 | * To sum up all of the above, the Hebrew probing mechanism knows about two 123 | * charsets: 124 | * Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are 125 | * backwards while line order is natural. For charset recognition purposes 126 | * the line order is unimportant (In fact, for this implementation, even 127 | * word order is unimportant). 
128 | * Logical Hebrew - "windows-1255" - normal, naturally ordered text. 129 | * 130 | * "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be 131 | * specifically identified. 132 | * "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew 133 | * that contains special punctuation marks or diacritics is displayed with 134 | * some unconverted characters showing as question marks. This problem might 135 | * be corrected using another model prober for x-mac-hebrew. Due to the fact 136 | * that x-mac-hebrew texts are so rare, writing another model prober isn't 137 | * worth the effort and performance hit. 138 | * 139 | * *** The Prober *** 140 | * 141 | * The prober is divided between two nsSBCharSetProbers and an nsHebrewProber, 142 | * all of which are managed, created, fed data, queried and deleted by the 143 | * nsSBCSGroupProber. The two nsSBCharSetProbers identify that the text is in 144 | * fact some kind of Hebrew, Logical or Visual. The final decision about which 145 | * one it is is made by the nsHebrewProber by combining final-letter scores 146 | * with the scores of the two nsSBCharSetProbers to produce a final answer. 147 | * 148 | * The nsSBCSGroupProber is responsible for stripping the original text of HTML 149 | * tags, English characters, numbers, low-ASCII punctuation characters, spaces 150 | * and new lines. It reduces any sequence of such characters to a single space. 151 | * The buffer fed to each prober in the SBCS group prober is pure text in 152 | * high-ASCII. 153 | * The two nsSBCharSetProbers (model probers) share the same language model: 154 | * Win1255Model. 155 | * The first nsSBCharSetProber uses the model normally as any other 156 | * nsSBCharSetProber does, to recognize windows-1255, upon which this model was 157 | * built. The second nsSBCharSetProber is told to make the pair-of-letter 158 | * lookup in the language model backwards.
This in practice exactly simulates 159 | * a visual Hebrew model using the windows-1255 logical Hebrew model. 160 | * 161 | * The nsHebrewProber is not using any language model. All it does is look for 162 | * final-letter evidence suggesting the text is either logical Hebrew or visual 163 | * Hebrew. Disjointed from the model probers, the results of the nsHebrewProber 164 | * alone are meaningless. nsHebrewProber always returns 0.00 as confidence 165 | * since it never identifies a charset by itself. Instead, the pointer to the 166 | * nsHebrewProber is passed to the model probers as a helper "Name Prober". 167 | * When the Group prober receives a positive identification from any prober, 168 | * it asks for the name of the charset identified. If the prober queried is a 169 | * Hebrew model prober, the model prober forwards the call to the 170 | * nsHebrewProber to make the final decision. In the nsHebrewProber, the 171 | * decision is made according to the final-letter scores maintained and both 172 | * model probers' scores. The answer is returned in the form of the name of the 173 | * charset identified, either "windows-1255" or "ISO-8859-8". 174 | * 175 | */ 176 | #endif /* nsHebrewProber_h__ */ 177 | -------------------------------------------------------------------------------- /mozilla/extensions/universalchardet/src/base/nsUniversalDetector.cpp: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License.
You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is Mozilla Universal charset detector code. 16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 2001 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * Shy Shalom 24 | * 25 | * Alternatively, the contents of this file may be used under the terms of 26 | * either the GNU General Public License Version 2 or later (the "GPL"), or 27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28 | * in which case the provisions of the GPL or the LGPL are applicable instead 29 | * of those above. If you wish to allow use of your version of this file only 30 | * under the terms of either the GPL or the LGPL, and not to allow others to 31 | * use your version of this file under the terms of the MPL, indicate your 32 | * decision by deleting the provisions above and replace them with the notice 33 | * and other provisions required by the GPL or the LGPL. If you do not delete 34 | * the provisions above, a recipient may use your version of this file under 35 | * the terms of any one of the MPL, the GPL or the LGPL. 
36 | * 37 | * ***** END LICENSE BLOCK ***** */ 38 | 39 | #include "nscore.h" 40 | 41 | #include "nsUniversalDetector.h" 42 | 43 | #include "nsMBCSGroupProber.h" 44 | #include "nsSBCSGroupProber.h" 45 | #include "nsEscCharsetProber.h" 46 | #include "nsLatin1Prober.h" 47 | 48 | nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter) 49 | { 50 | mDone = PR_FALSE; 51 | mBestGuess = -1; //illegal value as signal 52 | mInTag = PR_FALSE; 53 | mEscCharSetProber = nsnull; 54 | 55 | mStart = PR_TRUE; 56 | mDetectedCharset = nsnull; 57 | mGotData = PR_FALSE; 58 | mInputState = ePureAscii; 59 | mLastChar = '\0'; 60 | mLanguageFilter = aLanguageFilter; 61 | 62 | PRUint32 i; 63 | for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) 64 | mCharSetProbers[i] = nsnull; 65 | } 66 | 67 | nsUniversalDetector::~nsUniversalDetector() 68 | { 69 | for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++) 70 | if (mCharSetProbers[i]) 71 | delete mCharSetProbers[i]; 72 | if (mEscCharSetProber) 73 | delete mEscCharSetProber; 74 | } 75 | 76 | void 77 | nsUniversalDetector::Reset() 78 | { 79 | mDone = PR_FALSE; 80 | mBestGuess = -1; //illegal value as signal 81 | mInTag = PR_FALSE; 82 | 83 | mStart = PR_TRUE; 84 | mDetectedCharset = nsnull; 85 | mGotData = PR_FALSE; 86 | mInputState = ePureAscii; 87 | mLastChar = '\0'; 88 | 89 | if (mEscCharSetProber) 90 | mEscCharSetProber->Reset(); 91 | 92 | PRUint32 i; 93 | for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) 94 | if (mCharSetProbers[i]) 95 | mCharSetProbers[i]->Reset(); 96 | } 97 | 98 | //--------------------------------------------------------------------- 99 | #define SHORTCUT_THRESHOLD (float)0.95 100 | #define MINIMUM_THRESHOLD (float)0.20 101 | 102 | nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) 103 | { 104 | if(mDone) 105 | return NS_OK; 106 | 107 | if (aLen > 0) 108 | mGotData = PR_TRUE; 109 | 110 | //If the data starts with BOM, we know it is UTF 111 | if (mStart) 112 | { 113 | mStart = PR_FALSE; 114 | if (aLen > 
2) 115 | switch (aBuf[0]) // 3 bytes suffice for the UTF-8 BOM only; every other branch re-checks aLen > 3 so a truncated 4-byte BOM is never misread 116 | { 117 | case '\xEF': 118 | if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) 119 | // EF BB BF UTF-8 encoded BOM 120 | mDetectedCharset = "UTF-8"; 121 | break; 122 | case '\xFE': 123 | if ((aLen > 3) && ('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3])) 124 | // FE FF 00 00 UCS-4, unusual octet order BOM (3412) 125 | mDetectedCharset = "X-ISO-10646-UCS-4-3412"; 126 | else if ((aLen > 3) && ('\xFF' == aBuf[1])) 127 | // FE FF UTF-16, big endian BOM 128 | mDetectedCharset = "UTF-16BE"; 129 | break; 130 | case '\x00': 131 | if ((aLen > 3) && ('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3])) 132 | // 00 00 FE FF UTF-32, big-endian BOM 133 | mDetectedCharset = "UTF-32BE"; 134 | else if ((aLen > 3) && ('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3])) 135 | // 00 00 FF FE UCS-4, unusual octet order BOM (2143) 136 | mDetectedCharset = "X-ISO-10646-UCS-4-2143"; 137 | break; 138 | case '\xFF': 139 | if ((aLen > 3) && ('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3])) 140 | // FF FE 00 00 UTF-32, little-endian BOM 141 | mDetectedCharset = "UTF-32LE"; 142 | else if ((aLen > 3) && ('\xFE' == aBuf[1])) 143 | // FF FE UTF-16, little endian BOM 144 | mDetectedCharset = "UTF-16LE"; 145 | break; 146 | } // switch 147 | 148 | if (mDetectedCharset) 149 | { 150 | mDone = PR_TRUE; 151 | return NS_OK; 152 | } 153 | } 154 | 155 | PRUint32 i; 156 | for (i = 0; i < aLen; i++) 157 | { 158 | //other than 0xa0, if every other character is ascii, the page is ascii 159 | if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP 160 | { 161 | //we got a non-ascii byte (high-byte) 162 | if (mInputState != eHighbyte) 163 | { 164 | //adjust state 165 | mInputState = eHighbyte; 166 | 167 | //kill mEscCharSetProber if it is active 168 | if (mEscCharSetProber) { 169 | delete mEscCharSetProber; 170 | mEscCharSetProber = nsnull; 171 | } 172 | 173 | //start multibyte and singlebyte charset prober 174 | if (nsnull == mCharSetProbers[0]) 175 | { 176 | mCharSetProbers[0] =
new nsMBCSGroupProber(mLanguageFilter); 177 | if (nsnull == mCharSetProbers[0]) 178 | return NS_ERROR_OUT_OF_MEMORY; 179 | } 180 | if (nsnull == mCharSetProbers[1] && 181 | (mLanguageFilter & NS_FILTER_NON_CJK)) 182 | { 183 | mCharSetProbers[1] = new nsSBCSGroupProber; 184 | if (nsnull == mCharSetProbers[1]) 185 | return NS_ERROR_OUT_OF_MEMORY; 186 | } 187 | if (nsnull == mCharSetProbers[2]) 188 | { 189 | mCharSetProbers[2] = new nsLatin1Prober; 190 | if (nsnull == mCharSetProbers[2]) 191 | return NS_ERROR_OUT_OF_MEMORY; 192 | } 193 | } 194 | } 195 | else 196 | { 197 | //ok, just pure ascii so far 198 | if ( ePureAscii == mInputState && 199 | (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) ) 200 | { 201 | //found escape character or HZ "~{" 202 | mInputState = eEscAscii; 203 | } 204 | mLastChar = aBuf[i]; 205 | } 206 | } 207 | 208 | nsProbingState st; 209 | switch (mInputState) 210 | { 211 | case eEscAscii: 212 | if (nsnull == mEscCharSetProber) { 213 | mEscCharSetProber = new nsEscCharSetProber(mLanguageFilter); 214 | if (nsnull == mEscCharSetProber) 215 | return NS_ERROR_OUT_OF_MEMORY; 216 | } 217 | st = mEscCharSetProber->HandleData(aBuf, aLen); 218 | if (st == eFoundIt) 219 | { 220 | mDone = PR_TRUE; 221 | mDetectedCharset = mEscCharSetProber->GetCharSetName(); 222 | } 223 | break; 224 | case eHighbyte: 225 | for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) 226 | { 227 | if (mCharSetProbers[i]) 228 | { 229 | st = mCharSetProbers[i]->HandleData(aBuf, aLen); 230 | if (st == eFoundIt) 231 | { 232 | mDone = PR_TRUE; 233 | mDetectedCharset = mCharSetProbers[i]->GetCharSetName(); 234 | return NS_OK; 235 | } 236 | } 237 | } 238 | break; 239 | 240 | default: //pure ascii 241 | ;//do nothing here 242 | } 243 | return NS_OK; 244 | } 245 | 246 | 247 | //--------------------------------------------------------------------- 248 | void nsUniversalDetector::DataEnd() 249 | { 250 | if (!mGotData) 251 | { 252 | // we haven't got any data yet, return immediately 253 | // 
caller program sometimes call DataEnd before anything has been sent to detector 254 | return; 255 | } 256 | 257 | if (mDetectedCharset) 258 | { 259 | mDone = PR_TRUE; 260 | Report(mDetectedCharset); 261 | return; 262 | } 263 | 264 | switch (mInputState) 265 | { 266 | case eHighbyte: 267 | { 268 | float proberConfidence; 269 | float maxProberConfidence = (float)0.0; 270 | PRInt32 maxProber = 0; 271 | 272 | for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++) 273 | { 274 | if (mCharSetProbers[i]) 275 | { 276 | proberConfidence = mCharSetProbers[i]->GetConfidence(); 277 | if (proberConfidence > maxProberConfidence) 278 | { 279 | maxProberConfidence = proberConfidence; 280 | maxProber = i; 281 | } 282 | } 283 | } 284 | //do not report anything because we are not confident of it, that's in fact a negative answer 285 | if (maxProberConfidence > MINIMUM_THRESHOLD) 286 | Report(mCharSetProbers[maxProber]->GetCharSetName()); 287 | } 288 | break; 289 | case eEscAscii: 290 | break; 291 | default: 292 | ; 293 | } 294 | return; 295 | } 296 | -------------------------------------------------------------------------------- /nspr-emu/prcpucfg_freebsd.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 | /* ***** BEGIN LICENSE BLOCK ***** 3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Mozilla Public License Version 6 | * 1.1 (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * http://www.mozilla.org/MPL/ 9 | * 10 | * Software distributed under the License is distributed on an "AS IS" basis, 11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 | * for the specific language governing rights and limitations under the 13 | * License. 14 | * 15 | * The Original Code is the Netscape Portable Runtime (NSPR). 
16 | * 17 | * The Initial Developer of the Original Code is 18 | * Netscape Communications Corporation. 19 | * Portions created by the Initial Developer are Copyright (C) 1998-2000 20 | * the Initial Developer. All Rights Reserved. 21 | * 22 | * Contributor(s): 23 | * 24 | * Alternatively, the contents of this file may be used under the terms of 25 | * either the GNU General Public License Version 2 or later (the "GPL"), or 26 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 | * in which case the provisions of the GPL or the LGPL are applicable instead 28 | * of those above. If you wish to allow use of your version of this file only 29 | * under the terms of either the GPL or the LGPL, and not to allow others to 30 | * use your version of this file under the terms of the MPL, indicate your 31 | * decision by deleting the provisions above and replace them with the notice 32 | * and other provisions required by the GPL or the LGPL. If you do not delete 33 | * the provisions above, a recipient may use your version of this file under 34 | * the terms of any one of the MPL, the GPL or the LGPL. 
35 | * 36 | * ***** END LICENSE BLOCK ***** */ 37 | 38 | #ifndef nspr_cpucfg___ 39 | #define nspr_cpucfg___ 40 | 41 | #ifndef XP_UNIX 42 | #define XP_UNIX 43 | #endif 44 | 45 | #ifndef FREEBSD 46 | #define FREEBSD 47 | #endif 48 | 49 | #define PR_AF_INET6 28 /* same as AF_INET6 */ 50 | 51 | #ifndef HAVE_LONG_LONG 52 | #define HAVE_LONG_LONG 53 | #endif 54 | 55 | #if defined(__i386__) 56 | 57 | #define IS_LITTLE_ENDIAN 1 58 | #undef IS_BIG_ENDIAN 59 | #undef HAVE_ALIGNED_DOUBLES 60 | #undef HAVE_ALIGNED_LONGLONGS 61 | 62 | #define PR_BYTES_PER_BYTE 1 63 | #define PR_BYTES_PER_SHORT 2 64 | #define PR_BYTES_PER_INT 4 65 | #define PR_BYTES_PER_INT64 8 66 | #define PR_BYTES_PER_LONG 4 67 | #define PR_BYTES_PER_FLOAT 4 68 | #define PR_BYTES_PER_DOUBLE 8 69 | #define PR_BYTES_PER_WORD 4 70 | #define PR_BYTES_PER_DWORD 8 71 | #define PR_BYTES_PER_WORD_LOG2 2 72 | #define PR_BYTES_PER_DWORD_LOG2 3 73 | 74 | #define PR_BITS_PER_BYTE 8 75 | #define PR_BITS_PER_SHORT 16 76 | #define PR_BITS_PER_INT 32 77 | #define PR_BITS_PER_INT64 64 78 | #define PR_BITS_PER_LONG 32 79 | #define PR_BITS_PER_FLOAT 32 80 | #define PR_BITS_PER_DOUBLE 64 81 | #define PR_BITS_PER_WORD 32 82 | 83 | #define PR_BITS_PER_BYTE_LOG2 3 84 | #define PR_BITS_PER_SHORT_LOG2 4 85 | #define PR_BITS_PER_INT_LOG2 5 86 | #define PR_BITS_PER_INT64_LOG2 6 87 | #define PR_BITS_PER_LONG_LOG2 5 88 | #define PR_BITS_PER_FLOAT_LOG2 5 89 | #define PR_BITS_PER_DOUBLE_LOG2 6 90 | #define PR_BITS_PER_WORD_LOG2 5 91 | 92 | #define PR_ALIGN_OF_SHORT 2 93 | #define PR_ALIGN_OF_INT 4 94 | #define PR_ALIGN_OF_LONG 4 95 | #define PR_ALIGN_OF_INT64 4 96 | #define PR_ALIGN_OF_FLOAT 4 97 | #define PR_ALIGN_OF_DOUBLE 4 98 | #define PR_ALIGN_OF_POINTER 4 99 | 100 | #elif defined(__alpha__) 101 | 102 | #define IS_LITTLE_ENDIAN 1 103 | #undef IS_BIG_ENDIAN 104 | #define HAVE_ALIGNED_DOUBLES 105 | #define HAVE_ALIGNED_LONGLONGS 106 | #define IS_64 107 | 108 | #define PR_BYTES_PER_BYTE 1 109 | #define PR_BYTES_PER_SHORT 2 110 | #define 
PR_BYTES_PER_INT 4 111 | #define PR_BYTES_PER_INT64 8 112 | #define PR_BYTES_PER_LONG 8 113 | #define PR_BYTES_PER_FLOAT 4 114 | #define PR_BYTES_PER_DOUBLE 8 115 | #define PR_BYTES_PER_WORD 8 116 | #define PR_BYTES_PER_DWORD 8 117 | #define PR_BYTES_PER_WORD_LOG2 3 118 | #define PR_BYTES_PER_DWORD_LOG2 3 119 | 120 | #define PR_BITS_PER_BYTE 8 121 | #define PR_BITS_PER_SHORT 16 122 | #define PR_BITS_PER_INT 32 123 | #define PR_BITS_PER_INT64 64 124 | #define PR_BITS_PER_LONG 64 125 | #define PR_BITS_PER_FLOAT 32 126 | #define PR_BITS_PER_DOUBLE 64 127 | #define PR_BITS_PER_WORD 64 128 | 129 | #define PR_BITS_PER_BYTE_LOG2 3 130 | #define PR_BITS_PER_SHORT_LOG2 4 131 | #define PR_BITS_PER_INT_LOG2 5 132 | #define PR_BITS_PER_INT64_LOG2 6 133 | #define PR_BITS_PER_LONG_LOG2 6 134 | #define PR_BITS_PER_FLOAT_LOG2 5 135 | #define PR_BITS_PER_DOUBLE_LOG2 6 136 | #define PR_BITS_PER_WORD_LOG2 6 137 | 138 | #define PR_ALIGN_OF_SHORT 2 139 | #define PR_ALIGN_OF_INT 4 140 | #define PR_ALIGN_OF_LONG 8 141 | #define PR_ALIGN_OF_INT64 8 142 | #define PR_ALIGN_OF_FLOAT 4 143 | #define PR_ALIGN_OF_DOUBLE 8 144 | #define PR_ALIGN_OF_POINTER 8 145 | 146 | #elif defined(__sparc__) 147 | 148 | #undef IS_LITTLE_ENDIAN 149 | #define IS_BIG_ENDIAN 1 150 | #define HAVE_ALIGNED_DOUBLES 151 | #define HAVE_ALIGNED_LONGLONGS 152 | #define IS_64 153 | 154 | #define PR_BYTES_PER_BYTE 1 155 | #define PR_BYTES_PER_SHORT 2 156 | #define PR_BYTES_PER_INT 4 157 | #define PR_BYTES_PER_INT64 8 158 | #define PR_BYTES_PER_LONG 8 159 | #define PR_BYTES_PER_FLOAT 4 160 | #define PR_BYTES_PER_DOUBLE 8 161 | #define PR_BYTES_PER_WORD 8 162 | #define PR_BYTES_PER_DWORD 8 163 | #define PR_BYTES_PER_WORD_LOG2 3 164 | #define PR_BYTES_PER_DWORD_LOG2 3 165 | 166 | #define PR_BITS_PER_BYTE 8 167 | #define PR_BITS_PER_SHORT 16 168 | #define PR_BITS_PER_INT 32 169 | #define PR_BITS_PER_INT64 64 170 | #define PR_BITS_PER_LONG 64 171 | #define PR_BITS_PER_FLOAT 32 172 | #define PR_BITS_PER_DOUBLE 64 173 | #define 
PR_BITS_PER_WORD 64 174 | 175 | #define PR_BITS_PER_BYTE_LOG2 3 176 | #define PR_BITS_PER_SHORT_LOG2 4 177 | #define PR_BITS_PER_INT_LOG2 5 178 | #define PR_BITS_PER_INT64_LOG2 6 179 | #define PR_BITS_PER_LONG_LOG2 6 180 | #define PR_BITS_PER_FLOAT_LOG2 5 181 | #define PR_BITS_PER_DOUBLE_LOG2 6 182 | #define PR_BITS_PER_WORD_LOG2 6 183 | 184 | #define PR_ALIGN_OF_SHORT 2 185 | #define PR_ALIGN_OF_INT 4 186 | #define PR_ALIGN_OF_LONG 8 187 | #define PR_ALIGN_OF_INT64 8 188 | #define PR_ALIGN_OF_FLOAT 4 189 | #define PR_ALIGN_OF_DOUBLE 8 190 | #define PR_ALIGN_OF_POINTER 8 191 | 192 | #elif defined(__ia64__) 193 | 194 | #define IS_LITTLE_ENDIAN 1 195 | #undef IS_BIG_ENDIAN 196 | #define HAVE_ALIGNED_DOUBLES 197 | #define HAVE_ALIGNED_LONGLONGS 198 | #define IS_64 199 | 200 | #define PR_BYTES_PER_BYTE 1 201 | #define PR_BYTES_PER_SHORT 2 202 | #define PR_BYTES_PER_INT 4 203 | #define PR_BYTES_PER_INT64 8 204 | #define PR_BYTES_PER_LONG 8 205 | #define PR_BYTES_PER_FLOAT 4 206 | #define PR_BYTES_PER_DOUBLE 8 207 | #define PR_BYTES_PER_WORD 8 208 | #define PR_BYTES_PER_DWORD 8 209 | #define PR_BYTES_PER_WORD_LOG2 3 210 | #define PR_BYTES_PER_DWORD_LOG2 3 211 | 212 | #define PR_BITS_PER_BYTE 8 213 | #define PR_BITS_PER_SHORT 16 214 | #define PR_BITS_PER_INT 32 215 | #define PR_BITS_PER_INT64 64 216 | #define PR_BITS_PER_LONG 64 217 | #define PR_BITS_PER_FLOAT 32 218 | #define PR_BITS_PER_DOUBLE 64 219 | #define PR_BITS_PER_WORD 64 220 | 221 | #define PR_BITS_PER_BYTE_LOG2 3 222 | #define PR_BITS_PER_SHORT_LOG2 4 223 | #define PR_BITS_PER_INT_LOG2 5 224 | #define PR_BITS_PER_INT64_LOG2 6 225 | #define PR_BITS_PER_LONG_LOG2 6 226 | #define PR_BITS_PER_FLOAT_LOG2 5 227 | #define PR_BITS_PER_DOUBLE_LOG2 6 228 | #define PR_BITS_PER_WORD_LOG2 6 229 | 230 | #define PR_ALIGN_OF_SHORT 2 231 | #define PR_ALIGN_OF_INT 4 232 | #define PR_ALIGN_OF_LONG 8 233 | #define PR_ALIGN_OF_INT64 8 234 | #define PR_ALIGN_OF_FLOAT 4 235 | #define PR_ALIGN_OF_DOUBLE 8 236 | #define 
PR_ALIGN_OF_POINTER 8 237 | #define PR_ALIGN_OF_WORD 8 238 | 239 | #elif defined(__amd64__) 240 | /* amd64: little-endian (IS_LITTLE_ENDIAN) with IS_64 set; long, word and dword are all 8 bytes. */ 241 | #define IS_LITTLE_ENDIAN 1 242 | #undef IS_BIG_ENDIAN 243 | #define HAVE_ALIGNED_DOUBLES 244 | #define HAVE_ALIGNED_LONGLONGS 245 | #define IS_64 246 | 247 | #define PR_BYTES_PER_BYTE 1 248 | #define PR_BYTES_PER_SHORT 2 249 | #define PR_BYTES_PER_INT 4 250 | #define PR_BYTES_PER_INT64 8 251 | #define PR_BYTES_PER_LONG 8 252 | #define PR_BYTES_PER_FLOAT 4 253 | #define PR_BYTES_PER_DOUBLE 8 254 | #define PR_BYTES_PER_WORD 8 255 | #define PR_BYTES_PER_DWORD 8 256 | #define PR_BYTES_PER_WORD_LOG2 3 257 | #define PR_BYTES_PER_DWORD_LOG2 3 258 | 259 | #define PR_BITS_PER_BYTE 8 260 | #define PR_BITS_PER_SHORT 16 261 | #define PR_BITS_PER_INT 32 262 | #define PR_BITS_PER_INT64 64 263 | #define PR_BITS_PER_LONG 64 264 | #define PR_BITS_PER_FLOAT 32 265 | #define PR_BITS_PER_DOUBLE 64 266 | #define PR_BITS_PER_WORD 64 267 | 268 | #define PR_BITS_PER_BYTE_LOG2 3 269 | #define PR_BITS_PER_SHORT_LOG2 4 270 | #define PR_BITS_PER_INT_LOG2 5 271 | #define PR_BITS_PER_INT64_LOG2 6 272 | #define PR_BITS_PER_LONG_LOG2 6 273 | #define PR_BITS_PER_FLOAT_LOG2 5 274 | #define PR_BITS_PER_DOUBLE_LOG2 6 275 | #define PR_BITS_PER_WORD_LOG2 6 276 | 277 | #define PR_ALIGN_OF_SHORT 2 278 | #define PR_ALIGN_OF_INT 4 279 | #define PR_ALIGN_OF_LONG 8 280 | #define PR_ALIGN_OF_INT64 8 281 | #define PR_ALIGN_OF_FLOAT 4 282 | #define PR_ALIGN_OF_DOUBLE 8 283 | #define PR_ALIGN_OF_POINTER 8 284 | #define PR_ALIGN_OF_WORD 8 285 | 286 | #else 287 | /* No architecture test above matched: fail the build rather than fall through with no sizes defined. */ 288 | #error "Unknown CPU architecture" 289 | 290 | #endif 291 | /* Unprefixed aliases for the PR_* constants; skipped entirely when NO_NSPR_10_SUPPORT is defined. */ 292 | #ifndef NO_NSPR_10_SUPPORT 293 | /* Type sizes in bytes. */ 294 | #define BYTES_PER_BYTE PR_BYTES_PER_BYTE 295 | #define BYTES_PER_SHORT PR_BYTES_PER_SHORT 296 | #define BYTES_PER_INT PR_BYTES_PER_INT 297 | #define BYTES_PER_INT64 PR_BYTES_PER_INT64 298 | #define BYTES_PER_LONG PR_BYTES_PER_LONG 299 | #define BYTES_PER_FLOAT PR_BYTES_PER_FLOAT 300 | #define BYTES_PER_DOUBLE PR_BYTES_PER_DOUBLE 301 | #define
BYTES_PER_WORD PR_BYTES_PER_WORD 302 | #define BYTES_PER_DWORD PR_BYTES_PER_DWORD 303 | /* Type sizes in bits. */ 304 | #define BITS_PER_BYTE PR_BITS_PER_BYTE 305 | #define BITS_PER_SHORT PR_BITS_PER_SHORT 306 | #define BITS_PER_INT PR_BITS_PER_INT 307 | #define BITS_PER_INT64 PR_BITS_PER_INT64 308 | #define BITS_PER_LONG PR_BITS_PER_LONG 309 | #define BITS_PER_FLOAT PR_BITS_PER_FLOAT 310 | #define BITS_PER_DOUBLE PR_BITS_PER_DOUBLE 311 | #define BITS_PER_WORD PR_BITS_PER_WORD 312 | /* log2 of the bit sizes. */ 313 | #define BITS_PER_BYTE_LOG2 PR_BITS_PER_BYTE_LOG2 314 | #define BITS_PER_SHORT_LOG2 PR_BITS_PER_SHORT_LOG2 315 | #define BITS_PER_INT_LOG2 PR_BITS_PER_INT_LOG2 316 | #define BITS_PER_INT64_LOG2 PR_BITS_PER_INT64_LOG2 317 | #define BITS_PER_LONG_LOG2 PR_BITS_PER_LONG_LOG2 318 | #define BITS_PER_FLOAT_LOG2 PR_BITS_PER_FLOAT_LOG2 319 | #define BITS_PER_DOUBLE_LOG2 PR_BITS_PER_DOUBLE_LOG2 320 | #define BITS_PER_WORD_LOG2 PR_BITS_PER_WORD_LOG2 321 | /* Alignment values. */ 322 | #define ALIGN_OF_SHORT PR_ALIGN_OF_SHORT 323 | #define ALIGN_OF_INT PR_ALIGN_OF_INT 324 | #define ALIGN_OF_LONG PR_ALIGN_OF_LONG 325 | #define ALIGN_OF_INT64 PR_ALIGN_OF_INT64 326 | #define ALIGN_OF_FLOAT PR_ALIGN_OF_FLOAT 327 | #define ALIGN_OF_DOUBLE PR_ALIGN_OF_DOUBLE 328 | #define ALIGN_OF_POINTER PR_ALIGN_OF_POINTER 329 | #define ALIGN_OF_WORD PR_ALIGN_OF_WORD 330 | /* log2 of the word/dword sizes. NOTE(review): the sparc, ia64 and amd64 branches above never define PR_WORDS_PER_DWORD_LOG2, so WORDS_PER_DWORD_LOG2 names an undefined macro there -- confirm it is unused. */ 331 | #define BYTES_PER_WORD_LOG2 PR_BYTES_PER_WORD_LOG2 332 | #define BYTES_PER_DWORD_LOG2 PR_BYTES_PER_DWORD_LOG2 333 | #define WORDS_PER_DWORD_LOG2 PR_WORDS_PER_DWORD_LOG2 334 | 335 | #endif /* NO_NSPR_10_SUPPORT */ 336 | 337 | #endif /* nspr_cpucfg___ */ 338 | --------------------------------------------------------------------------------