├── .gitignore ├── CharacterSet.h ├── CoreFoundationHack.h ├── CoreFoundationHack.xm ├── CoreTextHack.xm ├── EmojiAttributes.plist ├── EmojiSizeFix.xm ├── ICUBlocks.h ├── ICUHack.xm ├── LICENSE ├── Makefile ├── PSEmojiData.h ├── README.md ├── TextInputHack.xm ├── WebCoreHack.xm ├── WebCoreSupport ├── CharactersProperties.h ├── CoreGraphicsSPI.h ├── RefCounted.h ├── RefPtr.h ├── RenderText.h ├── StringImpl.h └── UAX.h ├── control ├── copyResources.sh ├── emojiprops.h ├── layout ├── DEBIAN │ ├── postinst │ └── postrm └── Library │ └── Application Support │ └── EmojiAttributes │ ├── Info.plist │ ├── emoji.bitmap │ └── uemoji.icu ├── unicode ├── cmemory.h ├── putilimp.h ├── ucln.h ├── ucln_cmn.h ├── ucmndata.h ├── ucptrie.h ├── ucptrie_impl.h ├── udata.h ├── udatamem.h ├── umapfile.h ├── umutex.h ├── uset_imp.h └── ustringtrie.h └── uset.h /.gitignore: -------------------------------------------------------------------------------- 1 | .theos 2 | .DS_Store 3 | packages -------------------------------------------------------------------------------- /CoreFoundationHack.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | #if defined(__BIG_ENDIAN__) 4 | #define __CF_BIG_ENDIAN__ 1 5 | #define __CF_LITTLE_ENDIAN__ 0 6 | #endif 7 | 8 | #if defined(__LITTLE_ENDIAN__) 9 | #define __CF_LITTLE_ENDIAN__ 1 10 | #define __CF_BIG_ENDIAN__ 0 11 | #endif 12 | 13 | #define CF_INFO_BITS (!!(__CF_BIG_ENDIAN__) * 3) 14 | #define CF_IS_OBJC(typeID, obj) (1) 15 | 16 | #define CF_IS_SWIFT(type, obj) (0) 17 | 18 | #define WHITE_SPACE_CHARACTER (0x0020) 19 | #define ZERO_WIDTH_JOINER (0x200D) 20 | 21 | enum { 22 | _kCFRuntimeNotATypeID = 0 23 | }; 24 | 25 | static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID; 26 | 27 | enum { 28 | __kCFFreeContentsWhenDoneMask = 0x020, 29 | __kCFFreeContentsWhenDone = 0x020, 30 | __kCFContentsMask = 0x060, 31 | __kCFHasInlineContents = 0x000, 32 | __kCFNotInlineContentsNoFree = 0x040, 33 | __kCFNotInlineContentsDefaultFree = 0x020, 34 | __kCFNotInlineContentsCustomFree = 0x060, 35 | __kCFHasContentsAllocatorMask = 0x060, 36 | __kCFHasContentsAllocator = 0x060, 37 | __kCFHasContentsDeallocatorMask = 0x060, 38 | __kCFHasContentsDeallocator = 0x060, 39 | __kCFIsMutableMask = 0x01, 40 | __kCFIsMutable = 0x01, 41 | __kCFIsUnicodeMask = 0x10, 42 | __kCFIsUnicode = 0x10, 43 | __kCFHasNullByteMask = 0x08, 44 | __kCFHasNullByte = 0x08, 45 | __kCFHasLengthByteMask = 0x04, 46 | __kCFHasLengthByte = 0x04, 47 | }; 48 | 49 | struct __notInlineMutable { 50 | void *buffer; 51 | CFIndex length; 52 | CFIndex capacity; 53 | NSUInteger hasGap : 1; 54 | NSUInteger isFixedCapacity : 1; 55 | NSUInteger isExternalMutable : 1; 56 | NSUInteger capacityProvidedExternally : 1; 57 | #if __LP64__ 58 | unsigned long desiredCapacity : 60; 59 | #else 60 | unsigned long desiredCapacity : 28; 61 | #endif 62 | CFAllocatorRef contentsAllocator; 63 | }; 64 | 65 | typedef struct __CFRuntimeBase { 66 | uintptr_t _cfisa; 67 | uint8_t _cfinfo[4]; 68 | #if __LP64__ 69 | uint32_t _rc; 70 | #endif 71 | } CFRuntimeBase; 72 | 73 | struct __CFString { 74 | CFRuntimeBase base; 75 | union { 76 | struct __inline1 { 77 | CFIndex length; 78 | } inline1; 79 | struct __notInlineImmutable1 { 80 | void *buffer; 81 | CFIndex length; 82 | CFAllocatorRef contentsDeallocator; 83 | } notInlineImmutable1; 84 | struct __notInlineImmutable2 { 85 | void *buffer; 86 | CFAllocatorRef contentsDeallocator; 87 | } notInlineImmutable2; 88 | struct __notInlineMutable notInlineMutable; 89 | } variants; 90 | }; 91 | 92 | enum { 93 | kCFUniCharControlCharacterSet = 1, 94 | kCFUniCharWhitespaceCharacterSet, 95 | kCFUniCharWhitespaceAndNewlineCharacterSet, 96 | kCFUniCharDecimalDigitCharacterSet, 97 | kCFUniCharLetterCharacterSet, 98 | kCFUniCharLowercaseLetterCharacterSet, 99 | kCFUniCharUppercaseLetterCharacterSet, 100 | kCFUniCharNonBaseCharacterSet, 101 | kCFUniCharCanonicalDecomposableCharacterSet, 102 | kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, 103 | kCFUniCharAlphaNumericCharacterSet, 104 | kCFUniCharPunctuationCharacterSet, 105 | kCFUniCharIllegalCharacterSet, 106 | kCFUniCharTitlecaseLetterCharacterSet, 107 | kCFUniCharSymbolAndOperatorCharacterSet, 108 | kCFUniCharNewlineCharacterSet, 109 | 110 | kCFUniCharCompatibilityDecomposableCharacterSet = 100, 111 | kCFUniCharHFSPlusDecomposableCharacterSet, 112 | kCFUniCharStrongRightToLeftCharacterSet, 113 | kCFUniCharHasNonSelfLowercaseCharacterSet, 114 | kCFUniCharHasNonSelfUppercaseCharacterSet, 115 | kCFUniCharHasNonSelfTitlecaseCharacterSet, 116 | kCFUniCharHasNonSelfCaseFoldingCharacterSet, 117 | kCFUniCharHasNonSelfMirrorMappingCharacterSet, 118 | kCFUniCharControlAndFormatterCharacterSet, 119 | kCFUniCharCaseIgnorableCharacterSet, 120 | kCFUniCharGraphemeExtendCharacterSet 121 | }; 122 | 123 | typedef enum { 124 | kCFStringGraphemeCluster = 1, 125 | kCFStringComposedCharacterCluster = 2, 126 | kCFStringCursorMovementCluster = 3, 127 | kCFStringBackwardDeletionCluster = 4 128 | } CFStringCharacterClusterType; 129 | 130 | enum { 131 | kCFUniCharCombiningProperty = 0, 132 | kCFUniCharBidiProperty 133 | }; 134 | 135 | enum { 136 | kCFStringHangulStateL, 137 | kCFStringHangulStateV, 138 | kCFStringHangulStateT, 139 | kCFStringHangulStateLV, 140 | kCFStringHangulStateLVT, 141 | kCFStringHangulStateBreak 142 | }; 143 | 144 | typedef struct { 145 | CFCharacterSetRef cset; 146 | uint32_t flags; 147 | uint32_t rangeStart; 148 | uint32_t rangeLimit; 149 | const uint8_t *bitmap; 150 | } CFCharacterSetInlineBuffer; 151 | 152 | CF_EXTERN_C_BEGIN 153 | CF_EXPORT void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer); 154 | CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane); 155 | CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane); 156 | CF_EXTERN_C_END 157 | 158 | enum { 159 | kCFCharacterSetIsCompactBitmap = (1UL << 0), 160 | kCFCharacterSetNoBitmapAvailable = (1UL << 1), 161 | kCFCharacterSetIsInverted = (1UL << 2) 162 | }; 163 | 164 | #define kCFUniCharBitShiftForByte (3) 165 | #define kCFUniCharBitShiftForMask (7) 166 | 167 | #define MAX_CASE_MAPPING_BUF (8) 168 | #define ZERO_WIDTH_JOINER (0x200D) 169 | #define COMBINING_GRAPHEME_JOINER (0x034F) 170 | 171 | #define HANGUL_CHOSEONG_START (0x1100) 172 | #define HANGUL_CHOSEONG_END (0x115F) 173 | #define HANGUL_JUNGSEONG_START (0x1160) 174 | #define HANGUL_JUNGSEONG_END (0x11A2) 175 | #define HANGUL_JONGSEONG_START (0x11A8) 176 | #define HANGUL_JONGSEONG_END (0x11F9) 177 | 178 | #define HANGUL_SYLLABLE_START (0xAC00) 179 | #define HANGUL_SYLLABLE_END (0xD7AF) 180 | 181 | #define HANGUL_JONGSEONG_COUNT (28) 182 | 183 | #define MAX_TRANSCODING_LENGTH 4 184 | 185 | static uint8_t __CFTranscodingHintLength[] = { 186 | 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0 187 | }; 188 | -------------------------------------------------------------------------------- /CoreTextHack.xm: -------------------------------------------------------------------------------- 1 | #define COMPRESSED_SET 2 | #import 3 | #import "CharacterSet.h" 4 | #import "PSEmojiData.h" 5 | #import "uset.h" 6 | #import 7 | #import 8 | #include 9 | 10 | #define CreateMutableDict(dict) CFDictionaryCreateMutableCopy(kCFAllocatorDefault, CFDictionaryGetCount(dict), dict) 11 | 12 | extern "C" CFCharacterSetRef _CFCreateCharacterSetFromUSet(USet *); 13 | 14 | %config(generator=MobileSubstrate) 15 | 16 | %group CharacterSet 17 | 18 | CFCharacterSetRef (*CreateCharacterSetForFont)(CFStringRef const) = NULL; 19 | CFCharacterSetRef (*CreateCharacterSetWithCompressedBitmapRepresentation)(const CFDataRef characterSet) = NULL; 20 | CFDataRef (*XTCopyUncompressedBitmapRepresentation)(const UInt8 *, CFIndex); 21 | %hookf(CFCharacterSetRef, CreateCharacterSetForFont, CFStringRef const fontName) { 22 | if (CFStringEqual(fontName, CFSTR("AppleColorEmoji")) || CFStringEqual(fontName, CFSTR(".AppleColorEmojiUI"))) { 23 | if (IS_IOS_OR_NEWER(iOS_11_0)) { 24 | CFDataRef compressedData = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, compressedSet, compressedSetLength, kCFAllocatorNull); 25 | if (CreateCharacterSetWithCompressedBitmapRepresentation) { 26 | CFCharacterSetRef uncompressedSet = CreateCharacterSetWithCompressedBitmapRepresentation(compressedData); 27 | CFRelease(compressedData); 28 | return uncompressedSet; 29 | } 30 | CFDataRef uncompressedData = XTCopyUncompressedBitmapRepresentation(CFDataGetBytePtr(compressedData), CFDataGetLength(compressedData)); 31 | CFRelease(compressedData); 32 | if (uncompressedData) { 33 | CFCharacterSetRef ourSet = CFCharacterSetCreateWithBitmapRepresentation(kCFAllocatorDefault, uncompressedData); 34 | CFRelease(uncompressedData); 35 | return ourSet; 36 | } 37 | } 38 | CFDataRef uncompressedData = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, uncompressedSet, uncompressedSetLength, kCFAllocatorNull); 39 | CFCharacterSetRef ourSet = CFCharacterSetCreateWithBitmapRepresentation(kCFAllocatorDefault, uncompressedData); 40 | return ourSet; 41 | } 42 | return %orig(fontName); 43 | } 44 | 45 | %end 46 | 47 | static CFMutableDictionaryRef ctFontInfo = NULL; 48 | 49 | static CFMutableDictionaryRef getCTFontInfo(CFDictionaryRef dict) { 50 | if (ctFontInfo == NULL) { 51 | ctFontInfo = CFDictionaryCreateMutableCopy(kCFAllocatorDefault, CFDictionaryGetCount(dict), dict); 52 | CFDictionaryRef x = (CFDictionaryRef)CFDictionaryGetValue(ctFontInfo, CFSTR("Attrs")); 53 | CFMutableDictionaryRef attrs = CreateMutableDict(x); 54 | x = (CFDictionaryRef)CFDictionaryGetValue(attrs, CFSTR("AppleColorEmoji")); 55 | CFMutableDictionaryRef ace = CreateMutableDict(x); 56 | x = (CFDictionaryRef)CFDictionaryGetValue(ace, CFSTR("NSCTFontTraitsAttribute")); 57 | CFMutableDictionaryRef fontTraits = CreateMutableDict(x); 58 | SInt32 formatValue = 3; 59 | CFNumberRef formatRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &formatValue); 60 | CFDictionarySetValue(ace, CFSTR("NSCTFontFormatAttribute"), formatRef); 61 | CFRelease(formatRef); 62 | CFDictionarySetValue(ace, CFSTR("NSCTFontFeaturesAttribute"), (__bridge CFArrayRef)@[ 63 | @{ 64 | @"CTFeatureTypeIdentifier": @(701), 65 | @"CTFeatureTypeNameID": @(256), 66 | @"CTFeatureTypeSelectors": @[ 67 | @{ 68 | @"CTFeatureSelectorIdentifier": @(100), 69 | @"CTFeatureSelectorNameID": @(257) 70 | }, 71 | @{ 72 | @"CTFeatureSelectorIdentifier": @(200), 73 | @"CTFeatureSelectorNameID": @(258) 74 | } 75 | ] 76 | } 77 | ]); 78 | CFDictionarySetValue(attrs, CFSTR("AppleColorEmoji"), ace); 79 | long long symbolicTraitValue = 3221234688; 80 | CFNumberRef symbolicTraitRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberLongLongType, &symbolicTraitValue); 81 | CFDictionarySetValue(fontTraits, CFSTR("NSCTFontSymbolicTrait"), symbolicTraitRef); 82 | CFRelease(symbolicTraitRef); 83 | CFDictionarySetValue(ctFontInfo, CFSTR("Attrs"), attrs); 84 | } 85 | return ctFontInfo; 86 | } 87 | 88 | %group FontAttributes1 89 | 90 | CFDictionaryRef (*CTFontGetPlistFromGSFontCacheB)(CFStringRef, bool); 91 | %hookf(CFDictionaryRef, CTFontGetPlistFromGSFontCacheB, CFStringRef plist, bool directAccess) { 92 | CFDictionaryRef dict = %orig(plist, directAccess); 93 | if (CFStringEqual(plist, CFSTR("CTFontInfo.plist"))) 94 | return getCTFontInfo(dict); 95 | return dict; 96 | } 97 | 98 | %end 99 | 100 | %group FontAttributes2 101 | 102 | CFDictionaryRef (*CTFontGetPlistFromGSFontCache)(CFStringRef); 103 | %hookf(CFDictionaryRef, CTFontGetPlistFromGSFontCache, CFStringRef plist) { 104 | CFDictionaryRef dict = %orig(plist); 105 | if (CFStringEqual(plist, CFSTR("CTFontInfo.plist"))) 106 | return getCTFontInfo(dict); 107 | return dict; 108 | } 109 | 110 | %end 111 | 112 | #if __LP64__ 113 | 114 | static USet *unicodeSet = NULL; 115 | static CFCharacterSetRef characterSet = NULL; 116 | 117 | %group EmojiPresentation 118 | 119 | void (*IsDefaultEmojiPresentation)(void *) = NULL; 120 | CFMutableCharacterSetRef *DefaultEmojiPresentationSet; 121 | 122 | %hookf(void, IsDefaultEmojiPresentation, void *arg0) { 123 | *DefaultEmojiPresentationSet = (CFMutableCharacterSetRef)characterSet; 124 | } 125 | 126 | %end 127 | 128 | %group EmojiPresentationUSet 129 | 130 | bool (*IsDefaultEmojiPresentationUSet)(UChar32) = NULL; 131 | %hookf(bool, IsDefaultEmojiPresentationUSet, UChar32 c) { 132 | return uset_contains(unicodeSet, c); 133 | } 134 | 135 | %end 136 | 137 | #endif 138 | 139 | %ctor { 140 | MSImageRef ct = MSGetImageByName(realPath2(@"/System/Library/Frameworks/CoreText.framework/CoreText")); 141 | CreateCharacterSetForFont = (CFCharacterSetRef (*)(CFStringRef const))_PSFindSymbolReadable(ct, "__Z25CreateCharacterSetForFontPK10__CFString"); 142 | HBLogDebug(@"[CoreTextHack: CharacterSet] CreateCharacterSetForFont found: %d", CreateCharacterSetForFont != NULL); 143 | XTCopyUncompressedBitmapRepresentation = (CFDataRef (*)(const UInt8 *, CFIndex))_PSFindSymbolCallable(ct, "__Z38XTCopyUncompressedBitmapRepresentationPKhm"); 144 | HBLogDebug(@"[CoreTextHack: CharacterSet] XTCopyUncompressedBitmapRepresentation found: %d", XTCopyUncompressedBitmapRepresentation != NULL); 145 | CreateCharacterSetWithCompressedBitmapRepresentation = (CFCharacterSetRef (*)(const CFDataRef))_PSFindSymbolCallable(ct, "__Z52CreateCharacterSetWithCompressedBitmapRepresentationPK8__CFData"); 146 | HBLogDebug(@"[CoreTextHack: CharacterSet] CreateCharacterSetWithCompressedBitmapRepresentation found: %d", CreateCharacterSetWithCompressedBitmapRepresentation != NULL); 147 | %init(CharacterSet); 148 | if (!IS_IOS_OR_NEWER(iOS_13_2)) { 149 | CTFontGetPlistFromGSFontCacheB = (CFDictionaryRef (*)(CFStringRef, bool))_PSFindSymbolReadable(ct, "__Z29CTFontGetPlistFromGSFontCachePK10__CFStringb"); 150 | HBLogDebug(@"[CoreTextHack: FontAttributes] CTFontGetPlistFromGSFontCacheB found: %d", CTFontGetPlistFromGSFontCacheB != NULL); 151 | if (CTFontGetPlistFromGSFontCacheB) { 152 | %init(FontAttributes1); 153 | } 154 | CTFontGetPlistFromGSFontCache = (CFDictionaryRef (*)(CFStringRef))_PSFindSymbolReadable(ct, "__Z29CTFontGetPlistFromGSFontCachePK10__CFString"); 155 | HBLogDebug(@"[CoreTextHack: FontAttributes] CTFontGetPlistFromGSFontCache found: %d", CTFontGetPlistFromGSFontCache != NULL); 156 | if (CTFontGetPlistFromGSFontCache) { 157 | %init(FontAttributes2); 158 | } 159 | } 160 | #if __LP64__ 161 | unicodeSet = uset_openEmpty(); 162 | for (int i = 0; i < presentationCount; ++i) 163 | uset_add(unicodeSet, presentation[i]); 164 | uset_freeze(unicodeSet); 165 | if (IS_IOS_BETWEEN_EEX(iOS_11_0, iOS_12_1)) { 166 | characterSet = _CFCreateCharacterSetFromUSet(unicodeSet); 167 | CFRetain(characterSet); 168 | IsDefaultEmojiPresentation = (void (*)(void *))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjEN4$_138__invokeEPv"); 169 | if (IsDefaultEmojiPresentation == NULL) 170 | IsDefaultEmojiPresentation = (void (*)(void *))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjEN4$_128__invokeEPv"); 171 | DefaultEmojiPresentationSet = (CFMutableCharacterSetRef (*))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjE28sDefaultEmojiPresentationSet"); 172 | HBLogDebug(@"[CoreTextHack: EmojiPresentation] IsDefaultEmojiPresentation found: %d", IsDefaultEmojiPresentation != NULL); 173 | HBLogDebug(@"[CoreTextHack: EmojiPresentation] DefaultEmojiPresentationSet found: %d", DefaultEmojiPresentationSet != NULL); 174 | %init(EmojiPresentation); 175 | } else if (IS_IOS_BETWEEN_EEX(iOS_12_1, iOS_15_4)) { 176 | IsDefaultEmojiPresentationUSet = (bool (*)(UChar32))_PSFindSymbolReadable(ct, "__Z26IsDefaultEmojiPresentationj"); 177 | HBLogDebug(@"[CoreTextHack: EmojiPresentation] IsDefaultEmojiPresentation (Uset) found: %d", IsDefaultEmojiPresentationUSet != NULL); 178 | if (IsDefaultEmojiPresentationUSet) { 179 | %init(EmojiPresentationUSet); 180 | } 181 | } 182 | #endif 183 | } 184 | 185 | #if __LP64__ 186 | 187 | %dtor { 188 | if (characterSet) 189 | CFRelease(characterSet); 190 | if (unicodeSet) 191 | uset_close(unicodeSet); 192 | } 193 | 194 | #endif -------------------------------------------------------------------------------- /EmojiAttributes.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Filter 6 | 7 | Bundles 8 | 9 | com.apple.UIKit 10 | com.apple.WebKit.WebContent 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /EmojiSizeFix.xm: -------------------------------------------------------------------------------- 1 | #if !__arm64e__ 2 | 3 | #import 4 | #import 5 | #import 6 | #import 7 | #import "WebCoreSupport/CoreGraphicsSPI.h" 8 | 9 | %config(generator=MobileSubstrate) 10 | 11 | short iOSVer = 0; 12 | 13 | CGFontRef cgFont = NULL; 14 | 15 | BOOL (*CTFontIsAppleColorEmoji)(CTFontRef); 16 | 17 | bool *findIsEmoji(void *arg0) { 18 | #if __LP64__ 19 | if (iOSVer >= 90) 20 | return (bool *)((uint8_t *)arg0 + 0x2B); 21 | if (iOSVer >= 70) 22 | return (bool *)((uint8_t *)arg0 + 0x8); 23 | return (bool *)((uint8_t *)arg0 + 0xC); 24 | #else 25 | if (iOSVer >= 90) 26 | return (bool *)((uint8_t *)arg0 + 0x1F); 27 | if (iOSVer >= 61) 28 | return (bool *)((uint8_t *)arg0 + 0x8); 29 | return (bool *)((uint8_t *)arg0 + 0xC); 30 | #endif 31 | } 32 | 33 | CTFontRef (*FontPlatformData_ctFont)(void *); 34 | %hookf(CTFontRef, FontPlatformData_ctFont, void *arg0) { 35 | bool *isEmoji = findIsEmoji(arg0); 36 | bool forEmoji = *isEmoji; 37 | *isEmoji = NO; 38 | CTFontRef font = %orig; 39 | *isEmoji = forEmoji; 40 | return font; 41 | } 42 | 43 | %group iOS60 44 | 45 | void (*platformInit)(void *); 46 | %hookf(void, platformInit, void *arg0) { 47 | bool *isEmoji = (bool *)((uint8_t *)arg0 + 0x34); 48 | bool forEmoji = *isEmoji; 49 | *isEmoji = NO; 50 | %orig; 51 | *isEmoji = forEmoji; 52 | } 53 | 54 | %end 55 | 56 | %group iOS6 57 | 58 | int (*CTFontGetWebKitEmojiRenderMode)(void); 59 | %hookf(int, CTFontGetWebKitEmojiRenderMode) { 60 | return 0; 61 | } 62 | 63 | CGFontRenderingStyle style = kCGFontRenderingStyleAntialiasing | kCGFontRenderingStyleSubpixelPositioning | kCGFontRenderingStyleSubpixelQuantization | kCGFontAntialiasingStyleUnfiltered; 64 | 65 | float (*platformWidthForGlyph)(void *, CGGlyph); 66 | %hookf(float, platformWidthForGlyph, void *arg0, CGGlyph code) { 67 | if (code == 0xFE0F) 68 | return 0.0; 69 | CTFontRef font = iOSVer >= 70 ? FontPlatformData_ctFont((void *)((uint8_t *)arg0 + 0x30)) : FontPlatformData_ctFont((void *)((uint8_t *)arg0 + 0x28)); 70 | BOOL isEmojiFont = CTFontIsAppleColorEmoji && CTFontIsAppleColorEmoji(font); 71 | if (!isEmojiFont) { 72 | CFStringRef fontName = CTFontCopyPostScriptName(font); 73 | isEmojiFont = CFStringEqual(fontName, CFSTR("AppleColorEmoji")); 74 | CFRelease(fontName); 75 | } 76 | if (isEmojiFont) { 77 | CGSize advance = CGSizeMake(0, 0); 78 | CTFontGetAdvancesForGlyphs(font, kCTFontOrientationHorizontal, &code, &advance, 1); 79 | return advance.width + 4.0; 80 | } 81 | return %orig; 82 | } 83 | 84 | %end 85 | 86 | %ctor { 87 | if (IS_IOS_BETWEEN_EEX(iOS_6_0, iOS_10_0)) { 88 | if (IS_IOS_OR_NEWER(iOS_9_0)) 89 | iOSVer = 90; 90 | else if (IS_IOS_OR_NEWER(iOS_7_0)) 91 | iOSVer = 70; 92 | else if (NSFoundationVersionNumber >= NSFoundationVersionNumber_iOS_6_1) 93 | iOSVer = 61; 94 | else 95 | iOSVer = 60; 96 | MSImageRef wcref = MSGetImageByName(realPath2(@"/System/Library/PrivateFrameworks/WebCore.framework/WebCore")); 97 | #if !__LP64__ 98 | MSImageRef ctref = MSGetImageByName(realPath2(@"/System/Library/Frameworks/CoreText.framework/CoreText")); 99 | CTFontIsAppleColorEmoji = (BOOL (*)(CTFontRef))MSFindSymbol(ctref, "_CTFontIsAppleColorEmoji"); 100 | CTFontGetWebKitEmojiRenderMode = (int (*)(void))MSFindSymbol(ctref, "_CTFontGetWebKitEmojiRenderMode"); 101 | platformWidthForGlyph = (float (*)(void *, CGGlyph))MSFindSymbol(wcref, "__ZNK7WebCore4Font21platformWidthForGlyphEt"); 102 | if (platformWidthForGlyph == NULL) 103 | platformWidthForGlyph = (float (*)(void *, CGGlyph))MSFindSymbol(wcref, "__ZNK7WebCore14SimpleFontData21platformWidthForGlyphEt"); 104 | platformInit = (void (*)(void *))MSFindSymbol(wcref, "__ZN7WebCore14SimpleFontData12platformInitEv"); 105 | HBLogDebug(@"[EmojiSizeFix] Found CTFontGetWebKitEmojiRenderMode: %d", CTFontGetWebKitEmojiRenderMode != NULL); 106 | HBLogDebug(@"[EmojiSizeFix] Found platformWidthForGlyph: %d", platformWidthForGlyph != NULL); 107 | HBLogDebug(@"[EmojiSizeFix] Found platformInit: %d", platformInit != NULL); 108 | if (iOSVer < 70) { 109 | %init(iOS6); 110 | if (iOSVer == 60) { 111 | %init(iOS60); 112 | } 113 | } 114 | #endif 115 | FontPlatformData_ctFont = (CTFontRef (*)(void *))MSFindSymbol(wcref, "__ZNK7WebCore16FontPlatformData6ctFontEv"); 116 | HBLogDebug(@"[EmojiSizeFix] Found FontPlatformData_ctFont: %d", FontPlatformData_ctFont != NULL); 117 | %init; 118 | } 119 | } 120 | 121 | #endif 122 | 123 | #if !__LP64__ 124 | 125 | %dtor { 126 | if (cgFont) 127 | CFRelease(cgFont); 128 | } 129 | 130 | #endif -------------------------------------------------------------------------------- /ICUHack.xm: -------------------------------------------------------------------------------- 1 | #import "ICUBlocks.h" 2 | #import "emojiprops.h" 3 | #include "unicode/ucptrie_impl.h" 4 | #include "unicode/ucmndata.h" 5 | #include "unicode/udatamem.h" 6 | #include "unicode/cmemory.h" 7 | #import 8 | #import 9 | #import 10 | #import 11 | 12 | #include 13 | #include 14 | 15 | #define UEMOJI_PATH PS_ROOT_PATH("/Library/Application Support/EmojiAttributes/uemoji.icu") 16 | 17 | #define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size) 18 | U_CAPI void U_EXPORT2 uprv_free(void *mem); 19 | U_CAPI void * U_EXPORT2 uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1); 20 | 21 | void (*ucptrie_close)(UCPTrie *trie); 22 | int32_t (*ucptrie_internalSmallIndex)(const UCPTrie *trie, UChar32 c); 23 | UCPTrie *(*ucptrie_openFromBinary)(UCPTrieType type, UCPTrieValueWidth valueWidth, const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode); 24 | 25 | static UCPTrie *legacy_ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode) { 26 | if (U_FAILURE(*pErrorCode)) { 27 | return nullptr; 28 | } 29 | 30 | if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || 31 | type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || 32 | valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { 33 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 34 | return nullptr; 35 | } 36 | 37 | if (length < (int32_t)sizeof(UCPTrieHeader)) { 38 | *pErrorCode = U_INVALID_FORMAT_ERROR; 39 | return nullptr; 40 | } 41 | 42 | const UCPTrieHeader *header = (const UCPTrieHeader *)data; 43 | if (header->signature != UCPTRIE_SIG) { 44 | *pErrorCode = U_INVALID_FORMAT_ERROR; 45 | return nullptr; 46 | } 47 | 48 | int32_t options = header->options; 49 | int32_t typeInt = (options >> 6) & 3; 50 | int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; 51 | if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || 52 | (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { 53 | *pErrorCode = U_INVALID_FORMAT_ERROR; 54 | return nullptr; 55 | } 56 | UCPTrieType actualType = (UCPTrieType)typeInt; 57 | UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; 58 | if (type < 0) { 59 | type = actualType; 60 | } 61 | if (valueWidth < 0) { 62 | valueWidth = actualValueWidth; 63 | } 64 | if (type != actualType || valueWidth != actualValueWidth) { 65 | *pErrorCode = U_INVALID_FORMAT_ERROR; 66 | return nullptr; 67 | } 68 | 69 | UCPTrie tempTrie; 70 | uprv_memset(&tempTrie, 0, sizeof(tempTrie)); 71 | tempTrie.indexLength = header->indexLength; 72 | tempTrie.dataLength = 73 | ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; 74 | tempTrie.index3NullOffset = header->index3NullOffset; 75 | tempTrie.dataNullOffset = 76 | ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; 77 | 78 | tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; 79 | tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; 80 | tempTrie.type = type; 81 | tempTrie.valueWidth = valueWidth; 82 | 83 | int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; 84 | if (valueWidth == UCPTRIE_VALUE_BITS_16) { 85 | actualLength += tempTrie.dataLength * 2; 86 | } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { 87 | actualLength += tempTrie.dataLength * 4; 88 | } else { 89 | actualLength += tempTrie.dataLength; 90 | } 91 | if (length < actualLength) { 92 | *pErrorCode = U_INVALID_FORMAT_ERROR; 93 | return nullptr; 94 | } 95 | 96 | UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); 97 | if (trie == nullptr) { 98 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 99 | return nullptr; 100 | } 101 | uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); 102 | 103 | const uint16_t *p16 = (const uint16_t *)(header + 1); 104 | trie->index = p16; 105 | p16 += trie->indexLength; 106 | 107 | int32_t nullValueOffset = trie->dataNullOffset; 108 | if (nullValueOffset >= trie->dataLength) { 109 | nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; 110 | } 111 | switch (valueWidth) { 112 | case UCPTRIE_VALUE_BITS_16: 113 | trie->data.ptr16 = p16; 114 | trie->nullValue = trie->data.ptr16[nullValueOffset]; 115 | break; 116 | case UCPTRIE_VALUE_BITS_32: 117 | trie->data.ptr32 = (const uint32_t *)p16; 118 | trie->nullValue = trie->data.ptr32[nullValueOffset]; 119 | break; 120 | case UCPTRIE_VALUE_BITS_8: 121 | trie->data.ptr8 = (const uint8_t *)p16; 122 | trie->nullValue = trie->data.ptr8[nullValueOffset]; 123 | break; 124 | default: 125 | *pErrorCode = U_INVALID_FORMAT_ERROR; 126 | return nullptr; 127 | } 128 | 129 | if (pActualLength != nullptr) { 130 | *pActualLength = actualLength; 131 | } 132 | return trie; 133 | } 134 | 135 | static int32_t legacy_ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { 136 | int32_t i1 = c >> UCPTRIE_SHIFT_1; 137 | if (trie->type == UCPTRIE_TYPE_FAST) { 138 | i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; 139 | } else { 140 | i1 += UCPTRIE_SMALL_INDEX_LENGTH; 141 | } 142 | int32_t i3Block = trie->index[ 143 | (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; 144 | int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; 145 | int32_t dataBlock; 146 | if ((i3Block & 0x8000) == 0) { 147 | dataBlock = trie->index[i3Block + i3]; 148 | } else { 149 | i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); 150 | i3 &= 7; 151 | dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; 152 | dataBlock |= trie->index[i3Block + i3]; 153 | } 154 | return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); 155 | } 156 | 157 | static void legacy_ucptrie_close(UCPTrie *trie) { 158 | uprv_free(trie); 159 | } 160 | 161 | static UDataMemory *memory = nullptr; 162 | UCPTrie *cpTrie = nullptr; 163 | 164 | static void UDataMemory_init(UDataMemory *This) { 165 | uprv_memset(This, 0, sizeof(UDataMemory)); 166 | This->length=-1; 167 | } 168 | 169 | static UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) { 170 | UDataMemory *This; 171 | 172 | if (U_FAILURE(*pErr)) { 173 | return NULL; 174 | } 175 | This = (UDataMemory *)uprv_malloc(sizeof(UDataMemory)); 176 | if (This == NULL) { 177 | *pErr = U_MEMORY_ALLOCATION_ERROR; } 178 | else { 179 | UDataMemory_init(This); 180 | This->heapAllocated = TRUE; 181 | } 182 | return This; 183 | } 184 | 185 | static void udata_open_custom(UErrorCode *status) { 186 | static const char *xinaPath = "/var/LIY/Application Support/EmojiAttributes/uemoji.icu"; 187 | int fd; 188 | int length; 189 | struct stat mystat; 190 | void *data; 191 | 192 | memory = UDataMemory_createNewInstance(status); 193 | if (U_FAILURE(*status)) { 194 | HBLogError(@"[ICUHack] udata_open_custom instance failed with error %s", u_errorName(*status)); 195 | return; 196 | } 197 | 198 | UDataMemory_init(memory); 199 | 200 | const char *path = UEMOJI_PATH; 201 | if (stat(path, &mystat) != 0 || mystat.st_size <= 0) { 202 | path = xinaPath; 203 | if (stat(path, &mystat) != 0 || mystat.st_size <= 0) { 204 | *status = U_FILE_ACCESS_ERROR; // custom 205 | HBLogError(@"[ICUHack] udata_open_custom stat() failed with error %d", errno); 206 | return; 207 | } 208 | } 209 | length = mystat.st_size; 210 | 211 | fd = open(path, O_RDONLY); 212 | if (fd == -1) { 213 | *status = U_FILE_ACCESS_ERROR; // custom 214 | HBLogError(@"[ICUHack] udata_open_custom open() failed with error %d", errno); 215 | return; 216 | } 217 | 218 | data = mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); 219 | close(fd); 220 | if (data == MAP_FAILED) { 221 | *status = U_FILE_ACCESS_ERROR; // custom 222 | HBLogError(@"[ICUHack] udata_open_custom mmap() failed"); 223 | return; 224 | } 225 | 226 | memory->map = (char *)data + length; 227 | memory->pHeader = (const DataHeader *)data; 228 | memory->mapAddr = data; 229 | #if U_PLATFORM == U_PF_IPHONE 230 | posix_madvise(data, length, POSIX_MADV_RANDOM); 231 | #endif 232 | } 233 | 234 | static void EmojiProps_load(UErrorCode &errorCode) { 235 | udata_open_custom(&errorCode); 236 | if (U_FAILURE(errorCode)) { 237 | return; 238 | } 239 | const uint8_t *inBytes = (const uint8_t *)udata_getMemory(memory); 240 | const int32_t *inIndexes = (const int32_t *)inBytes; 241 | int32_t indexesLength = inIndexes[IX_CPTRIE_OFFSET] / 4; 242 | if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) { 243 | errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes. 244 | HBLogError(@"[ICUHack] EmojiProps_load invalid format error"); 245 | return; 246 | } 247 | 248 | int32_t i = IX_CPTRIE_OFFSET; 249 | int32_t offset = inIndexes[i++]; 250 | int32_t nextOffset = inIndexes[i]; 251 | cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, 252 | inBytes + offset, nextOffset - offset, nullptr, &errorCode); 253 | if (U_FAILURE(errorCode)) { 254 | HBLogError(@"[ICUHack] ucptrie_openFromBinary failed"); 255 | return; 256 | } 257 | } 258 | 259 | #ifndef UCHAR_RGI_EMOJI 260 | #define UCHAR_RGI_EMOJI 71 261 | #endif 262 | 263 | static UBool EmojiProps_hasBinaryPropertyImpl(UChar32 c, UProperty which) { 264 | if (which < UCHAR_EMOJI || UCHAR_RGI_EMOJI < which) { 265 | return false; 266 | } 267 | // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere. 268 | static constexpr int8_t bitFlags[] = { 269 | BIT_EMOJI, // UCHAR_EMOJI=57 270 | BIT_EMOJI_PRESENTATION, // UCHAR_EMOJI_PRESENTATION=58 271 | BIT_EMOJI_MODIFIER, // UCHAR_EMOJI_MODIFIER=59 272 | BIT_EMOJI_MODIFIER_BASE, // UCHAR_EMOJI_MODIFIER_BASE=60 273 | BIT_EMOJI_COMPONENT, // UCHAR_EMOJI_COMPONENT=61 274 | -1, // UCHAR_REGIONAL_INDICATOR=62 275 | -1, // UCHAR_PREPENDED_CONCATENATION_MARK=63 276 | BIT_EXTENDED_PICTOGRAPHIC, // UCHAR_EXTENDED_PICTOGRAPHIC=64 277 | BIT_BASIC_EMOJI, // UCHAR_BASIC_EMOJI=65 278 | -1, // UCHAR_EMOJI_KEYCAP_SEQUENCE=66 279 | -1, // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67 280 | -1, // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68 281 | -1, // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69 282 | -1, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70 283 | BIT_BASIC_EMOJI, // UCHAR_RGI_EMOJI=71 284 | }; 285 | int32_t bit = bitFlags[which - UCHAR_EMOJI]; 286 | if (bit < 0) { 287 | return false; // not a property that we support in this function 288 | } 289 | uint8_t bits = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c); 290 | return (bits >> bit) & 1; 291 | } 292 | 293 | #define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \ 294 | (((int32_t)((trieIndex)[ \ 295 | (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \ 296 | ((c)>>UTRIE2_SHIFT_1)]+ \ 297 | (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \ 298 | <>UTRIE2_SHIFT_2)]) \ 302 | <index, c) : \ 307 | (uint32_t)(c)<=0xffff ? \ 308 | _UTRIE2_INDEX_RAW( \ 309 | (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \ 310 | (trie)->index, c) : \ 311 | (uint32_t)(c)>0x10ffff ? \ 312 | (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \ 313 | (c)>=(trie)->highStart ? \ 314 | (trie)->highValueIndex : \ 315 | _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)) 316 | #define _UTRIE2_GET(trie, data, asciiOffset, c) \ 317 | (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)] 318 | #define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c)) 319 | 320 | %group getUnicodeProperties 321 | 322 | uint32_t (*u_getUnicodeProperties)(UChar32, int32_t) = NULL; 323 | %hookf(uint32_t, u_getUnicodeProperties, UChar32 c, int32_t column) { 324 | if (column >= propsVectorsColumns) 325 | return 0; 326 | uint16_t vecIndex = UTRIE2_GET16(&propsVectorsTrie, c); 327 | return propsVectors[vecIndex + column]; 328 | } 329 | 330 | %end 331 | 332 | %group hasBinaryProperty 333 | 334 | %hookf(UBool, u_hasBinaryProperty, UChar32 c, UProperty which) { 335 | return EmojiProps_hasBinaryPropertyImpl(c, which) || %orig; 336 | } 337 | 338 | %end 339 | 340 | %group inlineEmojiData 341 | 342 | %hookf(UDataMemory *, udata_openChoice, const char *path, const char *type, const char *name, UDataMemoryIsAcceptable *isAcceptable, void *context, UErrorCode *pErrorCode) { 343 | if (type && name && strcmp(type, "icu") == 0 && strcmp(name, "uemoji") == 0) { 344 | udata_open_custom(pErrorCode); 345 | return memory; 346 | } 347 | return %orig; 348 | } 349 | 350 | %end 351 | 352 | %ctor { 353 | MSImageRef ref = MSGetImageByName(realPath2(@"/usr/lib/libicucore.A.dylib")); 354 | #ifdef __LP64__ 355 | #if TARGET_OS_SIMULATOR 356 | // Unique bytes (iOS 13.5): E03F01C8 89C0488D 0D15CC1B (offset: 100) 357 | // Unique bytes (iOS 12.4): 0583E03F 01C889C0 488D0DB4 (offset: 100) 358 | // Unique bytes (iOS 8.2) : 554889E5 31C083FE 027F5C81 (offset: 0) 359 | // Starting byte (iOS 13.5): 0x31 360 | // Starting byte (iOS 12.4): 0x55 361 | // Starting byte (iOS 8.2) : 0x55 362 | void *rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0xE0, 0x3F, 0x01, 0xC8, 0x89, 0xC0, 0x48, 0x8D, 0x0D, 0x15, 0xCC, 0x1B}, 12, 0x31); 363 | if (rp == NULL) 364 | rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x05, 0x83, 0xE0, 0x3F, 0x01, 0xC8, 0x89, 0xC0, 0x48, 0x8D, 0x0D, 0xB4}, 12, 0x55); 365 | if (rp == NULL) 366 | rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x83, 0xFE, 0x02, 0x7F, 0x5C, 0x81}, 12, 0x55) 367 | #else 368 | // Unique bytes: 3F080071 6D000054 00008052 C0035FD6 (offset: 0) 369 | // Starting byte: 0x3F 370 | void *rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x3F, 0x08, 0x00, 0x71, 0x6D, 0x00, 0x00, 0x54, 0x00, 0x00, 0x80, 0x52, 0xC0, 0x03, 0x5F, 0xD6}, 16, 0x3F); 371 | #endif 372 | #else 373 | const uint8_t *p = (const uint8_t *)MSFindSymbol(ref, "_u_isUAlphabetic"); 374 | void *rp = (void *)((const uint8_t *)p + 0x16); 375 | #endif 376 | u_getUnicodeProperties = (uint32_t (*)(UChar32, int32_t))rp; 377 | HBLogDebug(@"[ICUHack] u_getUnicodeProperties found %d", u_getUnicodeProperties != NULL); 378 | if (u_getUnicodeProperties) { 379 | %init(getUnicodeProperties); 380 | } 381 | if (IS_IOS_OR_NEWER(iOS_15_4)) { 382 | HBLogDebug(@"[ICUHack] Hooking inline emoji data"); 383 | %init(inlineEmojiData); 384 | } else { 385 | ucptrie_openFromBinary = (UCPTrie *(*)(UCPTrieType, UCPTrieValueWidth, const void *, int32_t, int32_t *, UErrorCode *))_PSFindSymbolCallable(ref, "_ucptrie_openFromBinary"); 386 | ucptrie_internalSmallIndex = (int32_t (*)(const UCPTrie *, UChar32))_PSFindSymbolCallable(ref, "_ucptrie_internalSmallIndex"); 387 | ucptrie_close = (void (*)(UCPTrie *))_PSFindSymbolCallable(ref, "_ucptrie_close"); 388 | if (ucptrie_openFromBinary == NULL) 389 | ucptrie_openFromBinary = legacy_ucptrie_openFromBinary; 390 | if (ucptrie_internalSmallIndex == NULL) 391 | ucptrie_internalSmallIndex = legacy_ucptrie_internalSmallIndex; 392 | if (ucptrie_close == NULL) 393 | ucptrie_close = legacy_ucptrie_close; 394 | HBLogDebug(@"[ICUHack] ucptrie_openFromBinary found: %d", ucptrie_openFromBinary != NULL); 395 | HBLogDebug(@"[ICUHack] ucptrie_internalSmallIndex found: %d", ucptrie_internalSmallIndex != NULL); 396 | HBLogDebug(@"[ICUHack] ucptrie_close found: %d", ucptrie_close != NULL); 397 | UErrorCode errorCode = U_ZERO_ERROR; 398 | EmojiProps_load(errorCode); 399 | if (U_FAILURE(errorCode)) { 400 | HBLogDebug(@"[ICUHack] Failed to load uemoji.icu because %s", u_errorName(errorCode)); 401 | return; 402 | } 403 | HBLogDebug(@"[ICUHack] Hooking hasBinaryProperty"); 404 | %init(hasBinaryProperty); 405 | } 406 | } 407 | 408 | %dtor { 409 | if (memory) 410 | udata_close(memory); 411 | if (cpTrie) 412 | ucptrie_close(cpTrie); 413 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE_VERSION = 1.9.0~b3 2 | 3 | ifeq ($(SIMULATOR),1) 4 | TARGET = simulator:clang:latest:8.0 5 | ARCHS = arm64 x86_64 6 | else 7 | ifeq ($(THEOS_PACKAGE_SCHEME),rootless) 8 | TARGET = iphone:clang:16.5:15.0 9 | else ifeq ($(THEOS_PACKAGE_SCHEME),roothide) 10 | TARGET = iphone:clang:16.5:15.0 11 | else 12 | TARGET = iphone:clang:14.5:5.0 13 | export PREFIX = $(THEOS)/toolchain/Xcode11.xctoolchain/usr/bin/ 14 | endif 15 | endif 16 | 17 | include $(THEOS)/makefiles/common.mk 18 | 19 | LIBRARY_NAME = EmojiAttributes 20 | $(LIBRARY_NAME)_INSTALL_PATH = /Library/MobileSubstrate/DynamicLibraries/EmojiPort 21 | $(LIBRARY_NAME)_FILES = ICUHack.xm CoreTextHack.xm 22 | ifeq ($(THEOS_PACKAGE_SCHEME),) 23 | $(LIBRARY_NAME)_FILES += CoreFoundationHack.xm TextInputHack.xm WebCoreHack.xm EmojiSizeFix.xm 24 | endif 25 | $(LIBRARY_NAME)_CCFLAGS = -std=c++11 -stdlib=libc++ 26 | $(LIBRARY_NAME)_EXTRA_FRAMEWORKS = CydiaSubstrate 27 | $(LIBRARY_NAME)_LIBRARIES = icucore undirect 28 | $(LIBRARY_NAME)_USE_SUBSTRATE = 1 29 | $(LIBRARY_NAME)_GENERATOR = MobileSubstrate 30 | 31 | include $(THEOS_MAKE_PATH)/library.mk 32 | 33 | ifeq ($(SIMULATOR),1) 34 | setup:: clean all 35 | @rm -f /opt/simject/$(LIBRARY_NAME).dylib 36 | @cp -v $(THEOS_OBJ_DIR)/$(LIBRARY_NAME).dylib /opt/simject/$(LIBRARY_NAME).dylib 37 | @cp -v $(PWD)/$(LIBRARY_NAME).plist /opt/simject/$(LIBRARY_NAME).plist 38 | endif 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EmojiAttributes 2 | 3 | Various under-the-hood fixes for emoji display. 4 | 5 | # Technical Information 6 | 7 | ## CoreText 8 | This framework is an intermediate layer between text display and text representation in iOS. It mainly handles character sets of such supported fonts, including emoji. 9 | 10 | ### Character Set Addition 11 | Emoji character set is cached in bitmap format and retrievable from `CreateCharacterSetForFont()`. The set changes from version to version of iOS. We override this with the latest character set. To get the character set needed, we dump one from `libGSFontCache.dylib` which is what [EmojiCategory](https://github.com/PoomSmart/EmojiCategory) does. Without this override, emojis can be all shadowed (black out). 12 | 13 | ### Emoji Presentation Addition (iOS 11+) 14 | As of iOS 11, a weird function `IsDefaultEmojiPresentation()` seems to determine which emojis are really supported by the system before showing them. The said representation is an array of emoji strings that can be easily overridden. 15 | 16 | ## CoreFoundation 17 | This framework handles display emojis in most native (UIKit) applications. `CFStringGetRangeOfCharacterClusterAtIndex()` consults the cached emoji character set to determine the index of the given character, taking into account that it can be one of the characters (cluster) of one single emoji. We as well override the character set. Without this workaround, unsupported emojis will be rendered as blank or "?" icon. 18 | 19 | ## WebCore 20 | This framework does a lot of things to displaying content in websites, including displaying emojis in web pages. Until at some point in the past, Apple hardcoded all emoji unicodes in here for iteration through characters in a webpage to apply a compatible (emoji) font for them. For present days, read ICU section below. Without this hack, emojis will be displayed as blank rectangles. 21 | 22 | ## TextInput (iOS < 10) 23 | `-[NSString(TIExtras) _containsEmoji]` involves opening the emoji bitmap file `TIUserDictionaryEmojiCharacterSet.bitmap` residing in `/System/Library/Frameworks/TextInput.framework`. It simply needs to be replaced by the most recent bitmap so that such applications that perform checking emoji substrings will perform correctly. 24 | 25 | ## ICU 26 | Apple has transitioned to be relying more on ICU API when it has to deal with emojis. Instead of hardcoding emoji codepoints in `CoreFoundation` framework, it directly consults ICU which already has similar information. At a high level, ICU embeds "props" data related to emojis inside `libicucore.A.dylib`. EmojiAttributes flexes its best to redirect readings of those data to be from its own. 27 | 28 | ## Emoji Size Fix (iOS 6 - 9) 29 | Remove WebCore/CoreText emoji size restriction. See [here](https://emojier.com/faq/15122z-ios-small-font-size-emoji-hell). 30 | -------------------------------------------------------------------------------- /TextInputHack.xm: -------------------------------------------------------------------------------- 1 | #if !__arm64e__ 2 | 3 | #import 4 | #import 5 | 6 | %hook NSBundle 7 | 8 | - (NSString *)pathForResource:(NSString *)resourceName ofType:(NSString *)resourceType { 9 | if ([resourceName isEqualToString:@"TIUserDictionaryEmojiCharacterSet"]) { 10 | NSBundle *bundle = [[self class] bundleWithPath:@"/Library/Application Support/EmojiAttributes"]; 11 | return [bundle pathForResource:@"emoji" ofType:@"bitmap"]; 12 | } 13 | return %orig; 14 | } 15 | 16 | %end 17 | 18 | %ctor { 19 | if (IS_IOS_OR_NEWER(iOS_10_0)) 20 | return; 21 | dlopen(realPath2(@"/System/Library/PrivateFrameworks/TextInput.framework/TextInput"), RTLD_LAZY); 22 | %init; 23 | } 24 | 25 | #endif -------------------------------------------------------------------------------- /WebCoreHack.xm: -------------------------------------------------------------------------------- 1 | #if !__arm64e__ 2 | 3 | #import 4 | #import "WebCoreSupport/CharactersProperties.h" 5 | #import "WebCoreSupport/RenderText.h" 6 | #import "WebCoreSupport/CoreGraphicsSPI.h" 7 | #import 8 | #import 9 | #import 10 | 11 | #include 12 | 13 | %config(generator=MobileSubstrate) 14 | 15 | enum CodePath { 16 | Auto, Simple, Complex, SimpleWithGlyphOverflow 17 | }; 18 | 19 | using namespace WebCore; 20 | using namespace WTF; 21 | 22 | bool (*isCJKIdeograph)(UChar32); 23 | %hookf(bool, isCJKIdeograph, UChar32 c) { 24 | if (c >= 0x4E00 && c <= 0x9FFF) 25 | return true; 26 | if (c >= 0x3400 && c <= 0x4DBF) 27 | return true; 28 | if (c >= 0x2E80 && c <= 0x2EFF) 29 | return true; 30 | if (c >= 0x2F00 && c <= 0x2FDF) 31 | return true; 32 | if (c >= 0x31C0 && c <= 0x31EF) 33 | return true; 34 | if (c >= 0xF900 && c <= 0xFAFF) 35 | return true; 36 | if (c >= 0x20000 && c <= 0x2A6DF) 37 | return true; 38 | if (c >= 0x2A700 && c <= 0x2B73F) 39 | return true; 40 | if (c >= 0x2B740 && c <= 0x2B81F) 41 | return true; 42 | if (c >= 0x2F800 && c <= 0x2FA1F) 43 | return true; 44 | return false; 45 | } 46 | 47 | bool (*isCJKIdeographOrSymbol)(UChar32); 48 | %hookf(bool, isCJKIdeographOrSymbol, UChar32 c) { 49 | if ((c == 0x2C7) || (c == 0x2CA) || (c == 0x2CB) || (c == 0x2D9)) 50 | return true; 51 | if ((c == 0x2020) || (c == 0x2021) || (c == 0x2030) || (c == 0x203B) || (c == 0x203C) 52 | || (c == 0x2042) || (c == 0x2047) || (c == 0x2048) || (c == 0x2049) || (c == 0x2051) 53 | || (c == 0x20DD) || (c == 0x20DE) || (c == 0x2100) || (c == 0x2103) || (c == 0x2105) 54 | || (c == 0x2109) || (c == 0x210A) || (c == 0x2113) || (c == 0x2116) || (c == 0x2121) 55 | || (c == 0x212B) || (c == 0x213B) || (c == 0x2150) || (c == 0x2151) || (c == 0x2152)) 56 | return true; 57 | if (c >= 0x2156 && c <= 0x215A) 58 | return true; 59 | if (c >= 0x2160 && c <= 0x216B) 60 | return true; 61 | if (c >= 0x2170 && c <= 0x217B) 62 | return true; 63 | if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312) || (c == 0x23BE) || (c == 0x23BF)) 64 | return true; 65 | if (c >= 0x23C0 && c <= 0x23CC) 66 | return true; 67 | if ((c == 0x23CE) || (c == 0x2423)) 68 | return true; 69 | if (c >= 0x2460 && c <= 0x2492) 70 | return true; 71 | if (c >= 0x249C && c <= 0x24FF) 72 | return true; 73 | if ((c == 0x25A0) || (c == 0x25A1) || (c == 0x25A2) || (c == 0x25AA) || (c == 0x25AB)) 74 | return true; 75 | if ((c == 0x25B1) || (c == 0x25B2) || (c == 0x25B3) || (c == 0x25B6) || (c == 0x25B7) || (c == 0x25BC) || (c == 0x25BD)) 76 | return true; 77 | if ((c == 0x25C0) || (c == 0x25C1) || (c == 0x25C6) || (c == 0x25C7) || (c == 0x25C9) || (c == 0x25CB) || (c == 0x25CC)) 78 | return true; 79 | if (c >= 0x25CE && c <= 0x25D3) 80 | return true; 81 | if (c >= 0x25E2 && c <= 0x25E6) 82 | return true; 83 | if (c == 0x25EF) 84 | return true; 85 | if (c >= 0x2600 && c <= 0x2603) 86 | return true; 87 | if ((c == 0x2605) || (c == 0x2606) || (c == 0x260E) || (c == 0x2616) || (c == 0x2617) || (c == 0x2640) || (c == 0x2642)) 88 | return true; 89 | if (c >= 0x2660 && c <= 0x266F) 90 | return true; 91 | if (c >= 0x2672 && c <= 0x267D) 92 | return true; 93 | if ((c == 0x26A0) || (c == 0x26BD) || (c == 0x26BE) || (c == 0x2713) || (c == 0x271A) || (c == 0x273F) || (c == 0x2740) || (c == 0x2756)) 94 | return true; 95 | if (c >= 0x2776 && c <= 0x277F) 96 | return true; 97 | if (c == 0x2B1A) 98 | return true; 99 | if (c >= 0x2FF0 && c <= 0x2FFF) 100 | return true; 101 | if (c >= 0x3000 && c < 0x3030) 102 | return true; 103 | if (c > 0x3030 && c <= 0x303F) 104 | return true; 105 | if (c >= 0x3040 && c <= 0x309F) 106 | return true; 107 | if (c >= 0x30A0 && c <= 0x30FF) 108 | return true; 109 | if (c >= 0x3100 && c <= 0x312F) 110 | return true; 111 | if (c >= 0x3190 && c <= 0x319F) 112 | return true; 113 | if (c >= 0x31A0 && c <= 0x31BF) 114 | return true; 115 | if (c >= 0x3200 && c <= 0x32FF) 116 | return true; 117 | if (c >= 0x3300 && c <= 0x33FF) 118 | return true; 119 | if (c >= 0xF860 && c <= 0xF862) 120 | return true; 121 | if (c >= 0xFE30 && c <= 0xFE4F) 122 | return true; 123 | if ((c == 0xFE10) || (c == 0xFE11) || (c == 0xFE12) || (c == 0xFE19)) 124 | return true; 125 | if ((c == 0xFF0D) || (c == 0xFF1B) || (c == 0xFF1C) || (c == 0xFF1E)) 126 | return false; 127 | if (c >= 0xFF00 && c <= 0xFFEF) 128 | return true; 129 | if (c == 0x1F100) 130 | return true; 131 | if (c >= 0x1F110 && c <= 0x1F129) 132 | return true; 133 | if (c >= 0x1F130 && c <= 0x1F149) 134 | return true; 135 | if (c >= 0x1F150 && c <= 0x1F169) 136 | return true; 137 | if (c >= 0x1F170 && c <= 0x1F189) 138 | return true; 139 | if (c >= 0x1F200 && c <= 0x1F6C5) 140 | return true; 141 | return isCJKIdeograph(c); 142 | } 143 | 144 | String (*RenderText_originalText)(void *); 145 | int (*RenderText_previousOffsetForBackwardDeletion)(void *, int); 146 | %hookf(int, RenderText_previousOffsetForBackwardDeletion, void *arg0, int current) { 147 | String m_text = RenderText_originalText(arg0); 148 | StringImpl& text = *m_text.impl(); 149 | bool sawRegionalIndicator = false; 150 | bool sawEmojiGroupCandidate = false; 151 | bool sawEmojiFitzpatrickModifier = false; 152 | while (current > 0) { 153 | UChar32 character; 154 | U16_PREV(text, 0, current, character); 155 | if (sawEmojiGroupCandidate) { 156 | sawEmojiGroupCandidate = false; 157 | if (character == zeroWidthJoiner) 158 | continue; 159 | U16_FWD_1_UNSAFE(text, current); 160 | break; 161 | } 162 | if (sawEmojiFitzpatrickModifier) { 163 | if (isEmojiFitzpatrickModifier(character)) { 164 | U16_FWD_1_UNSAFE(text, current); 165 | break; 166 | } 167 | if (!isVariationSelector(character)) 168 | break; 169 | } 170 | if (sawRegionalIndicator) { 171 | if (isEmojiRegionalIndicator(character)) 172 | break; 173 | U16_FWD_1_UNSAFE(text, current); 174 | } 175 | if (isInArmenianToLimbuRange(character)) 176 | break; 177 | if (isEmojiRegionalIndicator(character)) { 178 | sawRegionalIndicator = true; 179 | continue; 180 | } 181 | if (isEmojiFitzpatrickModifier(character)) { 182 | sawEmojiFitzpatrickModifier = true; 183 | continue; 184 | } 185 | if (isEmojiGroupCandidate(character)) { 186 | sawEmojiGroupCandidate = true; 187 | continue; 188 | } 189 | if (!isMark(character) && character != 0xFF9E && character != 0xFF9F) 190 | break; 191 | } 192 | if (current <= 0) 193 | return current; 194 | UChar character = text[current]; 195 | if ((character >= hangulChoseongStart && character <= hangulJongseongEnd) || (character >= hangulSyllableStart && character <= hangulSyllableEnd)) { 196 | HangulState state; 197 | if (character < hangulJungseongStart) 198 | state = HangulState::L; 199 | else if (character < hangulJongseongStart) 200 | state = HangulState::V; 201 | else if (character < hangulSyllableStart) 202 | state = HangulState::T; 203 | else 204 | state = isHangulLVT(character) ? HangulState::LVT : HangulState::LV; 205 | while (current > 0 && (character = text[current - 1]) >= hangulChoseongStart && character <= hangulSyllableEnd && (character <= hangulJongseongEnd || character >= hangulSyllableStart)) { 206 | switch (state) { 207 | case HangulState::V: 208 | if (character <= hangulChoseongEnd) 209 | state = HangulState::L; 210 | else if (character >= hangulSyllableStart && character <= hangulSyllableEnd && !isHangulLVT(character)) 211 | state = HangulState::LV; 212 | else if (character > hangulJungseongEnd) 213 | state = HangulState::Break; 214 | break; 215 | case HangulState::T: 216 | if (character >= hangulJungseongStart && character <= hangulJungseongEnd) 217 | state = HangulState::V; 218 | else if (character >= hangulSyllableStart && character <= hangulSyllableEnd) 219 | state = isHangulLVT(character) ? HangulState::LVT : HangulState::LV; 220 | else if (character < hangulJungseongStart) 221 | state = HangulState::Break; 222 | break; 223 | default: 224 | state = (character < hangulJungseongStart) ? HangulState::L : HangulState::Break; 225 | break; 226 | } 227 | if (state == HangulState::Break) 228 | break; 229 | --current; 230 | } 231 | } 232 | return current; 233 | } 234 | 235 | CodePath (*characterRangeCodePath)(const UChar *, unsigned); 236 | %hookf(CodePath, characterRangeCodePath, const UChar *characters, unsigned len) { 237 | CodePath result = Simple; 238 | bool previousCharacterIsEmojiGroupCandidate = false; 239 | for (unsigned i = 0; i < len; ++i) { 240 | const UChar c = characters[i]; 241 | if (c == zeroWidthJoiner && previousCharacterIsEmojiGroupCandidate) 242 | return Complex; 243 | 244 | previousCharacterIsEmojiGroupCandidate = false; 245 | if (c < 0x2E5) // U+02E5 through U+02E9 (Modifier Letters : Tone letters) 246 | continue; 247 | if (c <= 0x2E9) 248 | return Complex; 249 | 250 | if (c < 0x300) // U+0300 through U+036F Combining diacritical marks 251 | continue; 252 | if (c <= 0x36F) 253 | return Complex; 254 | 255 | if (c < 0x0591 || c == 0x05BE) // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha 256 | continue; 257 | if (c <= 0x05CF) 258 | return Complex; 259 | 260 | // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic, 261 | // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, 262 | // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar 263 | if (c < 0x0600) 264 | continue; 265 | if (c <= 0x109F) 266 | return Complex; 267 | 268 | // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left here if you precompose; 269 | // Modern Korean will be precomposed as a result of step A) 270 | if (c < 0x1100) 271 | continue; 272 | if (c <= 0x11FF) 273 | return Complex; 274 | 275 | if (c < 0x135D) // U+135D through U+135F Ethiopic combining marks 276 | continue; 277 | if (c <= 0x135F) 278 | return Complex; 279 | 280 | if (c < 0x1700) // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian 281 | continue; 282 | if (c <= 0x18AF) 283 | return Complex; 284 | 285 | if (c < 0x1900) // U+1900 through U+194F Limbu (Unicode 4.0) 286 | continue; 287 | if (c <= 0x194F) 288 | return Complex; 289 | 290 | if (c < 0x1980) // U+1980 through U+19DF New Tai Lue 291 | continue; 292 | if (c <= 0x19DF) 293 | return Complex; 294 | 295 | if (c < 0x1A00) // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic 296 | continue; 297 | if (c <= 0x1CFF) 298 | return Complex; 299 | 300 | if (c < 0x1DC0) // U+1DC0 through U+1DFF Comining diacritical mark supplement 301 | continue; 302 | if (c <= 0x1DFF) 303 | return Complex; 304 | 305 | // U+1E00 through U+2000 characters with diacritics and stacked diacritics 306 | if (c <= 0x2000) { 307 | result = SimpleWithGlyphOverflow; 308 | continue; 309 | } 310 | 311 | if (c < 0x20D0) // U+20D0 through U+20FF Combining marks for symbols 312 | continue; 313 | if (c <= 0x20FF) 314 | return Complex; 315 | 316 | if (c < 0x26F9) 317 | continue; 318 | if (c < 0x26FA) 319 | return Complex; 320 | 321 | if (c < 0x2CEF) // U+2CEF through U+2CF1 Combining marks for Coptic 322 | continue; 323 | if (c <= 0x2CF1) 324 | return Complex; 325 | 326 | if (c < 0x302A) // U+302A through U+302F Ideographic and Hangul Tone marks 327 | continue; 328 | if (c <= 0x302F) 329 | return Complex; 330 | 331 | if (c < 0xA67C) // U+A67C through U+A67D Combining marks for old Cyrillic 332 | continue; 333 | if (c <= 0xA67D) 334 | return Complex; 335 | 336 | if (c < 0xA6F0) // U+A6F0 through U+A6F1 Combining mark for Bamum 337 | continue; 338 | if (c <= 0xA6F1) 339 | return Complex; 340 | 341 | // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended, 342 | // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek, 343 | if (c < 0xA800) 344 | continue; 345 | if (c <= 0xABFF) 346 | return Complex; 347 | 348 | if (c < 0xD7B0) // U+D7B0 through U+D7FF Hangul Jamo Ext. B 349 | continue; 350 | if (c <= 0xD7FF) 351 | return Complex; 352 | 353 | if (c <= 0xDBFF) { 354 | // High surrogate 355 | 356 | if (i == len - 1) 357 | continue; 358 | 359 | UChar next = characters[++i]; 360 | if (!U16_IS_TRAIL(next)) 361 | continue; 362 | 363 | UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next); 364 | 365 | if (supplementaryCharacter < 0x10A00) 366 | continue; 367 | if (supplementaryCharacter < 0x10A60) // Kharoshthi 368 | return Complex; 369 | if (supplementaryCharacter < 0x11000) 370 | continue; 371 | if (supplementaryCharacter < 0x11080) // Brahmi 372 | return Complex; 373 | if (supplementaryCharacter < 0x110D0) // Kaithi 374 | return Complex; 375 | if (supplementaryCharacter < 0x11100) 376 | continue; 377 | if (supplementaryCharacter < 0x11150) // Chakma 378 | return Complex; 379 | if (supplementaryCharacter < 0x11180) // Mahajani 380 | return Complex; 381 | if (supplementaryCharacter < 0x111E0) // Sharada 382 | return Complex; 383 | if (supplementaryCharacter < 0x11200) 384 | continue; 385 | if (supplementaryCharacter < 0x11250) // Khojki 386 | return Complex; 387 | if (supplementaryCharacter < 0x112B0) 388 | continue; 389 | if (supplementaryCharacter < 0x11300) // Khudawadi 390 | return Complex; 391 | if (supplementaryCharacter < 0x11380) // Grantha 392 | return Complex; 393 | if (supplementaryCharacter < 0x11400) 394 | continue; 395 | if (supplementaryCharacter < 0x11480) // Newa 396 | return Complex; 397 | if (supplementaryCharacter < 0x114E0) // Tirhuta 398 | return Complex; 399 | if (supplementaryCharacter < 0x11580) 400 | continue; 401 | if (supplementaryCharacter < 0x11600) // Siddham 402 | return Complex; 403 | if (supplementaryCharacter < 0x11660) // Modi 404 | return Complex; 405 | if (supplementaryCharacter < 0x11680) 406 | continue; 407 | if (supplementaryCharacter < 0x116D0) // Takri 408 | return Complex; 409 | if (supplementaryCharacter < 0x11C00) 410 | continue; 411 | if (supplementaryCharacter < 0x11C70) // Bhaiksuki 412 | return Complex; 413 | if (supplementaryCharacter < 0x11CC0) // Marchen 414 | return Complex; 415 | if (supplementaryCharacter < 0x1E900) 416 | continue; 417 | if (supplementaryCharacter < 0x1E960) // Adlam 418 | return Complex; 419 | if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Regional Indicator Symbols 420 | continue; 421 | if (supplementaryCharacter <= 0x1F1FF) 422 | return Complex; 423 | 424 | if (isEmojiFitzpatrickModifier(supplementaryCharacter)) 425 | return Complex; 426 | if (isEmojiGroupCandidate(supplementaryCharacter)) { 427 | previousCharacterIsEmojiGroupCandidate = true; 428 | continue; 429 | } 430 | 431 | if (supplementaryCharacter < 0xE0000) 432 | continue; 433 | if (supplementaryCharacter < 0xE0080) // Tags 434 | return Complex; 435 | if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Unicode variation selectors. 436 | continue; 437 | if (supplementaryCharacter <= 0xE01EF) 438 | return Complex; 439 | 440 | // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other Complex scripts 441 | // in plane 1 or higher. 442 | 443 | continue; 444 | } 445 | 446 | if (c < 0xFE00) // U+FE00 through U+FE0F Unicode variation selectors 447 | continue; 448 | if (c <= 0xFE0F) 449 | return Complex; 450 | 451 | if (c < 0xFE20) // U+FE20 through U+FE2F Combining half marks 452 | continue; 453 | if (c <= 0xFE2F) 454 | return Complex; 455 | } 456 | return result; 457 | } 458 | 459 | bool (*advanceByCombiningCharacterSequence)(const UChar *&, const UChar *, UChar32&, unsigned&); 460 | %hookf(bool, advanceByCombiningCharacterSequence, const UChar*&iterator, const UChar* end, UChar32& baseCharacter, unsigned& markCount) { 461 | markCount = 0; 462 | unsigned i = 0; 463 | unsigned remainingCharacters = end - iterator; 464 | U16_NEXT(iterator, i, remainingCharacters, baseCharacter); 465 | iterator = iterator + i; 466 | if (U_IS_SURROGATE(baseCharacter)) 467 | return false; 468 | bool sawEmojiGroupCandidate = isEmojiGroupCandidate(baseCharacter); 469 | bool sawJoiner = false; 470 | bool sawRegionalIndicator = isEmojiRegionalIndicator(baseCharacter); 471 | while (iterator < end) { 472 | UChar32 nextCharacter; 473 | unsigned markLength = 0; 474 | bool shouldContinue = false; 475 | U16_NEXT(iterator, markLength, static_cast(end - iterator), nextCharacter); 476 | if (isVariationSelector(nextCharacter) || isEmojiFitzpatrickModifier(nextCharacter)) 477 | shouldContinue = true; 478 | if (sawRegionalIndicator && isEmojiRegionalIndicator(nextCharacter)) { 479 | shouldContinue = true; 480 | sawRegionalIndicator = false; 481 | } 482 | if (sawJoiner && isEmojiGroupCandidate(nextCharacter)) 483 | shouldContinue = true; 484 | sawJoiner = false; 485 | if (sawEmojiGroupCandidate && nextCharacter == zeroWidthJoiner) { 486 | sawJoiner = true; 487 | shouldContinue = true; 488 | } 489 | if (!shouldContinue && !(U_GET_GC_MASK(nextCharacter) & U_GC_M_MASK)) 490 | break; 491 | markCount += markLength; 492 | iterator += markLength; 493 | } 494 | return true; 495 | } 496 | 497 | %ctor { 498 | if (IS_IOS_OR_NEWER(iOS_10_0)) 499 | return; 500 | MSImageRef ref = MSGetImageByName(realPath2(@"/System/Library/PrivateFrameworks/WebCore.framework/WebCore")); 501 | isCJKIdeograph = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore11FontCascade14isCJKIdeographEi"); 502 | if (isCJKIdeograph == NULL) 503 | isCJKIdeograph = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore4Font14isCJKIdeographEi"); 504 | HBLogDebug(@"[WebCoreHack] Found isCJKIdeograph: %d", isCJKIdeograph != NULL); 505 | isCJKIdeographOrSymbol = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22isCJKIdeographOrSymbolEi"); 506 | if (isCJKIdeographOrSymbol == NULL) 507 | isCJKIdeographOrSymbol = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore4Font22isCJKIdeographOrSymbolEi"); 508 | HBLogDebug(@"[WebCoreHack] Found isCJKIdeographOrSymbol: %d", isCJKIdeographOrSymbol != NULL); 509 | RenderText_originalText = (String (*)(void *))MSFindSymbol(ref, "__ZNK7WebCore10RenderText12originalTextEv"); 510 | HBLogDebug(@"[WebCoreHack] Found RenderText_originalText: %d", RenderText_originalText != NULL); 511 | RenderText_previousOffsetForBackwardDeletion = (int (*)(void *, int))MSFindSymbol(ref, "__ZNK7WebCore10RenderText33previousOffsetForBackwardDeletionEi"); 512 | HBLogDebug(@"[WebCoreHack] Found RenderText_previousOffsetForBackwardDeletion: %d", RenderText_previousOffsetForBackwardDeletion != NULL); 513 | characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22characterRangeCodePathEPKDsj"); 514 | if (characterRangeCodePath == NULL) 515 | characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22characterRangeCodePathEPKtj"); // missing in iOS 5 516 | if (characterRangeCodePath == NULL) 517 | characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore4Font22characterRangeCodePathEPKtj"); 518 | HBLogDebug(@"[WebCoreHack] Found characterRangeCodePath: %d", characterRangeCodePath != NULL); 519 | #if __LP64__ || !TARGET_OS_SIMULATOR 520 | advanceByCombiningCharacterSequence = (bool (*)(const UChar *&, const UChar *, UChar32&, unsigned&))MSFindSymbol(ref, "__ZN7WebCoreL35advanceByCombiningCharacterSequenceERPKDsS1_RiRj"); 521 | if (advanceByCombiningCharacterSequence == NULL) 522 | advanceByCombiningCharacterSequence = (bool (*)(const UChar *&, const UChar *, UChar32&, unsigned&))MSFindSymbol(ref, "__ZN7WebCoreL35advanceByCombiningCharacterSequenceERPKtS1_RiRj"); // missing in iOS 5-6 523 | HBLogDebug(@"[WebCoreHack] Found advanceByCombiningCharacterSequence: %d", advanceByCombiningCharacterSequence != NULL); 524 | #endif 525 | %init; 526 | } 527 | 528 | #endif -------------------------------------------------------------------------------- /WebCoreSupport/CharactersProperties.h: -------------------------------------------------------------------------------- 1 | #import 2 | #import 3 | 4 | namespace WebCore { 5 | 6 | static inline bool isEmojiGroupCandidate(UChar32 character) { 7 | switch (static_cast(ublock_getCode(character))) { 8 | case UBLOCK_MISCELLANEOUS_SYMBOLS: 9 | case UBLOCK_DINGBATS: 10 | case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS: 11 | case UBLOCK_EMOTICONS: 12 | case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS: 13 | case UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS: 14 | case UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A: 15 | return true; 16 | default: 17 | return false; 18 | } 19 | } 20 | 21 | static inline bool isEmojiFitzpatrickModifier(UChar32 character){ 22 | return character >= 0x1F3FB && character <= 0x1F3FF; 23 | } 24 | 25 | inline bool isVariationSelector(UChar32 character){ 26 | return character >= 0xFE00 && character <= 0xFE0F; 27 | } 28 | 29 | } 30 | 31 | const UChar zeroWidthJoiner = 0x200D; 32 | const UChar hangulChoseongStart = 0x1100; 33 | const UChar hangulChoseongEnd = 0x115F; 34 | const UChar hangulJungseongStart = 0x1160; 35 | const UChar hangulJungseongEnd = 0x11A2; 36 | const UChar hangulJongseongStart = 0x11A8; 37 | const UChar hangulJongseongEnd = 0x11F9; 38 | const UChar hangulSyllableStart = 0xAC00; 39 | const UChar hangulSyllableEnd = 0xD7AF; 40 | const UChar hangulJongseongCount = 28; 41 | 42 | enum class HangulState { 43 | L, V, T, LV, LVT, Break 44 | }; 45 | 46 | static inline bool isHangulLVT(UChar character){ 47 | return (character - hangulSyllableStart) % hangulJongseongCount; 48 | } 49 | 50 | static inline bool isMark(UChar32 character){ 51 | return U_GET_GC_MASK(character) & U_GC_M_MASK; 52 | } 53 | 54 | inline bool isEmojiRegionalIndicator(UChar32 character) { 55 | return character >= 0x1F1E6 && character <= 0x1F1FF; 56 | } 57 | 58 | // inline bool isEmojiWithPresentationByDefault(UChar32 character) { 59 | // return u_hasBinaryProperty(character, UCHAR_EMOJI_PRESENTATION); 60 | // } 61 | 62 | // inline bool isEmojiModifierBase(UChar32 character) { 63 | // return u_hasBinaryProperty(character, UCHAR_EMOJI_MODIFIER_BASE); 64 | // } 65 | 66 | static inline bool isInArmenianToLimbuRange(UChar32 character){ 67 | return character >= 0x0530 && character < 0x1950; 68 | } 69 | -------------------------------------------------------------------------------- /WebCoreSupport/CoreGraphicsSPI.h: -------------------------------------------------------------------------------- 1 | #import 2 | 3 | enum { 4 | kCGFontRenderingStyleAntialiasing = 1 << 0, 5 | kCGFontRenderingStyleSmoothing = 1 << 1, 6 | kCGFontRenderingStyleSubpixelPositioning = 1 << 2, 7 | kCGFontRenderingStyleSubpixelQuantization = 1 << 3, 8 | kCGFontRenderingStylePlatformNative = 1 << 9, 9 | kCGFontRenderingStyleMask = 0x20F, 10 | }; 11 | typedef uint32_t CGFontRenderingStyle; 12 | 13 | enum { 14 | kCGFontAntialiasingStyleUnfiltered = 0 << 7, 15 | kCGFontAntialiasingStyleFilterLight = 1 << 7, 16 | }; 17 | typedef uint32_t CGFontAntialiasingStyle; 18 | -------------------------------------------------------------------------------- /WebCoreSupport/RefCounted.h: -------------------------------------------------------------------------------- 1 | namespace WTF { 2 | class RefCountedBase { 3 | public: 4 | void ref() const { ++m_refCount; } 5 | bool derefBase() const { 6 | unsigned tempRefCount = m_refCount - 1; 7 | if (!tempRefCount) 8 | return true; 9 | m_refCount = tempRefCount; 10 | return false; 11 | } 12 | protected: 13 | RefCountedBase() : m_refCount(1) { } 14 | private: 15 | mutable unsigned m_refCount; 16 | }; 17 | template class RefCounted : public RefCountedBase { 18 | public: 19 | void deref() const { 20 | if (derefBase()) 21 | delete static_cast(this); 22 | } 23 | protected: 24 | RefCounted() { } 25 | }; 26 | }; -------------------------------------------------------------------------------- /WebCoreSupport/RefPtr.h: -------------------------------------------------------------------------------- 1 | namespace WTF { 2 | 3 | template class PassRefPtr; 4 | 5 | template class RefPtr 6 | { 7 | public: 8 | RefPtr() : m_ptr(0) {} 9 | RefPtr(T *ptr) : m_ptr(ptr) { if (ptr) ptr->ref(); } 10 | RefPtr(const RefPtr& o) : m_ptr(o.m_ptr) { if (T *ptr = m_ptr) ptr->ref(); } 11 | // see comment in PassRefPtr.h for why this takes const reference 12 | template RefPtr(const PassRefPtr&); 13 | 14 | ~RefPtr() { if (T *ptr = m_ptr) ptr->deref(); } 15 | 16 | template RefPtr(const RefPtr& o) : m_ptr(o.get()) { if (T *ptr = m_ptr) ptr->ref(); } 17 | 18 | T *get() const { return m_ptr; } 19 | 20 | PassRefPtr release() { PassRefPtr tmp = adoptRef(m_ptr); m_ptr = 0; return tmp; } 21 | 22 | T& operator*() const { return *m_ptr; } 23 | T *operator->() const { return m_ptr; } 24 | 25 | bool operator!() const { return !m_ptr; } 26 | 27 | // This conversion operator allows implicit conversion to bool but not to other integer types. 28 | typedef T * (RefPtr::*UnspecifiedBoolType)() const; 29 | operator UnspecifiedBoolType() const { return m_ptr ? &RefPtr::get : 0; } 30 | 31 | RefPtr& operator=(const RefPtr&); 32 | RefPtr& operator=(T *); 33 | RefPtr& operator=(const PassRefPtr&); 34 | template RefPtr& operator=(const RefPtr&); 35 | template RefPtr& operator=(const PassRefPtr&); 36 | 37 | void swap(RefPtr&); 38 | 39 | private: 40 | T *m_ptr; 41 | }; 42 | 43 | template template inline RefPtr::RefPtr(const PassRefPtr& o) 44 | : m_ptr(o.release()) 45 | { 46 | } 47 | 48 | template inline RefPtr& RefPtr::operator=(const RefPtr& o) 49 | { 50 | T* optr = o.get(); 51 | if (optr) 52 | optr->ref(); 53 | T* ptr = m_ptr; 54 | m_ptr = optr; 55 | if (ptr) 56 | ptr->deref(); 57 | return *this; 58 | } 59 | 60 | template template inline RefPtr& RefPtr::operator=(const RefPtr& o) 61 | { 62 | T* optr = o.get(); 63 | if (optr) 64 | optr->ref(); 65 | T* ptr = m_ptr; 66 | m_ptr = optr; 67 | if (ptr) 68 | ptr->deref(); 69 | return *this; 70 | } 71 | 72 | template inline RefPtr& RefPtr::operator=(T* optr) 73 | { 74 | if (optr) 75 | optr->ref(); 76 | T* ptr = m_ptr; 77 | m_ptr = optr; 78 | if (ptr) 79 | ptr->deref(); 80 | return *this; 81 | } 82 | 83 | template inline RefPtr& RefPtr::operator=(const PassRefPtr& o) 84 | { 85 | T* ptr = m_ptr; 86 | m_ptr = o.release(); 87 | if (ptr) 88 | ptr->deref(); 89 | return *this; 90 | } 91 | 92 | template template inline RefPtr& RefPtr::operator=(const PassRefPtr& o) 93 | { 94 | T* ptr = m_ptr; 95 | m_ptr = o.release(); 96 | if (ptr) 97 | ptr->deref(); 98 | return *this; 99 | } 100 | 101 | template inline void RefPtr::swap(RefPtr& o) 102 | { 103 | std::swap(m_ptr, o.m_ptr); 104 | } 105 | 106 | template inline void swap(RefPtr& a, RefPtr& b) 107 | { 108 | a.swap(b); 109 | } 110 | 111 | template inline bool operator==(const RefPtr& a, const RefPtr& b) 112 | { 113 | return a.get() == b.get(); 114 | } 115 | 116 | template inline bool operator==(const RefPtr& a, U* b) 117 | { 118 | return a.get() == b; 119 | } 120 | 121 | template inline bool operator==(T* a, const RefPtr& b) 122 | { 123 | return a == b.get(); 124 | } 125 | 126 | template inline bool operator!=(const RefPtr& a, const RefPtr& b) 127 | { 128 | return a.get() != b.get(); 129 | } 130 | 131 | template inline bool operator!=(const RefPtr& a, U* b) 132 | { 133 | return a.get() != b; 134 | } 135 | 136 | template inline bool operator!=(T* a, const RefPtr& b) 137 | { 138 | return a != b.get(); 139 | } 140 | 141 | template inline RefPtr static_pointer_cast(const RefPtr& p) 142 | { 143 | return RefPtr(static_cast(p.get())); 144 | } 145 | 146 | template inline RefPtr const_pointer_cast(const RefPtr& p) 147 | { 148 | return RefPtr(const_cast(p.get())); 149 | } 150 | 151 | template inline T* getPtr(const RefPtr& p) 152 | { 153 | return p.get(); 154 | } 155 | 156 | } // namespace WTF 157 | 158 | using WTF::RefPtr; 159 | using WTF::static_pointer_cast; 160 | using WTF::const_pointer_cast; -------------------------------------------------------------------------------- /WebCoreSupport/RenderText.h: -------------------------------------------------------------------------------- 1 | #import "StringImpl.h" 2 | 3 | using namespace WTF; 4 | 5 | namespace WebCore { 6 | 7 | class InlineTextBox; 8 | 9 | class RenderText { 10 | private: 11 | String m_text; 12 | public: 13 | StringImpl* text() const { return m_text.impl(); } 14 | }; 15 | } // namespace WebCore -------------------------------------------------------------------------------- /WebCoreSupport/StringImpl.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #import "RefPtr.h" 5 | #import "RefCounted.h" 6 | 7 | typedef unsigned char LChar; 8 | 9 | namespace WTF { 10 | class StringImplBase { 11 | public: 12 | bool isStringImpl() { return (m_refCountAndFlags & s_refCountInvalidForStringImpl) != s_refCountInvalidForStringImpl; } 13 | unsigned length() const { return m_length; } 14 | void ref() { m_refCountAndFlags += s_refCountIncrement; } 15 | protected: 16 | enum BufferOwnership { 17 | BufferInternal, 18 | BufferOwned, 19 | BufferSubstring, 20 | BufferShared, 21 | }; 22 | 23 | StringImplBase() { } 24 | 25 | static const unsigned s_refCountMask = 0xFFFFFF80; 26 | static const unsigned s_refCountIncrement = 0x80; 27 | static const unsigned s_refCountFlagStatic = 0x40; 28 | static const unsigned s_refCountFlagHasTerminatingNullCharacter = 0x20; 29 | static const unsigned s_refCountFlagIsAtomic = 0x10; 30 | static const unsigned s_refCountFlagShouldReportedCost = 0x8; 31 | static const unsigned s_refCountFlagIsIdentifier = 0x4; 32 | static const unsigned s_refCountMaskBufferOwnership = 0x3; 33 | static const unsigned s_refCountInvalidForStringImpl = s_refCountFlagStatic | s_refCountFlagShouldReportedCost; 34 | 35 | unsigned m_refCountAndFlags; 36 | unsigned m_length; 37 | }; 38 | }; 39 | 40 | namespace WTF { 41 | class StringImpl : public StringImplBase { 42 | public: 43 | CFStringRef createCFString(); 44 | UChar operator[](unsigned i) { return m_data[i]; } 45 | //operator NSString*(); 46 | const UChar* characters() const { return m_data; } 47 | bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } 48 | const LChar* characters8() const { return m_data8; } 49 | const UChar* characters16() const { return m_data16; } 50 | void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } 51 | unsigned length() const { return m_length; } 52 | private: 53 | static const unsigned s_hashFlag8BitBuffer = 1u << 3; 54 | const UChar* m_data; 55 | unsigned m_refCount; 56 | unsigned m_length; 57 | union { 58 | const LChar* m_data8; 59 | const UChar* m_data16; 60 | }; 61 | mutable unsigned m_hashAndFlags; 62 | }; 63 | 64 | bool equal(const StringImpl*, const StringImpl*); 65 | bool equal(const StringImpl*, const char*); 66 | inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } 67 | 68 | bool equalIgnoringCase(StringImpl*, StringImpl*); 69 | bool equalIgnoringCase(StringImpl*, const char*); 70 | inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } 71 | bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); 72 | inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 73 | 74 | int codePointCompare(const StringImpl*, const StringImpl*); 75 | }; 76 | 77 | template 78 | bool isPointerTypeAlignmentOkay(Type* ptr) 79 | { 80 | return !(reinterpret_cast(ptr) % __alignof__(Type)); 81 | } 82 | 83 | template 84 | TypePtr reinterpret_cast_ptr(void* ptr) 85 | { 86 | return reinterpret_cast(ptr); 87 | } 88 | 89 | template 90 | TypePtr reinterpret_cast_ptr(const void* ptr) 91 | { 92 | return reinterpret_cast(ptr); 93 | } 94 | 95 | namespace WTF { 96 | class CStringBuffer : public RefCounted { 97 | public: 98 | const char* data() { return mutableData(); } 99 | size_t length() const { return m_length; } 100 | private: 101 | friend class CString; 102 | CStringBuffer(size_t length) : m_length(length) { } 103 | char* mutableData() { return reinterpret_cast_ptr(this + 1); } 104 | const size_t m_length; 105 | }; 106 | class CString { 107 | public: 108 | CString(CStringBuffer* buffer) : m_buffer(buffer) { } 109 | const char *data() { return m_buffer ? m_buffer->data() : 0; } 110 | private: 111 | RefPtr m_buffer; 112 | }; 113 | }; 114 | 115 | namespace WTF { 116 | 117 | typedef enum { 118 | LenientConversion, 119 | StrictConversion, 120 | StrictConversionReplacingUnpairedSurrogatesWithFFFD, 121 | } ConversionMode; 122 | 123 | class String { 124 | public: 125 | String() { } 126 | String(StringImpl* impl) : m_impl(impl) { } 127 | String(RefPtr impl) : m_impl(impl) { } 128 | void swap(String& o) { m_impl.swap(o.m_impl); } 129 | bool isEmpty() const { return !m_impl || !m_impl->length(); } 130 | bool isNull() const { return !m_impl; } 131 | bool is8Bit() const { return m_impl->is8Bit(); } 132 | StringImpl* impl() const { return m_impl.get(); } 133 | unsigned length() const { 134 | if (!m_impl) 135 | return 0; 136 | return m_impl->length(); 137 | } 138 | const LChar* characters8() const { 139 | if (!m_impl) 140 | return 0; 141 | return m_impl->characters8(); 142 | } 143 | const UChar* characters16() const { 144 | if (!m_impl) 145 | return 0; 146 | return m_impl->characters16(); 147 | } 148 | const UChar* characters() const { 149 | if (!m_impl) 150 | return 0; 151 | return m_impl->characters(); 152 | } 153 | UChar operator[](unsigned index) const { 154 | if (!m_impl || index >= m_impl->length()) 155 | return 0; 156 | return m_impl->characters()[index]; 157 | } 158 | static String number(short); 159 | static String number(unsigned short); 160 | static String number(int); 161 | static String number(unsigned); 162 | static String number(long); 163 | static String number(unsigned long); 164 | static String number(long long); 165 | static String number(unsigned long long); 166 | static String number(double); 167 | 168 | void append(const String&); 169 | void append(char); 170 | void append(UChar); 171 | void append(const UChar*, unsigned length); 172 | void insert(const String&, unsigned pos); 173 | void insert(const UChar*, unsigned length, unsigned pos); 174 | 175 | CString utf8(ConversionMode = LenientConversion) const; 176 | private: 177 | RefPtr m_impl; 178 | }; 179 | }; 180 | 181 | namespace WTF { 182 | class AtomicString { 183 | public: 184 | bool isEmpty() const { return m_string.isEmpty(); }; 185 | const String& string() const { return m_string; }; 186 | private: 187 | String m_string; 188 | }; 189 | }; 190 | 191 | namespace WTF { 192 | class StringBuilder { 193 | public: 194 | void append(const UChar*, unsigned); 195 | void append(const LChar*, unsigned); 196 | void append(const char* characters, unsigned length) { append(reinterpret_cast(characters), length); } 197 | void append(const char* characters) { 198 | if (characters) 199 | append(characters, strlen(characters)); 200 | } 201 | String toString() { 202 | shrinkToFit(); 203 | if (m_string.isNull()) 204 | reifyString(); 205 | return m_string; 206 | } 207 | private: 208 | void shrinkToFit(); 209 | void reifyString() const; 210 | mutable String m_string; 211 | unsigned m_length; 212 | }; 213 | }; 214 | 215 | namespace WebCore { 216 | class TextBreakIterator; 217 | }; 218 | 219 | namespace WTF { 220 | class StringView { 221 | public: 222 | StringView(); 223 | StringView(const String&); 224 | StringView(const StringImpl&); 225 | StringView(const StringImpl*); 226 | class UpconvertedCharacters; 227 | const UChar* characters16() const; 228 | bool is8Bit() const; 229 | UpconvertedCharacters upconvertedCharacters() const; 230 | private: 231 | const void *m_characters { nullptr }; 232 | unsigned m_length { 0 }; 233 | }; 234 | static const unsigned is16BitStringFlag = 1u << 31; 235 | inline bool StringView::is8Bit() const { 236 | return !(m_length & is16BitStringFlag); 237 | } 238 | class StringView::UpconvertedCharacters { 239 | public: 240 | explicit UpconvertedCharacters(const StringView&); 241 | operator const UChar*() const { return m_characters; }; 242 | const UChar* get() const { return m_characters; }; 243 | private: 244 | //Vector m_upconvertedCharacters; 245 | const UChar* m_characters; 246 | }; 247 | inline const UChar* StringView::characters16() const { 248 | return static_cast(m_characters); 249 | } 250 | inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const { 251 | return UpconvertedCharacters(*this); 252 | } 253 | inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string) { 254 | if (!string.is8Bit()) { 255 | m_characters = string.characters16(); 256 | return; 257 | } 258 | } 259 | inline StringView::StringView(const String& string) { 260 | if (!string.impl()) { 261 | m_characters = nullptr; 262 | m_length = 0; 263 | return; 264 | } 265 | if (string.is8Bit()) { 266 | return; 267 | } 268 | } 269 | }; -------------------------------------------------------------------------------- /WebCoreSupport/UAX.h: -------------------------------------------------------------------------------- 1 | #define ADDITIONAL_EMOJI_SUPPORT 1 2 | 3 | enum LineBreakIteratorMode { 4 | LineBreakIteratorModeUAX14, 5 | LineBreakIteratorModeUAX14Loose, 6 | LineBreakIteratorModeUAX14Normal, 7 | LineBreakIteratorModeUAX14Strict, 8 | }; 9 | 10 | static const char* uax14Prologue = 11 | "!!chain;" 12 | "!!LBCMNoChain;" 13 | "!!lookAheadHardBreak;"; 14 | 15 | static const char* uax14AssignmentsBefore = 16 | // explicitly enumerate $CJ since ICU versions prior to 49 don't support :LineBreak=Conditional_Japanese_Starter: 17 | "$CJ = [" 18 | #if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) 19 | ":LineBreak=Conditional_Japanese_Starter:" 20 | #else 21 | "\\u3041\\u3043\\u3045\\u3047\\u3049\\u3063\\u3083\\u3085\\u3087\\u308E\\u3095\\u3096\\u30A1\\u30A3\\u30A5\\u30A7" 22 | "\\u30A9\\u30C3\\u30E3\\u30E5\\u30E7\\u30EE\\u30F5\\u30F6\\u30FC" 23 | "\\u31F0\\u31F1\\u31F2\\u31F3\\u31F4\\u31F5\\u31F6\\u31F7\\u31F8\\u31F9\\u31FA\\u31FB\\u31FC\\u31FD\\u31FE\\u31FF" 24 | "\\uFF67\\uFF68\\uFF69\\uFF6A\\uFF6B\\uFF6C\\uFF6D\\uFF6E\\uFF6F\\uFF70" 25 | #endif 26 | "];"; 27 | 28 | static const char* uax14AssignmentsCustomLooseCJK = 29 | "$BA_SUB = [\\u2010\\u2013];" 30 | "$EX_SUB = [\\u0021\\u003F\\uFF01\\uFF1F];" 31 | "$ID_SUB = '';" 32 | "$IN_SUB = [\\u2025\\u2026];" 33 | "$IS_SUB = [\\u003A\\u003B];" 34 | "$NS_SUB = [\\u203C\\u2047\\u2048\\u2049\\u3005\\u301C\\u303B\\u309D\\u309E\\u30A0\\u30FB\\u30FD\\u30FE\\uFF1A\\uFF1B\\uFF65];" 35 | "$PO_SUB = [\\u0025\\u00A2\\u00B0\\u2030\\u2032\\u2033\\u2103\\uFF05\\uFFE0];" 36 | "$PR_SUB = [\\u0024\\u00A3\\u00A5\\u20AC\\u2116\\uFF04\\uFFE1\\uFFE5];" 37 | "$ID_ADD = [$CJ $BA_SUB $EX_SUB $IN_SUB $IS_SUB $NS_SUB $PO_SUB $PR_SUB];" 38 | "$NS_ADD = '';"; 39 | 40 | static const char* uax14AssignmentsCustomLooseNonCJK = 41 | "$BA_SUB = '';" 42 | "$EX_SUB = '';" 43 | "$ID_SUB = '';" 44 | "$IN_SUB = [\\u2025\\u2026];" 45 | "$IS_SUB = '';" 46 | "$NS_SUB = [\\u3005\\u303B\\u309D\\u309E\\u30FD\\u30FE];" 47 | "$PO_SUB = '';" 48 | "$PR_SUB = '';" 49 | "$ID_ADD = [$CJ $IN_SUB $NS_SUB];" 50 | "$NS_ADD = '';"; 51 | 52 | static const char* uax14AssignmentsCustomNormalCJK = 53 | "$BA_SUB = [\\u2010\\u2013];" 54 | "$EX_SUB = '';" 55 | "$IN_SUB = '';" 56 | "$ID_SUB = '';" 57 | "$IS_SUB = '';" 58 | "$NS_SUB = [\\u301C\\u30A0];" 59 | "$PO_SUB = '';" 60 | "$PR_SUB = '';" 61 | "$ID_ADD = [$CJ $BA_SUB $NS_SUB];" 62 | "$NS_ADD = '';"; 63 | 64 | static const char* uax14AssignmentsCustomNormalNonCJK = 65 | "$BA_SUB = '';" 66 | "$EX_SUB = '';" 67 | "$ID_SUB = '';" 68 | "$IN_SUB = '';" 69 | "$IS_SUB = '';" 70 | "$NS_SUB = '';" 71 | "$PO_SUB = '';" 72 | "$PR_SUB = '';" 73 | "$ID_ADD = [$CJ];" 74 | "$NS_ADD = '';"; 75 | 76 | static const char* uax14AssignmentsCustomStrictCJK = 77 | "$BA_SUB = '';" 78 | "$EX_SUB = '';" 79 | "$ID_SUB = '';" 80 | "$IN_SUB = '';" 81 | "$IS_SUB = '';" 82 | "$NS_SUB = '';" 83 | "$PO_SUB = '';" 84 | "$PR_SUB = '';" 85 | "$ID_ADD = '';" 86 | "$NS_ADD = [$CJ];"; 87 | 88 | #define uax14AssignmentsCustomStrictNonCJK uax14AssignmentsCustomStrictCJK 89 | #define uax14AssignmentsCustomDefaultCJK uax14AssignmentsCustomNormalCJK 90 | #define uax14AssignmentsCustomDefaultNonCJK uax14AssignmentsCustomStrictNonCJK 91 | 92 | static const char* uax14AssignmentsAfter = 93 | "$AI = [:LineBreak = Ambiguous:];" 94 | "$AL = [:LineBreak = Alphabetic:];" 95 | "$BA = [[:LineBreak = Break_After:] - $BA_SUB];" 96 | "$BB = [:LineBreak = Break_Before:];" 97 | "$BK = [:LineBreak = Mandatory_Break:];" 98 | "$B2 = [:LineBreak = Break_Both:];" 99 | "$CB = [:LineBreak = Contingent_Break:];" 100 | "$CL = [:LineBreak = Close_Punctuation:];" 101 | "$CM = [:LineBreak = Combining_Mark:];" 102 | "$CP = [:LineBreak = Close_Parenthesis:];" 103 | "$CR = [:LineBreak = Carriage_Return:];" 104 | "$EX = [[:LineBreak = Exclamation:] - $EX_SUB];" 105 | "$GL = [:LineBreak = Glue:];" 106 | #if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) 107 | "$HL = [:LineBreak = Hebrew_Letter:];" 108 | #else 109 | "$HL = [[:Hebrew:] & [:Letter:]];" 110 | #endif 111 | "$HY = [:LineBreak = Hyphen:];" 112 | "$H2 = [:LineBreak = H2:];" 113 | "$H3 = [:LineBreak = H3:];" 114 | "$ID = [[[[:LineBreak = Ideographic:] - $CJ] $ID_ADD] - $ID_SUB];" 115 | "$IN = [[:LineBreak = Inseparable:] - $IN_SUB];" 116 | "$IS = [[:LineBreak = Infix_Numeric:] - $IS_SUB];" 117 | "$JL = [:LineBreak = JL:];" 118 | "$JV = [:LineBreak = JV:];" 119 | "$JT = [:LineBreak = JT:];" 120 | "$LF = [:LineBreak = Line_Feed:];" 121 | "$NL = [:LineBreak = Next_Line:];" 122 | "$NS = [[[[:LineBreak = Nonstarter:] - $CJ] $NS_ADD] - $NS_SUB];" 123 | "$NU = [:LineBreak = Numeric:];" 124 | "$OP = [:LineBreak = Open_Punctuation:];" 125 | "$PO = [[:LineBreak = Postfix_Numeric:] - $PO_SUB];" 126 | "$PR = [[:LineBreak = Prefix_Numeric:] - $PR_SUB];" 127 | "$QU = [:LineBreak = Quotation:];" 128 | "$RI = [\\U0001F1E6-\\U0001F1FF];" 129 | "$SA = [:LineBreak = Complex_Context:];" 130 | "$SG = [:LineBreak = Surrogate:];" 131 | "$SP = [:LineBreak = Space:];" 132 | "$SY = [:LineBreak = Break_Symbols:];" 133 | "$WJ = [:LineBreak = Word_Joiner:];" 134 | "$XX = [:LineBreak = Unknown:];" 135 | "$ZW = [:LineBreak = ZWSpace:];" 136 | "$ZWJ = \\u200D;" 137 | "$EmojiVar = \\uFE0F;" 138 | #if ADDITIONAL_EMOJI_SUPPORT 139 | "$EmojiForSeqs = [\\u2764 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F48B \\U0001F5E8];" 140 | "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918] ;" // Emoji that take Fitzpatrick modifiers 141 | #else 142 | "$EmojiForSeqs = [\\u2764 \\U0001F466-\\U0001F469 \\U0001F48B];" 143 | "$EmojiForMods = [\\u261D \\u270A-\\u270C \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3C7 \\U0001F3CA \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0] ;" // Emoji that take Fitzpatrick modifiers 144 | #endif 145 | "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];" 146 | "$dictionary = [:LineBreak = Complex_Context:];" 147 | "$ALPlus = [$AL $AI $SA $SG $XX];" 148 | "$ALcm = $ALPlus $CM*;" 149 | "$BAcm = $BA $CM*;" 150 | "$BBcm = $BB $CM*;" 151 | "$B2cm = $B2 $CM*;" 152 | "$CLcm = $CL $CM*;" 153 | "$CPcm = $CP $CM*;" 154 | "$EXcm = $EX $CM*;" 155 | "$GLcm = $GL $CM*;" 156 | "$HLcm = $HL $CM*;" 157 | "$HYcm = $HY $CM*;" 158 | "$H2cm = $H2 $CM*;" 159 | "$H3cm = $H3 $CM*;" 160 | "$IDcm = $ID $CM*;" 161 | "$INcm = $IN $CM*;" 162 | "$IScm = $IS $CM*;" 163 | "$JLcm = $JL $CM*;" 164 | "$JVcm = $JV $CM*;" 165 | "$JTcm = $JT $CM*;" 166 | "$NScm = $NS $CM*;" 167 | "$NUcm = $NU $CM*;" 168 | "$OPcm = $OP $CM*;" 169 | "$POcm = $PO $CM*;" 170 | "$PRcm = $PR $CM*;" 171 | "$QUcm = $QU $CM*;" 172 | "$RIcm = $RI $CM*;" 173 | "$SYcm = $SY $CM*;" 174 | "$WJcm = $WJ $CM*;"; 175 | 176 | static const char* uax14Forward = 177 | "!!forward;" 178 | "$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM];" 179 | "$CANT_CM = [$SP $BK $CR $LF $NL $ZW $CM];" 180 | "$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];" 181 | "$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $ALPlus];" 182 | "$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];" 183 | "$LB4Breaks = [$BK $CR $LF $NL];" 184 | "$LB4NonBreaks = [^$BK $CR $LF $NL];" 185 | "$LB8Breaks = [$LB4Breaks $ZW];" 186 | "$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];" 187 | "$LB18NonBreaks = [$LB8NonBreaks - [$SP]];" 188 | "$LB18Breaks = [$LB8Breaks $SP];" 189 | "$LB20NonBreaks = [$LB18NonBreaks - $CB];" 190 | "$ALPlus $CM+;" 191 | "$BA $CM+;" 192 | "$BB $CM+;" 193 | "$B2 $CM+;" 194 | "$CL $CM+;" 195 | "$CP $CM+;" 196 | "$EX $CM+;" 197 | "$GL $CM+;" 198 | "$HL $CM+;" 199 | "$HY $CM+;" 200 | "$H2 $CM+;" 201 | "$H3 $CM+;" 202 | "$ID $CM+;" 203 | "$IN $CM+;" 204 | "$IS $CM+;" 205 | "$JL $CM+;" 206 | "$JV $CM+;" 207 | "$JT $CM+;" 208 | "$NS $CM+;" 209 | "$NU $CM+;" 210 | "$OP $CM+;" 211 | "$PO $CM+;" 212 | "$PR $CM+;" 213 | "$QU $CM+;" 214 | "$RI $CM+;" 215 | "$SY $CM+;" 216 | "$WJ $CM+;" 217 | "$CR $LF {100};" 218 | "$LB4NonBreaks? $LB4Breaks {100};" 219 | "$CAN_CM $CM* $LB4Breaks {100};" 220 | "$CM+ $LB4Breaks {100};" 221 | "$LB4NonBreaks [$SP $ZW];" 222 | "$CAN_CM $CM* [$SP $ZW];" 223 | "$CM+ [$SP $ZW];" 224 | "$EmojiForSeqs $EmojiVar? $EmojiMods? $ZWJ $EmojiForSeqs;" 225 | "$CAN_CM $CM+;" 226 | "$CM+;" 227 | "$CAN_CM $CM* $WJcm;" 228 | "$LB8NonBreaks $WJcm;" 229 | "$CM+ $WJcm;" 230 | "$WJcm $CANT_CM;" 231 | "$WJcm $CAN_CM $CM*;" 232 | "$GLcm $CAN_CM $CM*;" 233 | "$GLcm $CANT_CM;" 234 | "[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;" 235 | "$CM+ GLcm;" 236 | "$LB8NonBreaks $CL;" 237 | "$CAN_CM $CM* $CL;" 238 | "$CM+ $CL;" 239 | "$LB8NonBreaks $CP;" 240 | "$CAN_CM $CM* $CP;" 241 | "$CM+ $CP;" 242 | "$LB8NonBreaks $EX;" 243 | "$CAN_CM $CM* $EX;" 244 | "$CM+ $EX;" 245 | "$LB8NonBreaks $IS;" 246 | "$CAN_CM $CM* $IS;" 247 | "$CM+ $IS;" 248 | "$LB8NonBreaks $SY;" 249 | "$CAN_CM $CM* $SY;" 250 | "$CM+ $SY;" 251 | "$OPcm $SP* $CAN_CM $CM*;" 252 | "$OPcm $SP* $CANT_CM;" 253 | "$OPcm $SP+ $CM+ $AL_FOLLOW?;" 254 | "$QUcm $SP* $OPcm;" 255 | "($CLcm | $CPcm) $SP* $NScm;" 256 | "$B2cm $SP* $B2cm;" 257 | "$LB18NonBreaks $CM* $QUcm;" 258 | "$CM+ $QUcm;" 259 | "$QUcm .?;" 260 | "$QUcm $LB18NonBreaks $CM*;" 261 | "$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); " 262 | "$BBcm [^$CB];" 263 | "$BBcm $LB20NonBreaks $CM*;" 264 | "$HLcm ($HYcm | $BAcm) [^$CB]?;" 265 | "$SYcm $HLcm;" 266 | "($ALcm | $HLcm) $INcm;" 267 | "$CM+ $INcm;" 268 | "$EXcm $INcm;" 269 | "$IDcm $INcm;" 270 | "$INcm $INcm;" 271 | "$NUcm $INcm;" 272 | "$IDcm $POcm;" 273 | "$ALcm $NUcm;" 274 | "$HLcm $NUcm;" 275 | "$CM+ $NUcm;" 276 | "$NUcm $ALcm;" 277 | "$NUcm $HLcm;" 278 | "$PRcm $IDcm;" 279 | "$PRcm ($ALcm | $HLcm);" 280 | "$POcm ($ALcm | $HLcm);" 281 | "($PRcm | $POcm)? ($OPcm | $HYcm)? $NUcm ($NUcm | $SYcm | $IScm)* ($CLcm | $CPcm)? ($PRcm | $POcm)?;" 282 | "$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);" 283 | "($JVcm | $H2cm) ($JVcm | $JTcm);" 284 | "($JTcm | $H3cm) $JTcm;" 285 | "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;" 286 | "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;" 287 | "$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);" 288 | "($ALcm | $HLcm) ($ALcm | $HLcm);" 289 | "$CM+ ($ALcm | $HLcm);" 290 | "$IScm ($ALcm | $HLcm);" 291 | "($ALcm | $HLcm | $NUcm) $OPcm;" 292 | "$CM+ $OPcm;" 293 | "$CPcm ($ALcm | $HLcm | $NUcm);" 294 | #if ADDITIONAL_EMOJI_SUPPORT 295 | "$RIcm $RIcm;" 296 | #endif 297 | "$EmojiForMods $EmojiVar? $EmojiMods;"; 298 | 299 | static const char* uax14Reverse = 300 | "!!reverse;" 301 | "$CM+ $ALPlus;" 302 | "$CM+ $BA;" 303 | "$CM+ $BB;" 304 | "$CM+ $B2;" 305 | "$CM+ $CL;" 306 | "$CM+ $CP;" 307 | "$CM+ $EX;" 308 | "$CM+ $GL;" 309 | "$CM+ $HL;" 310 | "$CM+ $HY;" 311 | "$CM+ $H2;" 312 | "$CM+ $H3;" 313 | "$CM+ $ID;" 314 | "$CM+ $IN;" 315 | "$CM+ $IS;" 316 | "$CM+ $JL;" 317 | "$CM+ $JV;" 318 | "$CM+ $JT;" 319 | "$CM+ $NS;" 320 | "$CM+ $NU;" 321 | "$CM+ $OP;" 322 | "$CM+ $PO;" 323 | "$CM+ $PR;" 324 | "$CM+ $QU;" 325 | #if ADDITIONAL_EMOJI_SUPPORT 326 | "$CM+ $RI;" 327 | #endif 328 | "$CM+ $SY;" 329 | "$CM+ $WJ;" 330 | "$CM+;" 331 | "$AL_FOLLOW $CM+ / ([$BK $CR $LF $NL $ZW {eof}] | $SP+ $CM+ $SP | $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));" 332 | "[$PR] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];" 333 | "$LB4Breaks [$LB4NonBreaks-$CM];" 334 | "$LB4Breaks $CM+ $CAN_CM;" 335 | "$LF $CR;" 336 | "[$SP $ZW] [$LB4NonBreaks-$CM];" 337 | "[$SP $ZW] $CM+ $CAN_CM;" 338 | "$EmojiForSeqs $ZWJ $EmojiMods? $EmojiVar? $EmojiForSeqs;" 339 | "$CM+ $CAN_CM;" 340 | "$CM* $WJ $CM* $CAN_CM;" 341 | "$CM* $WJ [$LB8NonBreaks-$CM];" 342 | "$CANT_CM $CM* $WJ;" 343 | "$CM* $CAN_CM $CM* $WJ;" 344 | "$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];" 345 | "$CANT_CM $CM* $GL;" 346 | "$CM* $CAN_CM $CM* $GL;" 347 | "$CL $CM+ $CAN_CM;" 348 | "$CP $CM+ $CAN_CM;" 349 | "$EX $CM+ $CAN_CM;" 350 | "$IS $CM+ $CAN_CM;" 351 | "$SY $CM+ $CAN_CM;" 352 | "$CL [$LB8NonBreaks-$CM];" 353 | "$CP [$LB8NonBreaks-$CM];" 354 | "$EX [$LB8NonBreaks-$CM];" 355 | "$IS [$LB8NonBreaks-$CM];" 356 | "$SY [$LB8NonBreaks-$CM];" 357 | "[$CL $CP $EX $IS $SY] $CM+ $SP+ $CM* $OP; " 358 | "$CM* $CAN_CM $SP* $CM* $OP;" 359 | "$CANT_CM $SP* $CM* $OP;" 360 | "$AL_FOLLOW? $CM+ $SP $SP* $CM* $OP;" 361 | "$AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;" 362 | "$CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP;" 363 | "$SY $CM $SP+ $OP;" 364 | "$CM* $OP $SP* $CM* $QU;" 365 | "$CM* $NS $SP* $CM* ($CL | $CP);" 366 | "$CM* $B2 $SP* $CM* $B2;" 367 | "$CM* $QU $CM* $CAN_CM;" 368 | "$CM* $QU $LB18NonBreaks;" 369 | "$CM* $CAN_CM $CM* $QU;" 370 | "$CANT_CM $CM* $QU;" 371 | "$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];" 372 | "$CM* [$LB20NonBreaks-$CM] $CM* $BB;" 373 | "[^$CB] $CM* $BB;" 374 | "[^$CB] $CM* ($HY | $BA) $CM* $HL;" 375 | "$CM* $HL $CM* $SY;" 376 | "$CM* $IN $CM* ($ALPlus | $HL);" 377 | "$CM* $IN $CM* $EX;" 378 | "$CM* $IN $CM* $ID;" 379 | "$CM* $IN $CM* $IN;" 380 | "$CM* $IN $CM* $NU;" 381 | "$CM* $PO $CM* $ID;" 382 | "$CM* $NU $CM* ($ALPlus | $HL);" 383 | "$CM* ($ALPlus | $HL) $CM* $NU;" 384 | "$CM* $ID $CM* $PR;" 385 | "$CM* ($ALPlus | $HL) $CM* $PR;" 386 | "$CM* ($ALPlus | $HL) $CM* $PO;" 387 | "($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;" 388 | "$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;" 389 | "$CM* ($JT | $JV) $CM* ($H2 | $JV);" 390 | "$CM* $JT $CM* ($H3 | $JT);" 391 | "$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);" 392 | "$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);" 393 | "$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;" 394 | "$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);" 395 | "$CM* ($ALPlus | $HL) $CM* $IS;" 396 | "$CM* $OP $CM* ($ALPlus | $HL | $NU);" 397 | "$CM* ($ALPlus | $HL | $NU) $CM* $CP;" 398 | #if ADDITIONAL_EMOJI_SUPPORT 399 | "$CM* $RI $CM* $RI;" 400 | #endif 401 | "$EmojiMods $EmojiVar? $EmojiForMods;"; 402 | 403 | static const char* uax14SafeForward = 404 | "!!safe_forward;" 405 | "[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];" 406 | "$dictionary $dictionary;"; 407 | 408 | static const char* uax14SafeReverse = 409 | "!!safe_reverse;" 410 | "$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];" 411 | "$CM+ $SP / .;" 412 | "$SP+ $CM* $OP;" 413 | "$SP+ $CM* $QU;" 414 | "$SP+ $CM* ($CL | $CP);" 415 | "$SP+ $CM* $B2;" 416 | "$CM* ($HY | $BA) $CM* $HL;" 417 | "($CM* ($IS | $SY))+ $CM* $NU;" 418 | "($CL | $CP) $CM* ($NU | $IS | $SY);" 419 | "$dictionary $dictionary;"; 420 | -------------------------------------------------------------------------------- /control: -------------------------------------------------------------------------------- 1 | Package: com.ps.emojiattributes 2 | Name: EmojiAttributes 3 | Depends: firmware (>= 5.1), mobilesubstrate, com.opa334.libundirect (>= 1.1.6) 4 | Conflicts: com.ps.flexmoji 5 | Version: 1.0.0 6 | Architecture: iphoneos-arm 7 | Description: Various under-the-hood fixes for emoji display. 8 | Maintainer: PoomSmart 9 | Author: PoomSmart 10 | Section: Tweaks 11 | Depiction: https://poomsmart.github.io/repo/depictions/emojiattributes.html 12 | SileoDepiction: https://poomsmart.github.io/repo/sileodepictions/emojiattributes.json 13 | -------------------------------------------------------------------------------- /copyResources.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Unnecessary as of iOS 10 3 | 4 | if [ -z $1 ];then 5 | echo "Runtime version required" 6 | exit 1 7 | fi 8 | 9 | EA_RUNTIME_ROOT=/Library/Developer/CoreSimulator/Profiles/Runtimes/iOS\ ${1}.simruntime/Contents/Resources/RuntimeRoot 10 | EA_BITMAP_NAME=emoji.bitmap 11 | 12 | sudo cp -v "${PWD}/layout/System/Library/PrivateFrameworks/TextInput.framework/${EA_BITMAP_NAME}" "${EA_RUNTIME_ROOT}/System/Library/PrivateFrameworks/TextInput.framework/" 13 | -------------------------------------------------------------------------------- /emojiprops.h: -------------------------------------------------------------------------------- 1 | // © 2021 and later: Unicode, Inc. and others. 2 | // License & terms of use: https://www.unicode.org/copyright.html 3 | 4 | // emojiprops.h 5 | // created: 2021sep03 Markus W. Scherer 6 | 7 | #ifndef __EMOJIPROPS_H__ 8 | #define __EMOJIPROPS_H__ 9 | 10 | #include 11 | #include 12 | 13 | enum { 14 | // Byte offsets from the start of the data, after the generic header, 15 | // in ascending order. 16 | // UCPTrie=CodePointTrie, follows the indexes 17 | IX_CPTRIE_OFFSET, 18 | IX_RESERVED1, 19 | IX_RESERVED2, 20 | IX_RESERVED3, 21 | 22 | // UCharsTrie=CharsTrie 23 | IX_BASIC_EMOJI_TRIE_OFFSET, 24 | IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET, 25 | IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET, 26 | IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET, 27 | IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET, 28 | IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET, 29 | IX_RESERVED10, 30 | IX_RESERVED11, 31 | IX_RESERVED12, 32 | IX_TOTAL_SIZE, 33 | 34 | // Not initially byte offsets. 35 | IX_RESERVED14, 36 | IX_RESERVED15, 37 | IX_COUNT // 16 38 | }; 39 | 40 | // Properties in the code point trie. 41 | enum { 42 | // https://www.unicode.org/reports/tr51/#Emoji_Properties 43 | BIT_EMOJI, 44 | BIT_EMOJI_PRESENTATION, 45 | BIT_EMOJI_MODIFIER, 46 | BIT_EMOJI_MODIFIER_BASE, 47 | BIT_EMOJI_COMPONENT, 48 | BIT_EXTENDED_PICTOGRAPHIC, 49 | // https://www.unicode.org/reports/tr51/#Emoji_Sets 50 | BIT_BASIC_EMOJI 51 | }; 52 | 53 | #endif // __EMOJIPROPS_H__ -------------------------------------------------------------------------------- /layout/DEBIAN/postinst: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall -9 kbd com.apple.WebKit.WebContent || true 4 | -------------------------------------------------------------------------------- /layout/DEBIAN/postrm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | killall -9 kbd com.apple.WebKit.WebContent || true 4 | -------------------------------------------------------------------------------- /layout/Library/Application Support/EmojiAttributes/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleExecutable 8 | EmojiAttributes 9 | CFBundleIdentifier 10 | com.ps.emojiattributes 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundlePackageType 14 | BNDL 15 | CFBundleShortVersionString 16 | 1.0.0 17 | CFBundleSignature 18 | ???? 19 | CFBundleVersion 20 | 1.0 21 | NSPrincipalClass 22 | EmojiAttributes 23 | 24 | 25 | -------------------------------------------------------------------------------- /layout/Library/Application Support/EmojiAttributes/emoji.bitmap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PoomSmart/EmojiAttributes/f2031a851ee0857d22db475b0429ebcb7cce23e1/layout/Library/Application Support/EmojiAttributes/emoji.bitmap -------------------------------------------------------------------------------- /layout/Library/Application Support/EmojiAttributes/uemoji.icu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PoomSmart/EmojiAttributes/f2031a851ee0857d22db475b0429ebcb7cce23e1/layout/Library/Application Support/EmojiAttributes/uemoji.icu -------------------------------------------------------------------------------- /unicode/cmemory.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask)) 4 | 5 | #define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) -------------------------------------------------------------------------------- /unicode/putilimp.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1997-2016, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ****************************************************************************** 10 | * 11 | * FILE NAME : putilimp.h 12 | * 13 | * Date Name Description 14 | * 10/17/04 grhoten Move internal functions from putil.h to this file. 15 | ****************************************************************************** 16 | */ 17 | 18 | #ifndef PUTILIMP_H 19 | #define PUTILIMP_H 20 | 21 | #include 22 | #include 23 | 24 | /** 25 | * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 26 | * Nearly all CPUs and compilers implement a right-shift of a signed integer 27 | * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB)) 28 | * into the vacated bits (sign extension). 29 | * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1. 30 | * 31 | * This can be useful for storing a signed value in the upper bits 32 | * and another bit field in the lower bits. 33 | * The signed value can be retrieved by simple right-shifting. 34 | * 35 | * This is consistent with the Java language. 36 | * 37 | * However, the C standard allows compilers to implement a right-shift of a signed integer 38 | * as a Logical Shift Right which copies a 0 into the vacated bits. 39 | * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff. 40 | * 41 | * Code that depends on the natural behavior should be guarded with this macro, 42 | * with an alternate path for unusual platforms. 43 | * @internal 44 | */ 45 | #ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 46 | /* Use the predefined value. */ 47 | #else 48 | /* 49 | * Nearly all CPUs & compilers implement a right-shift of a signed integer 50 | * as an Arithmetic Shift Right (with sign extension). 51 | */ 52 | # define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1 53 | #endif 54 | 55 | /** Define this to 1 if your platform supports IEEE 754 floating point, 56 | to 0 if it does not. */ 57 | #ifndef IEEE_754 58 | # define IEEE_754 1 59 | #endif 60 | 61 | /** 62 | * uintptr_t is an optional part of the standard definitions in stdint.h. 63 | * The opengroup.org documentation for stdint.h says 64 | * "On XSI-conformant systems, the intptr_t and uintptr_t types are required; 65 | * otherwise, they are optional." 66 | * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well. 67 | * 68 | * Do not use ptrdiff_t since it is signed. size_t is unsigned. 69 | */ 70 | /* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */ 71 | #if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390) 72 | typedef size_t uintptr_t; 73 | #endif 74 | 75 | /*===========================================================================*/ 76 | /** @{ Information about POSIX support */ 77 | /*===========================================================================*/ 78 | 79 | #ifdef U_HAVE_NL_LANGINFO_CODESET 80 | /* Use the predefined value. */ 81 | #elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX 82 | # define U_HAVE_NL_LANGINFO_CODESET 0 83 | #else 84 | # define U_HAVE_NL_LANGINFO_CODESET 1 85 | #endif 86 | 87 | #ifdef U_NL_LANGINFO_CODESET 88 | /* Use the predefined value. */ 89 | #elif !U_HAVE_NL_LANGINFO_CODESET 90 | # define U_NL_LANGINFO_CODESET -1 91 | #elif U_PLATFORM == U_PF_OS400 92 | /* not defined */ 93 | #else 94 | # define U_NL_LANGINFO_CODESET CODESET 95 | #endif 96 | 97 | #if defined(U_TZSET) || defined(U_HAVE_TZSET) 98 | /* Use the predefined value. */ 99 | #elif U_PLATFORM_USES_ONLY_WIN32_API 100 | // UWP doesn't support tzset or environment variables for tz 101 | #if U_PLATFORM_HAS_WINUWP_API == 0 102 | # define U_TZSET _tzset 103 | #endif 104 | #elif U_PLATFORM == U_PF_OS400 105 | /* not defined */ 106 | #else 107 | # define U_TZSET tzset 108 | #endif 109 | 110 | #if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE) 111 | /* Use the predefined value. */ 112 | #elif U_PLATFORM == U_PF_ANDROID 113 | # define U_TIMEZONE timezone 114 | #elif defined(__UCLIBC__) 115 | // uClibc does not have __timezone or _timezone. 116 | #elif defined(_NEWLIB_VERSION) 117 | # define U_TIMEZONE _timezone 118 | #elif defined(__GLIBC__) 119 | // glibc 120 | # define U_TIMEZONE __timezone 121 | #elif U_PLATFORM_IS_LINUX_BASED 122 | // not defined 123 | #elif U_PLATFORM_USES_ONLY_WIN32_API 124 | # define U_TIMEZONE _timezone 125 | #elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__) 126 | /* not defined */ 127 | #elif U_PLATFORM == U_PF_OS400 128 | /* not defined */ 129 | #elif U_PLATFORM == U_PF_IPHONE 130 | /* not defined */ 131 | #else 132 | # define U_TIMEZONE timezone 133 | #endif 134 | 135 | #if defined(U_TZNAME) || defined(U_HAVE_TZNAME) 136 | /* Use the predefined value. */ 137 | #elif U_PLATFORM_USES_ONLY_WIN32_API 138 | /* not usable on all windows platforms */ 139 | #if U_PLATFORM_HAS_WINUWP_API == 0 140 | # define U_TZNAME _tzname 141 | #endif 142 | #elif U_PLATFORM == U_PF_OS400 143 | /* not defined */ 144 | #else 145 | # define U_TZNAME tzname 146 | #endif 147 | 148 | #ifdef U_HAVE_MMAP 149 | /* Use the predefined value. */ 150 | #elif U_PLATFORM_USES_ONLY_WIN32_API 151 | # define U_HAVE_MMAP 0 152 | #else 153 | # define U_HAVE_MMAP 1 154 | #endif 155 | 156 | #ifdef U_HAVE_POPEN 157 | /* Use the predefined value. */ 158 | #elif U_PLATFORM_USES_ONLY_WIN32_API 159 | # define U_HAVE_POPEN 0 160 | #elif U_PLATFORM == U_PF_OS400 161 | # define U_HAVE_POPEN 0 162 | #else 163 | # define U_HAVE_POPEN 1 164 | #endif 165 | 166 | /** 167 | * \def U_HAVE_DIRENT_H 168 | * Defines whether dirent.h is available. 169 | * @internal 170 | */ 171 | #ifdef U_HAVE_DIRENT_H 172 | /* Use the predefined value. */ 173 | #elif U_PLATFORM_USES_ONLY_WIN32_API 174 | # define U_HAVE_DIRENT_H 0 175 | #else 176 | # define U_HAVE_DIRENT_H 1 177 | #endif 178 | 179 | /** @} */ 180 | 181 | /*===========================================================================*/ 182 | /** @{ Programs used by ICU code */ 183 | /*===========================================================================*/ 184 | 185 | /** 186 | * \def U_MAKE_IS_NMAKE 187 | * Defines whether the "make" program is Windows nmake. 188 | */ 189 | #ifdef U_MAKE_IS_NMAKE 190 | /* Use the predefined value. */ 191 | #elif U_PLATFORM == U_PF_WINDOWS 192 | # define U_MAKE_IS_NMAKE 1 193 | #else 194 | # define U_MAKE_IS_NMAKE 0 195 | #endif 196 | 197 | /** @} */ 198 | 199 | /*==========================================================================*/ 200 | /* Platform utilities */ 201 | /*==========================================================================*/ 202 | 203 | /** 204 | * Platform utilities isolates the platform dependencies of the 205 | * library. For each platform which this code is ported to, these 206 | * functions may have to be re-implemented. 207 | */ 208 | 209 | /** 210 | * Floating point utility to determine if a double is Not a Number (NaN). 211 | * @internal 212 | */ 213 | U_CAPI UBool U_EXPORT2 uprv_isNaN(double d); 214 | /** 215 | * Floating point utility to determine if a double has an infinite value. 216 | * @internal 217 | */ 218 | U_CAPI UBool U_EXPORT2 uprv_isInfinite(double d); 219 | /** 220 | * Floating point utility to determine if a double has a positive infinite value. 221 | * @internal 222 | */ 223 | U_CAPI UBool U_EXPORT2 uprv_isPositiveInfinity(double d); 224 | /** 225 | * Floating point utility to determine if a double has a negative infinite value. 226 | * @internal 227 | */ 228 | U_CAPI UBool U_EXPORT2 uprv_isNegativeInfinity(double d); 229 | /** 230 | * Floating point utility that returns a Not a Number (NaN) value. 231 | * @internal 232 | */ 233 | U_CAPI double U_EXPORT2 uprv_getNaN(void); 234 | /** 235 | * Floating point utility that returns an infinite value. 236 | * @internal 237 | */ 238 | U_CAPI double U_EXPORT2 uprv_getInfinity(void); 239 | 240 | /** 241 | * Floating point utility to truncate a double. 242 | * @internal 243 | */ 244 | U_CAPI double U_EXPORT2 uprv_trunc(double d); 245 | /** 246 | * Floating point utility to calculate the floor of a double. 247 | * @internal 248 | */ 249 | U_CAPI double U_EXPORT2 uprv_floor(double d); 250 | /** 251 | * Floating point utility to calculate the ceiling of a double. 252 | * @internal 253 | */ 254 | U_CAPI double U_EXPORT2 uprv_ceil(double d); 255 | /** 256 | * Floating point utility to calculate the absolute value of a double. 257 | * @internal 258 | */ 259 | U_CAPI double U_EXPORT2 uprv_fabs(double d); 260 | /** 261 | * Floating point utility to calculate the fractional and integer parts of a double. 262 | * @internal 263 | */ 264 | U_CAPI double U_EXPORT2 uprv_modf(double d, double* pinteger); 265 | /** 266 | * Floating point utility to calculate the remainder of a double divided by another double. 267 | * @internal 268 | */ 269 | U_CAPI double U_EXPORT2 uprv_fmod(double d, double y); 270 | /** 271 | * Floating point utility to calculate d to the power of exponent (d^exponent). 272 | * @internal 273 | */ 274 | U_CAPI double U_EXPORT2 uprv_pow(double d, double exponent); 275 | /** 276 | * Floating point utility to calculate 10 to the power of exponent (10^exponent). 277 | * @internal 278 | */ 279 | U_CAPI double U_EXPORT2 uprv_pow10(int32_t exponent); 280 | /** 281 | * Floating point utility to calculate the maximum value of two doubles. 282 | * @internal 283 | */ 284 | U_CAPI double U_EXPORT2 uprv_fmax(double d, double y); 285 | /** 286 | * Floating point utility to calculate the minimum value of two doubles. 287 | * @internal 288 | */ 289 | U_CAPI double U_EXPORT2 uprv_fmin(double d, double y); 290 | /** 291 | * Private utility to calculate the maximum value of two integers. 292 | * @internal 293 | */ 294 | U_CAPI int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y); 295 | /** 296 | * Private utility to calculate the minimum value of two integers. 297 | * @internal 298 | */ 299 | U_CAPI int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y); 300 | 301 | #if U_IS_BIG_ENDIAN 302 | # define uprv_isNegative(number) (*((signed char *)&(number))<0) 303 | #else 304 | # define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0) 305 | #endif 306 | 307 | /** 308 | * Return the largest positive number that can be represented by an integer 309 | * type of arbitrary bit length. 310 | * @internal 311 | */ 312 | U_CAPI double U_EXPORT2 uprv_maxMantissa(void); 313 | 314 | /** 315 | * Floating point utility to calculate the logarithm of a double. 316 | * @internal 317 | */ 318 | U_CAPI double U_EXPORT2 uprv_log(double d); 319 | 320 | /** 321 | * Does common notion of rounding e.g. uprv_floor(x + 0.5); 322 | * @param x the double number 323 | * @return the rounded double 324 | * @internal 325 | */ 326 | U_CAPI double U_EXPORT2 uprv_round(double x); 327 | 328 | /** 329 | * Adds the signed integers a and b, storing the result in res. 330 | * Checks for signed integer overflow. 331 | * Similar to the GCC/Clang extension __builtin_add_overflow 332 | * 333 | * @param a The first operand. 334 | * @param b The second operand. 335 | * @param res a + b 336 | * @return true if overflow occurred; false if no overflow occurred. 337 | * @internal 338 | */ 339 | U_CAPI UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res); 340 | 341 | /** 342 | * Multiplies the signed integers a and b, storing the result in res. 343 | * Checks for signed integer overflow. 344 | * Similar to the GCC/Clang extension __builtin_mul_overflow 345 | * 346 | * @param a The first multiplicand. 347 | * @param b The second multiplicand. 348 | * @param res a * b 349 | * @return true if overflow occurred; false if no overflow occurred. 350 | * @internal 351 | */ 352 | U_CAPI UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res); 353 | 354 | #if 0 355 | /** 356 | * Returns the number of digits after the decimal point in a double number x. 357 | * 358 | * @param x the double number 359 | * @return the number of digits after the decimal point in a double number x. 360 | * @internal 361 | */ 362 | /*U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/ 363 | #endif 364 | 365 | #if !U_CHARSET_IS_UTF8 366 | /** 367 | * Please use ucnv_getDefaultName() instead. 368 | * Return the default codepage for this platform and locale. 369 | * This function can call setlocale() on Unix platforms. Please read the 370 | * platform documentation on setlocale() before calling this function. 371 | * @return the default codepage for this platform 372 | * @internal 373 | */ 374 | U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage(void); 375 | #endif 376 | 377 | /** 378 | * Please use uloc_getDefault() instead. 379 | * Return the default locale ID string by querying the system, or 380 | * zero if one cannot be found. 381 | * This function can call setlocale() on Unix platforms. Please read the 382 | * platform documentation on setlocale() before calling this function. 383 | * @return the default locale ID string 384 | * @internal 385 | */ 386 | U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID(void); 387 | 388 | /** 389 | * Time zone utilities 390 | * 391 | * Wrappers for C runtime library functions relating to timezones. 392 | * The t_tzset() function (similar to tzset) uses the current setting 393 | * of the environment variable TZ to assign values to three global 394 | * variables: daylight, timezone, and tzname. These variables have the 395 | * following meanings, and are declared in <time.h>. 396 | * 397 | * daylight Nonzero if daylight-saving-time zone (DST) is specified 398 | * in TZ; otherwise, 0. Default value is 1. 399 | * timezone Difference in seconds between coordinated universal 400 | * time and local time. E.g., -28,800 for PST (GMT-8hrs) 401 | * tzname(0) Three-letter time-zone name derived from TZ environment 402 | * variable. E.g., "PST". 403 | * tzname(1) Three-letter DST zone name derived from TZ environment 404 | * variable. E.g., "PDT". If DST zone is omitted from TZ, 405 | * tzname(1) is an empty string. 406 | * 407 | * Notes: For example, to set the TZ environment variable to correspond 408 | * to the current time zone in Germany, you can use one of the 409 | * following statements: 410 | * 411 | * set TZ=GST1GDT 412 | * set TZ=GST+1GDT 413 | * 414 | * If the TZ value is not set, t_tzset() attempts to use the time zone 415 | * information specified by the operating system. Under Windows NT 416 | * and Windows 95, this information is specified in the Control Panel's 417 | * Date/Time application. 418 | * @internal 419 | */ 420 | U_CAPI void U_EXPORT2 uprv_tzset(void); 421 | 422 | /** 423 | * Difference in seconds between coordinated universal 424 | * time and local time. E.g., -28,800 for PST (GMT-8hrs) 425 | * @return the difference in seconds between coordinated universal time and local time. 426 | * @internal 427 | */ 428 | U_CAPI int32_t U_EXPORT2 uprv_timezone(void); 429 | 430 | /** 431 | * tzname(0) Three-letter time-zone name derived from TZ environment 432 | * variable. E.g., "PST". 433 | * tzname(1) Three-letter DST zone name derived from TZ environment 434 | * variable. E.g., "PDT". If DST zone is omitted from TZ, 435 | * tzname(1) is an empty string. 436 | * @internal 437 | */ 438 | U_CAPI const char* U_EXPORT2 uprv_tzname(int n); 439 | 440 | /** 441 | * Reset the global tzname cache. 442 | * @internal 443 | */ 444 | U_CAPI void uprv_tzname_clear_cache(void); 445 | 446 | /** 447 | * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. 448 | * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions. 449 | * @return the UTC time measured in milliseconds 450 | * @internal 451 | */ 452 | U_CAPI UDate U_EXPORT2 uprv_getUTCtime(void); 453 | 454 | /** 455 | * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. 456 | * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that 457 | * exposes time to the end user. 458 | * @return the UTC time measured in milliseconds 459 | * @internal 460 | */ 461 | U_CAPI UDate U_EXPORT2 uprv_getRawUTCtime(void); 462 | 463 | /** 464 | * Determine whether a pathname is absolute or not, as defined by the platform. 465 | * @param path Pathname to test 466 | * @return true if the path is absolute 467 | * @internal (ICU 3.0) 468 | */ 469 | U_CAPI UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path); 470 | 471 | /** 472 | * Use U_MAX_PTR instead of this function. 473 | * @param void pointer to test 474 | * @return the largest possible pointer greater than the base 475 | * @internal (ICU 3.8) 476 | */ 477 | U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base); 478 | 479 | /** 480 | * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer. 481 | * In fact, buffer sizes must not exceed 2GB so that the difference between 482 | * the buffer limit and the buffer start can be expressed in an int32_t. 483 | * 484 | * The definition of U_MAX_PTR must fulfill the following conditions: 485 | * - return the largest possible pointer greater than base 486 | * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.) 487 | * - avoid wrapping around at high addresses 488 | * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes 489 | * 490 | * @param base The beginning of a buffer to find the maximum offset from 491 | * @internal 492 | */ 493 | #ifndef U_MAX_PTR 494 | # if U_PLATFORM == U_PF_OS390 && !defined(_LP64) 495 | /* We have 31-bit pointers. */ 496 | # define U_MAX_PTR(base) ((void *)0x7fffffff) 497 | # elif U_PLATFORM == U_PF_OS400 498 | # define U_MAX_PTR(base) uprv_maximumPtr((void *)base) 499 | # elif 0 500 | /* 501 | * For platforms where pointers are scalar values (which is normal, but unlike i5/OS) 502 | * but that do not define uintptr_t. 503 | * 504 | * However, this does not work on modern compilers: 505 | * The C++ standard does not define pointer overflow, and allows compilers to 506 | * assume that p+u>p for any pointer p and any integer u>0. 507 | * Thus, modern compilers optimize away the ">" comparison. 508 | * (See ICU tickets #7187 and #8096.) 509 | */ 510 | # define U_MAX_PTR(base) \ 511 | ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \ 512 | ? ((char *)(base)+0x7fffffffu) \ 513 | : (char *)-1)) 514 | # else 515 | /* Default version. C++ standard compliant for scalar pointers. */ 516 | # define U_MAX_PTR(base) \ 517 | ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \ 518 | ? ((uintptr_t)(base)+0x7fffffffu) \ 519 | : (uintptr_t)-1)) 520 | # endif 521 | #endif 522 | 523 | 524 | #ifdef __cplusplus 525 | /** 526 | * Pin a buffer capacity such that doing pointer arithmetic 527 | * on the destination pointer and capacity cannot overflow. 528 | * 529 | * The pinned capacity must fulfill the following conditions (for positive capacities): 530 | * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.) 531 | * - (dest + capacity) >= dest 532 | * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff 533 | * 534 | * @param dest the destination buffer pointer. 535 | * @param capacity the requested buffer capacity, in units of type T. 536 | * @return the pinned capacity. 537 | * @internal 538 | */ 539 | template 540 | inline int32_t pinCapacity(T *dest, int32_t capacity) { 541 | if (capacity <= 0) { return capacity; } 542 | 543 | uintptr_t destInt = (uintptr_t)dest; 544 | uintptr_t maxInt; 545 | 546 | # if U_PLATFORM == U_PF_OS390 && !defined(_LP64) 547 | // We have 31-bit pointers. 548 | maxInt = 0x7fffffff; 549 | # elif U_PLATFORM == U_PF_OS400 550 | maxInt = (uintptr_t)uprv_maximumPtr((void *)dest); 551 | # else 552 | maxInt = destInt + 0x7fffffffu; 553 | if (maxInt < destInt) { 554 | // Less than 2GB to the end of the address space. 555 | // Pin to that to prevent address overflow. 556 | maxInt = (uintptr_t)-1; 557 | } 558 | # endif 559 | 560 | uintptr_t maxBytes = maxInt - destInt; // max. 2GB 561 | int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T)); 562 | return capacity <= maxCapacity ? capacity : maxCapacity; 563 | } 564 | #endif // __cplusplus 565 | 566 | /* Dynamic Library Functions */ 567 | 568 | typedef void (UVoidFunction)(void); 569 | 570 | #if U_ENABLE_DYLOAD 571 | /** 572 | * Load a library 573 | * @internal (ICU 4.4) 574 | */ 575 | U_CAPI void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status); 576 | 577 | /** 578 | * Close a library 579 | * @internal (ICU 4.4) 580 | */ 581 | U_CAPI void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status); 582 | 583 | /** 584 | * Extract a symbol from a library (function) 585 | * @internal (ICU 4.8) 586 | */ 587 | U_CAPI UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status); 588 | 589 | /** 590 | * Extract a symbol from a library (function) 591 | * Not implemented, no clients. 592 | * @internal 593 | */ 594 | /* U_CAPI void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */ 595 | 596 | #endif 597 | 598 | /** 599 | * Define malloc and related functions 600 | * @internal 601 | */ 602 | #if U_PLATFORM == U_PF_OS400 603 | # define uprv_default_malloc(x) _C_TS_malloc(x) 604 | # define uprv_default_realloc(x,y) _C_TS_realloc(x,y) 605 | # define uprv_default_free(x) _C_TS_free(x) 606 | /* also _C_TS_calloc(x) */ 607 | #else 608 | /* C defaults */ 609 | # define uprv_default_malloc(x) malloc(x) 610 | # define uprv_default_realloc(x,y) realloc(x,y) 611 | # define uprv_default_free(x) free(x) 612 | #endif 613 | 614 | 615 | #endif -------------------------------------------------------------------------------- /unicode/ucln.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 2001-2013, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ****************************************************************************** 10 | * file name: ucln.h 11 | * encoding: UTF-8 12 | * tab size: 8 (not used) 13 | * indentation:4 14 | * 15 | * created on: 2001July05 16 | * created by: George Rhoten 17 | */ 18 | 19 | #ifndef __UCLN_H__ 20 | #define __UCLN_H__ 21 | 22 | #include 23 | 24 | /** These are the functions used to register a library's memory cleanup 25 | * functions. Each library should define a single library register function 26 | * to call this API. In the i18n library, it is ucln_i18n_registerCleanup(). 27 | * 28 | * None of the cleanup functions should use a mutex to clean up an API's 29 | * allocated memory because a cleanup function is not meant to be thread safe, 30 | * and plenty of data cannot be reference counted in order to make sure that 31 | * no one else needs the allocated data. 32 | * 33 | * In order to make a cleanup function get called when u_cleanup is called, 34 | * You should add your function to the library specific cleanup function. 35 | * If the cleanup function is not in the common library, the code that 36 | * allocates the memory should call the library specific cleanup function. 37 | * For instance, in the i18n library, any memory allocated statically must 38 | * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library 39 | * cleanup functions are needed in order to prevent a circular dependency 40 | * between the common library and any other library. 41 | * 42 | * The order of the cleanup is very important. In general, an API that 43 | * depends on a second API should be cleaned up before the second API. 44 | * For instance, the default converter in ustring depends upon the converter 45 | * API. So the default converter should be closed before the converter API 46 | * has its cache flushed. This will prevent any memory leaks due to 47 | * reference counting. 48 | * 49 | * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples. 50 | */ 51 | 52 | /** 53 | * Data Type for cleanup function selector. These roughly correspond to libraries. 54 | */ 55 | typedef enum ECleanupLibraryType { 56 | UCLN_START = -1, 57 | UCLN_UPLUG, /* ICU plugins */ 58 | UCLN_CUSTOM, /* Custom is for anyone else. */ 59 | UCLN_CTESTFW, 60 | UCLN_TOOLUTIL, 61 | UCLN_LAYOUTEX, 62 | UCLN_LAYOUT, 63 | UCLN_IO, 64 | UCLN_I18N, 65 | UCLN_COMMON /* This must be the last one to cleanup. */ 66 | } ECleanupLibraryType; 67 | 68 | /** 69 | * Data type for cleanup function pointer 70 | */ 71 | U_CDECL_BEGIN 72 | typedef UBool U_CALLCONV cleanupFunc(void); 73 | typedef void U_CALLCONV initFunc(UErrorCode *); 74 | U_CDECL_END 75 | 76 | /** 77 | * Register a cleanup function 78 | * @param type which library to register for. 79 | * @param func the function pointer 80 | */ 81 | U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type, 82 | cleanupFunc *func); 83 | 84 | /** 85 | * Request cleanup for one specific library. 86 | * Not thread safe. 87 | * @param type which library to cleanup 88 | */ 89 | U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type); 90 | 91 | #endif -------------------------------------------------------------------------------- /unicode/ucln_cmn.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * Copyright (C) 2001-2016, International Business Machines 6 | * Corporation and others. All Rights Reserved. 7 | ****************************************************************************** 8 | * file name: ucln_cmn.h 9 | * encoding: UTF-8 10 | * tab size: 8 (not used) 11 | * indentation:4 12 | * 13 | * created on: 2001July05 14 | * created by: George Rhoten 15 | */ 16 | 17 | #ifndef __UCLN_CMN_H__ 18 | #define __UCLN_CMN_H__ 19 | 20 | #include 21 | #include "ucln.h" 22 | 23 | /* These are the cleanup functions for various APIs. */ 24 | /* @return true if cleanup complete successfully.*/ 25 | U_CFUNC UBool utrace_cleanup(void); 26 | 27 | U_CFUNC UBool ucln_lib_cleanup(void); 28 | 29 | /* 30 | Please keep the order of enums declared in same order 31 | as the cleanup functions are suppose to be called. */ 32 | typedef enum ECleanupCommonType { 33 | UCLN_COMMON_START = -1, 34 | UCLN_COMMON_NUMPARSE_UNISETS, 35 | UCLN_COMMON_USPREP, 36 | UCLN_COMMON_BREAKITERATOR, 37 | UCLN_COMMON_RBBI, 38 | UCLN_COMMON_SERVICE, 39 | UCLN_COMMON_LOCALE_KEY_TYPE, 40 | UCLN_COMMON_LOCALE, 41 | UCLN_COMMON_LOCALE_ALIAS, 42 | UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED, 43 | UCLN_COMMON_LOCALE_AVAILABLE, 44 | UCLN_COMMON_LIKELY_SUBTAGS, 45 | UCLN_COMMON_LOCALE_DISTANCE, 46 | UCLN_COMMON_ULOC, 47 | UCLN_COMMON_CURRENCY, 48 | UCLN_COMMON_LOADED_NORMALIZER2, 49 | UCLN_COMMON_NORMALIZER2, 50 | UCLN_COMMON_CHARACTERPROPERTIES, 51 | UCLN_COMMON_USET, 52 | UCLN_COMMON_UNAMES, 53 | UCLN_COMMON_UPROPS, 54 | UCLN_COMMON_EMOJIPROPS, 55 | UCLN_COMMON_UCNV, 56 | UCLN_COMMON_UCNV_IO, 57 | UCLN_COMMON_UDATA, 58 | UCLN_COMMON_PUTIL, 59 | UCLN_COMMON_UINIT, 60 | 61 | /* 62 | Unified caches caches collation stuff. Collation data structures 63 | contain resource bundles which means that unified cache cleanup 64 | must happen before resource bundle clean up. 65 | */ 66 | UCLN_COMMON_UNIFIED_CACHE, 67 | UCLN_COMMON_URES, 68 | UCLN_COMMON_MUTEX, // Mutexes should be the last to be cleaned up. 69 | UCLN_COMMON_COUNT /* This must be last */ 70 | } ECleanupCommonType; 71 | 72 | /* Main library cleanup registration function. */ 73 | /* See common/ucln.h for details on adding a cleanup function. */ 74 | /* Note: the global mutex must not be held when calling this function. */ 75 | U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type, 76 | cleanupFunc *func); 77 | 78 | #endif -------------------------------------------------------------------------------- /unicode/ucmndata.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1999-2011, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ******************************************************************************/ 10 | 11 | 12 | /*---------------------------------------------------------------------------------- 13 | * 14 | * UCommonData An abstract interface for dealing with ICU Common Data Files. 15 | * ICU Common Data Files are a grouping of a number of individual 16 | * data items (resources, converters, tables, anything) into a 17 | * single file or dll. The combined format includes a table of 18 | * contents for locating the individual items by name. 19 | * 20 | * Two formats for the table of contents are supported, which is 21 | * why there is an abstract interface involved. 22 | * 23 | * These functions are part of the ICU internal implementation, and 24 | * are not intended to be used directly by applications. 25 | */ 26 | 27 | #ifndef __UCMNDATA_H__ 28 | #define __UCMNDATA_H__ 29 | 30 | #include "udata.h" 31 | #include "umapfile.h" 32 | 33 | 34 | #define COMMON_DATA_NAME U_ICUDATA_NAME 35 | 36 | typedef struct { 37 | uint16_t headerSize; 38 | uint8_t magic1; 39 | uint8_t magic2; 40 | } MappedData; 41 | 42 | 43 | typedef struct { 44 | MappedData dataHeader; 45 | UDataInfo info; 46 | } DataHeader; 47 | 48 | typedef struct { 49 | uint32_t nameOffset; 50 | uint32_t dataOffset; 51 | } UDataOffsetTOCEntry; 52 | 53 | typedef struct { 54 | uint32_t count; 55 | /** 56 | * Variable-length array declared with length 1 to disable bounds checkers. 57 | * The actual array length is in the count field. 58 | */ 59 | UDataOffsetTOCEntry entry[1]; 60 | } UDataOffsetTOC; 61 | 62 | /** 63 | * Get the header size from a const DataHeader *udh. 64 | * Handles opposite-endian data. 65 | * 66 | * @internal 67 | */ 68 | U_CFUNC uint16_t 69 | udata_getHeaderSize(const DataHeader *udh); 70 | 71 | /** 72 | * Get the UDataInfo.size from a const UDataInfo *info. 73 | * Handles opposite-endian data. 74 | * 75 | * @internal 76 | */ 77 | U_CFUNC uint16_t 78 | udata_getInfoSize(const UDataInfo *info); 79 | 80 | U_CDECL_BEGIN 81 | /* 82 | * "Virtual" functions for data lookup. 83 | * To call one, given a UDataMemory *p, the code looks like this: 84 | * p->vFuncs.Lookup(p, tocEntryName, pErrorCode); 85 | * (I sure do wish this was written in C++, not C) 86 | */ 87 | 88 | typedef const DataHeader * 89 | (U_CALLCONV * LookupFn)(const UDataMemory *pData, 90 | const char *tocEntryName, 91 | int32_t *pLength, 92 | UErrorCode *pErrorCode); 93 | 94 | typedef uint32_t 95 | (U_CALLCONV * NumEntriesFn)(const UDataMemory *pData); 96 | 97 | U_CDECL_END 98 | 99 | typedef struct { 100 | LookupFn Lookup; 101 | NumEntriesFn NumEntries; 102 | } commonDataFuncs; 103 | 104 | 105 | /* 106 | * Functions to check whether a UDataMemory refers to memory containing 107 | * a recognizable header and table of contents a Common Data Format 108 | * 109 | * If a valid header and TOC are found, 110 | * set the CommonDataFuncs function dispatch vector in the UDataMemory 111 | * to point to the right functions for the TOC type. 112 | * otherwise 113 | * set an errorcode. 114 | */ 115 | U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode); 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /unicode/ucptrie.h: -------------------------------------------------------------------------------- 1 | // © 2017 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | 4 | // ucptrie.h (modified from utrie2.h) 5 | // created: 2017dec29 Markus W. Scherer 6 | 7 | #ifndef __UCPTRIE_H__ 8 | #define __UCPTRIE_H__ 9 | 10 | #include 11 | #include "unicode/ucpmap.h" 12 | #include "unicode/utf8.h" 13 | 14 | #if U_SHOW_CPLUSPLUS_API 15 | #include "unicode/localpointer.h" 16 | #endif // U_SHOW_CPLUSPLUS_API 17 | 18 | U_CDECL_BEGIN 19 | 20 | /** 21 | * \file 22 | * 23 | * This file defines an immutable Unicode code point trie. 24 | * 25 | * @see UCPTrie 26 | * @see UMutableCPTrie 27 | */ 28 | 29 | #ifndef U_IN_DOXYGEN 30 | /** @internal */ 31 | typedef union UCPTrieData { 32 | /** @internal */ 33 | const void *ptr0; 34 | /** @internal */ 35 | const uint16_t *ptr16; 36 | /** @internal */ 37 | const uint32_t *ptr32; 38 | /** @internal */ 39 | const uint8_t *ptr8; 40 | } UCPTrieData; 41 | #endif 42 | 43 | /** 44 | * Immutable Unicode code point trie structure. 45 | * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. 46 | * For details see https://icu.unicode.org/design/struct/utrie 47 | * 48 | * Do not access UCPTrie fields directly; use public functions and macros. 49 | * Functions are easy to use: They support all trie types and value widths. 50 | * 51 | * When performance is really important, macros provide faster access. 52 | * Most macros are specific to either "fast" or "small" tries, see UCPTrieType. 53 | * There are "fast" macros for special optimized use cases. 54 | * 55 | * The macros will return bogus values, or may crash, if used on the wrong type or value width. 56 | * 57 | * @see UMutableCPTrie 58 | * @stable ICU 63 59 | */ 60 | struct UCPTrie { 61 | #ifndef U_IN_DOXYGEN 62 | /** @internal */ 63 | const uint16_t *index; 64 | /** @internal */ 65 | UCPTrieData data; 66 | 67 | /** @internal */ 68 | int32_t indexLength; 69 | /** @internal */ 70 | int32_t dataLength; 71 | /** Start of the last range which ends at U+10FFFF. @internal */ 72 | UChar32 highStart; 73 | /** highStart>>12 @internal */ 74 | uint16_t shifted12HighStart; 75 | 76 | /** @internal */ 77 | int8_t type; // UCPTrieType 78 | /** @internal */ 79 | int8_t valueWidth; // UCPTrieValueWidth 80 | 81 | /** padding/reserved @internal */ 82 | uint32_t reserved32; 83 | /** padding/reserved @internal */ 84 | uint16_t reserved16; 85 | 86 | /** 87 | * Internal index-3 null block offset. 88 | * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. 89 | * @internal 90 | */ 91 | uint16_t index3NullOffset; 92 | /** 93 | * Internal data null block offset, not shifted. 94 | * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. 95 | * @internal 96 | */ 97 | int32_t dataNullOffset; 98 | /** @internal */ 99 | uint32_t nullValue; 100 | 101 | #ifdef UCPTRIE_DEBUG 102 | /** @internal */ 103 | const char *name; 104 | #endif 105 | #endif 106 | }; 107 | #ifndef U_IN_DOXYGEN 108 | typedef struct UCPTrie UCPTrie; 109 | #endif 110 | 111 | /** 112 | * Selectors for the type of a UCPTrie. 113 | * Different trade-offs for size vs. speed. 114 | * 115 | * @see umutablecptrie_buildImmutable 116 | * @see ucptrie_openFromBinary 117 | * @see ucptrie_getType 118 | * @stable ICU 63 119 | */ 120 | enum UCPTrieType { 121 | /** 122 | * For ucptrie_openFromBinary() to accept any type. 123 | * ucptrie_getType() will return the actual type. 124 | * @stable ICU 63 125 | */ 126 | UCPTRIE_TYPE_ANY = -1, 127 | /** 128 | * Fast/simple/larger BMP data structure. Use functions and "fast" macros. 129 | * @stable ICU 63 130 | */ 131 | UCPTRIE_TYPE_FAST, 132 | /** 133 | * Small/slower BMP data structure. Use functions and "small" macros. 134 | * @stable ICU 63 135 | */ 136 | UCPTRIE_TYPE_SMALL 137 | }; 138 | #ifndef U_IN_DOXYGEN 139 | typedef enum UCPTrieType UCPTrieType; 140 | #endif 141 | 142 | /** 143 | * Selectors for the number of bits in a UCPTrie data value. 144 | * 145 | * @see umutablecptrie_buildImmutable 146 | * @see ucptrie_openFromBinary 147 | * @see ucptrie_getValueWidth 148 | * @stable ICU 63 149 | */ 150 | enum UCPTrieValueWidth { 151 | /** 152 | * For ucptrie_openFromBinary() to accept any data value width. 153 | * ucptrie_getValueWidth() will return the actual data value width. 154 | * @stable ICU 63 155 | */ 156 | UCPTRIE_VALUE_BITS_ANY = -1, 157 | /** 158 | * The trie stores 16 bits per data value. 159 | * It returns them as unsigned values 0..0xffff=65535. 160 | * @stable ICU 63 161 | */ 162 | UCPTRIE_VALUE_BITS_16, 163 | /** 164 | * The trie stores 32 bits per data value. 165 | * @stable ICU 63 166 | */ 167 | UCPTRIE_VALUE_BITS_32, 168 | /** 169 | * The trie stores 8 bits per data value. 170 | * It returns them as unsigned values 0..0xff=255. 171 | * @stable ICU 63 172 | */ 173 | UCPTRIE_VALUE_BITS_8 174 | }; 175 | #ifndef U_IN_DOXYGEN 176 | typedef enum UCPTrieValueWidth UCPTrieValueWidth; 177 | #endif 178 | 179 | /** 180 | * Returns the trie type. 181 | * 182 | * @param trie the trie 183 | * @return the trie type 184 | * @see ucptrie_openFromBinary 185 | * @see UCPTRIE_TYPE_ANY 186 | * @stable ICU 63 187 | */ 188 | U_CAPI UCPTrieType U_EXPORT2 189 | ucptrie_getType(const UCPTrie *trie); 190 | 191 | /** 192 | * Returns the number of bits in a trie data value. 193 | * 194 | * @param trie the trie 195 | * @return the number of bits in a trie data value 196 | * @see ucptrie_openFromBinary 197 | * @see UCPTRIE_VALUE_BITS_ANY 198 | * @stable ICU 63 199 | */ 200 | U_CAPI UCPTrieValueWidth U_EXPORT2 201 | ucptrie_getValueWidth(const UCPTrie *trie); 202 | 203 | /** 204 | * Returns the value for a code point as stored in the trie, with range checking. 205 | * Returns the trie error value if c is not in the range 0..U+10FFFF. 206 | * 207 | * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. 208 | * Easier to use because, unlike the macros, this function works on all UCPTrie 209 | * objects, for all types and value widths. 210 | * 211 | * @param trie the trie 212 | * @param c the code point 213 | * @return the trie value, 214 | * or the trie error value if the code point is not in the range 0..U+10FFFF 215 | * @stable ICU 63 216 | */ 217 | U_CAPI uint32_t U_EXPORT2 218 | ucptrie_get(const UCPTrie *trie, UChar32 c); 219 | 220 | /** 221 | * Returns the last code point such that all those from start to there have the same value. 222 | * Can be used to efficiently iterate over all same-value ranges in a trie. 223 | * (This is normally faster than iterating over code points and get()ting each value, 224 | * but much slower than a data structure that stores ranges directly.) 225 | * 226 | * If the UCPMapValueFilter function pointer is not NULL, then 227 | * the value to be delivered is passed through that function, and the return value is the end 228 | * of the range where all values are modified to the same actual value. 229 | * The value is unchanged if that function pointer is NULL. 230 | * 231 | * Example: 232 | * \code 233 | * UChar32 start = 0, end; 234 | * uint32_t value; 235 | * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, 236 | * NULL, NULL, &value)) >= 0) { 237 | * // Work with the range start..end and its value. 238 | * start = end + 1; 239 | * } 240 | * \endcode 241 | * 242 | * @param trie the trie 243 | * @param start range start 244 | * @param option defines whether surrogates are treated normally, 245 | * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL 246 | * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL 247 | * @param filter a pointer to a function that may modify the trie data value, 248 | * or NULL if the values from the trie are to be used unmodified 249 | * @param context an opaque pointer that is passed on to the filter function 250 | * @param pValue if not NULL, receives the value that every code point start..end has; 251 | * may have been modified by filter(context, trie value) 252 | * if that function pointer is not NULL 253 | * @return the range end code point, or -1 if start is not a valid code point 254 | * @stable ICU 63 255 | */ 256 | U_CAPI UChar32 U_EXPORT2 257 | ucptrie_getRange(const UCPTrie *trie, UChar32 start, 258 | UCPMapRangeOption option, uint32_t surrogateValue, 259 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue); 260 | 261 | /** 262 | * Writes a memory-mappable form of the trie into 32-bit aligned memory. 263 | * Inverse of ucptrie_openFromBinary(). 264 | * 265 | * @param trie the trie 266 | * @param data a pointer to 32-bit-aligned memory to be filled with the trie data; 267 | * can be NULL if capacity==0 268 | * @param capacity the number of bytes available at data, or 0 for pure preflighting 269 | * @param pErrorCode an in/out ICU UErrorCode; 270 | * U_BUFFER_OVERFLOW_ERROR if the capacity is too small 271 | * @return the number of bytes written or (if buffer overflow) needed for the trie 272 | * 273 | * @see ucptrie_openFromBinary() 274 | * @stable ICU 63 275 | */ 276 | U_CAPI int32_t U_EXPORT2 277 | ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); 278 | 279 | /** 280 | * Macro parameter value for a trie with 16-bit data values. 281 | * Use the name of this macro as a "dataAccess" parameter in other macros. 282 | * Do not use this macro in any other way. 283 | * 284 | * @see UCPTRIE_VALUE_BITS_16 285 | * @stable ICU 63 286 | */ 287 | #define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) 288 | 289 | /** 290 | * Macro parameter value for a trie with 32-bit data values. 291 | * Use the name of this macro as a "dataAccess" parameter in other macros. 292 | * Do not use this macro in any other way. 293 | * 294 | * @see UCPTRIE_VALUE_BITS_32 295 | * @stable ICU 63 296 | */ 297 | #define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) 298 | 299 | /** 300 | * Macro parameter value for a trie with 8-bit data values. 301 | * Use the name of this macro as a "dataAccess" parameter in other macros. 302 | * Do not use this macro in any other way. 303 | * 304 | * @see UCPTRIE_VALUE_BITS_8 305 | * @stable ICU 63 306 | */ 307 | #define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) 308 | 309 | /** 310 | * Returns a trie value for a code point, with range checking. 311 | * Returns the trie error value if c is not in the range 0..U+10FFFF. 312 | * 313 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 314 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 315 | * @param c (UChar32, in) the input code point 316 | * @return The code point's trie value. 317 | * @stable ICU 63 318 | */ 319 | #define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) 320 | 321 | /** 322 | * Returns a 16-bit trie value for a code point, with range checking. 323 | * Returns the trie error value if c is not in the range U+0000..U+10FFFF. 324 | * 325 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL 326 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 327 | * @param c (UChar32, in) the input code point 328 | * @return The code point's trie value. 329 | * @stable ICU 63 330 | */ 331 | #define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ 332 | dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) 333 | 334 | /** 335 | * UTF-16: Reads the next code point (UChar32 c, out), post-increments src, 336 | * and gets a value from the trie. 337 | * Sets the trie error value if c is an unpaired surrogate. 338 | * 339 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 340 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 341 | * @param src (const UChar *, in/out) the source text pointer 342 | * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated 343 | * @param c (UChar32, out) variable for the code point 344 | * @param result (out) variable for the trie lookup result 345 | * @stable ICU 63 346 | */ 347 | #define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ 348 | (c) = *(src)++; \ 349 | int32_t __index; \ 350 | if (!U16_IS_SURROGATE(c)) { \ 351 | __index = _UCPTRIE_FAST_INDEX(trie, c); \ 352 | } else { \ 353 | uint16_t __c2; \ 354 | if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \ 355 | ++(src); \ 356 | (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ 357 | __index = _UCPTRIE_SMALL_INDEX(trie, c); \ 358 | } else { \ 359 | __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ 360 | } \ 361 | } \ 362 | (result) = dataAccess(trie, __index); \ 363 | } UPRV_BLOCK_MACRO_END 364 | 365 | /** 366 | * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, 367 | * and gets a value from the trie. 368 | * Sets the trie error value if c is an unpaired surrogate. 369 | * 370 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 371 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 372 | * @param start (const UChar *, in) the start pointer for the text 373 | * @param src (const UChar *, in/out) the source text pointer 374 | * @param c (UChar32, out) variable for the code point 375 | * @param result (out) variable for the trie lookup result 376 | * @stable ICU 63 377 | */ 378 | #define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ 379 | (c) = *--(src); \ 380 | int32_t __index; \ 381 | if (!U16_IS_SURROGATE(c)) { \ 382 | __index = _UCPTRIE_FAST_INDEX(trie, c); \ 383 | } else { \ 384 | uint16_t __c2; \ 385 | if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \ 386 | --(src); \ 387 | (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ 388 | __index = _UCPTRIE_SMALL_INDEX(trie, c); \ 389 | } else { \ 390 | __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ 391 | } \ 392 | } \ 393 | (result) = dataAccess(trie, __index); \ 394 | } UPRV_BLOCK_MACRO_END 395 | 396 | /** 397 | * UTF-8: Post-increments src and gets a value from the trie. 398 | * Sets the trie error value for an ill-formed byte sequence. 399 | * 400 | * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point 401 | * because it would be more work to do so and is often not needed. 402 | * If the trie value differs from the error value, then the byte sequence is well-formed, 403 | * and the code point can be assembled without revalidation. 404 | * 405 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 406 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 407 | * @param src (const char *, in/out) the source text pointer 408 | * @param limit (const char *, in) the limit pointer for the text (must not be NULL) 409 | * @param result (out) variable for the trie lookup result 410 | * @stable ICU 63 411 | */ 412 | #define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ 413 | int32_t __lead = (uint8_t)*(src)++; \ 414 | if (!U8_IS_SINGLE(__lead)) { \ 415 | uint8_t __t1, __t2, __t3; \ 416 | if ((src) != (limit) && \ 417 | (__lead >= 0xe0 ? \ 418 | __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \ 419 | U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \ 420 | ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \ 421 | (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \ 422 | : /* U+10000..U+10FFFF */ \ 423 | (__lead -= 0xf0) <= 4 && \ 424 | U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \ 425 | (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \ 426 | (__t2 = *(src) - 0x80) <= 0x3f && \ 427 | ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \ 428 | (__lead = __lead >= (trie)->shifted12HighStart ? \ 429 | (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ 430 | ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \ 431 | : /* U+0080..U+07FF */ \ 432 | __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \ 433 | (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \ 434 | ++(src); \ 435 | } else { \ 436 | __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \ 437 | } \ 438 | } \ 439 | (result) = dataAccess(trie, __lead); \ 440 | } UPRV_BLOCK_MACRO_END 441 | 442 | /** 443 | * UTF-8: Pre-decrements src and gets a value from the trie. 444 | * Sets the trie error value for an ill-formed byte sequence. 445 | * 446 | * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point 447 | * because it would be more work to do so and is often not needed. 448 | * If the trie value differs from the error value, then the byte sequence is well-formed, 449 | * and the code point can be assembled without revalidation. 450 | * 451 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 452 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 453 | * @param start (const char *, in) the start pointer for the text 454 | * @param src (const char *, in/out) the source text pointer 455 | * @param result (out) variable for the trie lookup result 456 | * @stable ICU 63 457 | */ 458 | #define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ 459 | int32_t __index = (uint8_t)*--(src); \ 460 | if (!U8_IS_SINGLE(__index)) { \ 461 | __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ 462 | (const uint8_t *)(src)); \ 463 | (src) -= __index & 7; \ 464 | __index >>= 3; \ 465 | } \ 466 | (result) = dataAccess(trie, __index); \ 467 | } UPRV_BLOCK_MACRO_END 468 | 469 | /** 470 | * Returns a trie value for an ASCII code point, without range checking. 471 | * 472 | * @param trie (const UCPTrie *, in) the trie (of either fast or small type) 473 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 474 | * @param c (UChar32, in) the input code point; must be U+0000..U+007F 475 | * @return The ASCII code point's trie value. 476 | * @stable ICU 63 477 | */ 478 | #define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) 479 | 480 | /** 481 | * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. 482 | * Can be used to look up a value for a UTF-16 code unit if other parts of 483 | * the string processing check for surrogates. 484 | * 485 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 486 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 487 | * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF 488 | * @return The BMP code point's trie value. 489 | * @stable ICU 63 490 | */ 491 | #define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) 492 | 493 | /** 494 | * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), 495 | * without range checking. 496 | * 497 | * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST 498 | * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width 499 | * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF 500 | * @return The supplementary code point's trie value. 501 | * @stable ICU 63 502 | */ 503 | #define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) 504 | 505 | /* Internal definitions ----------------------------------------------------- */ 506 | 507 | #ifndef U_IN_DOXYGEN 508 | 509 | /** 510 | * Internal implementation constants. 511 | * These are needed for the API macros, but users should not use these directly. 512 | * @internal 513 | */ 514 | enum { 515 | /** @internal */ 516 | UCPTRIE_FAST_SHIFT = 6, 517 | 518 | /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ 519 | UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT, 520 | 521 | /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ 522 | UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1, 523 | 524 | /** @internal */ 525 | UCPTRIE_SMALL_MAX = 0xfff, 526 | 527 | /** 528 | * Offset from dataLength (to be subtracted) for fetching the 529 | * value returned for out-of-range code points and ill-formed UTF-8/16. 530 | * @internal 531 | */ 532 | UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1, 533 | /** 534 | * Offset from dataLength (to be subtracted) for fetching the 535 | * value returned for code points highStart..U+10FFFF. 536 | * @internal 537 | */ 538 | UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2 539 | }; 540 | 541 | /* Internal functions and macros -------------------------------------------- */ 542 | // Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API 543 | 544 | /** @internal */ 545 | U_CAPI int32_t U_EXPORT2 546 | ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3); 547 | 548 | /** 549 | * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations. 550 | * Do not call directly. 551 | * @internal 552 | */ 553 | U_CAPI int32_t U_EXPORT2 554 | ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, 555 | const uint8_t *start, const uint8_t *src); 556 | 557 | /** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ 558 | #define _UCPTRIE_FAST_INDEX(trie, c) \ 559 | ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)) 560 | 561 | /** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */ 562 | #define _UCPTRIE_SMALL_INDEX(trie, c) \ 563 | ((c) >= (trie)->highStart ? \ 564 | (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ 565 | ucptrie_internalSmallIndex(trie, c)) 566 | 567 | /** 568 | * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. 569 | * Returns the data index. 570 | * @internal 571 | */ 572 | #define _UCPTRIE_CP_INDEX(trie, fastMax, c) \ 573 | ((uint32_t)(c) <= (uint32_t)(fastMax) ? \ 574 | _UCPTRIE_FAST_INDEX(trie, c) : \ 575 | (uint32_t)(c) <= 0x10ffff ? \ 576 | _UCPTRIE_SMALL_INDEX(trie, c) : \ 577 | (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET) 578 | 579 | U_CDECL_END 580 | 581 | #endif // U_IN_DOXYGEN 582 | 583 | #if U_SHOW_CPLUSPLUS_API 584 | 585 | U_NAMESPACE_BEGIN 586 | 587 | /** 588 | * \class LocalUCPTriePointer 589 | * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). 590 | * For most methods see the LocalPointerBase base class. 591 | * 592 | * @see LocalPointerBase 593 | * @see LocalPointer 594 | * @stable ICU 63 595 | */ 596 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); 597 | 598 | U_NAMESPACE_END 599 | 600 | #endif // U_SHOW_CPLUSPLUS_API 601 | 602 | #endif -------------------------------------------------------------------------------- /unicode/ucptrie_impl.h: -------------------------------------------------------------------------------- 1 | // © 2017 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | 4 | // ucptrie_impl.h (modified from utrie2_impl.h) 5 | // created: 2017dec29 Markus W. Scherer 6 | 7 | #ifndef __UCPTRIE_IMPL_H__ 8 | #define __UCPTRIE_IMPL_H__ 9 | 10 | #include "ucptrie.h" 11 | 12 | // UCPTrie signature values, in platform endianness and opposite endianness. 13 | // The UCPTrie signature ASCII byte values spell "Tri3". 14 | #define UCPTRIE_SIG 0x54726933 15 | #define UCPTRIE_OE_SIG 0x33697254 16 | 17 | /** 18 | * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. 19 | * @internal 20 | */ 21 | struct UCPTrieHeader { 22 | /** "Tri3" in big-endian US-ASCII (0x54726933) */ 23 | uint32_t signature; 24 | 25 | /** 26 | * Options bit field: 27 | * Bits 15..12: Data length bits 19..16. 28 | * Bits 11..8: Data null block offset bits 19..16. 29 | * Bits 7..6: UCPTrieType 30 | * Bits 5..3: Reserved (0). 31 | * Bits 2..0: UCPTrieValueWidth 32 | */ 33 | uint16_t options; 34 | 35 | /** Total length of the index tables. */ 36 | uint16_t indexLength; 37 | 38 | /** Data length bits 15..0. */ 39 | uint16_t dataLength; 40 | 41 | /** Index-3 null block offset, 0x7fff or 0xffff if none. */ 42 | uint16_t index3NullOffset; 43 | 44 | /** Data null block offset bits 15..0, 0xfffff if none. */ 45 | uint16_t dataNullOffset; 46 | 47 | /** 48 | * First code point of the single-value range ending with U+10ffff, 49 | * rounded up and then shifted right by UCPTRIE_SHIFT_2. 50 | */ 51 | uint16_t shiftedHighStart; 52 | }; 53 | 54 | /** 55 | * Constants for use with UCPTrieHeader.options. 56 | * @internal 57 | */ 58 | enum { 59 | UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, 60 | UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, 61 | UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, 62 | UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, 63 | /** 64 | * Value for index3NullOffset which indicates that there is no index-3 null block. 65 | * Bit 15 is unused for this value because this bit is used if the index-3 contains 66 | * 18-bit indexes. 67 | */ 68 | UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff, 69 | UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff 70 | }; 71 | 72 | // Internal constants. 73 | enum { 74 | /** The length of the BMP index table. 1024=0x400 */ 75 | UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT, 76 | 77 | UCPTRIE_SMALL_LIMIT = 0x1000, 78 | UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, 79 | 80 | /** Shift size for getting the index-3 table offset. */ 81 | UCPTRIE_SHIFT_3 = 4, 82 | 83 | /** Shift size for getting the index-2 table offset. */ 84 | UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3, 85 | 86 | /** Shift size for getting the index-1 table offset. */ 87 | UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2, 88 | 89 | /** 90 | * Difference between two shift sizes, 91 | * for getting an index-2 offset from an index-3 offset. 5=9-4 92 | */ 93 | UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3, 94 | 95 | /** 96 | * Difference between two shift sizes, 97 | * for getting an index-1 offset from an index-2 offset. 5=14-9 98 | */ 99 | UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2, 100 | 101 | /** 102 | * Number of index-1 entries for the BMP. (4) 103 | * This part of the index-1 table is omitted from the serialized form. 104 | */ 105 | UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1, 106 | 107 | /** Number of entries in an index-2 block. 32=0x20 */ 108 | UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2, 109 | 110 | /** Mask for getting the lower bits for the in-index-2-block offset. */ 111 | UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1, 112 | 113 | /** Number of code points per index-2 table entry. 512=0x200 */ 114 | UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2, 115 | 116 | /** Number of entries in an index-3 block. 32=0x20 */ 117 | UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3, 118 | 119 | /** Mask for getting the lower bits for the in-index-3-block offset. */ 120 | UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1, 121 | 122 | /** Number of entries in a small data block. 16=0x10 */ 123 | UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3, 124 | 125 | /** Mask for getting the lower bits for the in-small-data-block offset. */ 126 | UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1 127 | }; 128 | 129 | typedef UChar32 130 | UCPTrieGetRange(const void *trie, UChar32 start, 131 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue); 132 | 133 | U_CFUNC UChar32 134 | ucptrie_internalGetRange(UCPTrieGetRange *getRange, 135 | const void *trie, UChar32 start, 136 | UCPMapRangeOption option, uint32_t surrogateValue, 137 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue); 138 | 139 | #ifdef UCPTRIE_DEBUG 140 | U_CFUNC void 141 | ucptrie_printLengths(const UCPTrie *trie, const char *which); 142 | 143 | U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); 144 | #endif 145 | 146 | /* 147 | * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. 148 | * For overview information see https://icu.unicode.org/design/struct/utrie 149 | * 150 | * The binary trie data should be 32-bit-aligned. 151 | * The overall layout is: 152 | * 153 | * UCPTrieHeader header; -- 16 bytes, see struct definition above 154 | * uint16_t index[header.indexLength]; 155 | * uintXY_t data[header.dataLength]; 156 | * 157 | * The trie data array is an array of uint16_t, uint32_t, or uint8_t, 158 | * specified via the UCPTrieValueWidth when building the trie. 159 | * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. 160 | * The overall length of the trie data is a multiple of 4 bytes. 161 | * (Padding is added at the end of the index array and/or near the end of the data array as needed.) 162 | * 163 | * The length of the data array (dataLength) is stored as an integer split across two fields 164 | * of the header struct (high bits in header.options). 165 | * 166 | * The trie type can be "fast" or "small" which determines the index structure, 167 | * specified via the UCPTrieType when building the trie. 168 | * 169 | * The type and valueWidth are stored in the header.options. 170 | * There are reserved type and valueWidth values, and reserved header.options bits. 171 | * They could be used in future format extensions. 172 | * Code reading the trie structure must fail with an error when unknown values or options are set. 173 | * 174 | * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array. 175 | * 176 | * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. 177 | * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. 178 | * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. 179 | * 180 | * All code points in the range highStart..U+10FFFF map to a single highValue 181 | * which is stored at the second-to-last position of the data array. 182 | * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) 183 | * The highStart value is header.shiftedHighStart<>UCPTRIE_SHIFT_1. 205 | * (For 0x100000 supplementary code points U+10000..U+10ffff.) 206 | * 207 | * After this index-1 table follow the variable-length index-3 and index-2 tables. 208 | * 209 | * The supplementary index tables are omitted completely 210 | * if there is only BMP data (highStart<=U+10000). 211 | * 212 | * For a "small" trie: 213 | * 214 | * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. 215 | * 216 | * The "supplementary" index tables are always stored. 217 | * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. 218 | * 219 | * For both trie types: 220 | * 221 | * The last index-2 block may be a partial block, storing indexes only for code points 222 | * below highStart. 223 | * 224 | * Lookup for ASCII code point c: 225 | * 226 | * Linear access from the start of the data array. 227 | * 228 | * value = data[c]; 229 | * 230 | * Lookup for fast-range code point c: 231 | * 232 | * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, 233 | * fetch the index array value at that offset, 234 | * add the lower code point bits, index into the data array. 235 | * 236 | * value = data[index[c>>6] + (c&0x3f)]; 237 | * 238 | * (This works for ASCII as well.) 239 | * 240 | * Lookup for small-range code point c below highStart: 241 | * 242 | * Split the code point into four bit fields using several sets of shifts & masks 243 | * to read consecutive values from the index-1, index-2, index-3 and data tables. 244 | * 245 | * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), 246 | * then the data block offsets are stored directly as uint16_t. 247 | * 248 | * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block 249 | * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by 250 | * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored 251 | * in the additional word. 252 | * 253 | * See ucptrie_internalSmallIndex() for details. 254 | * 255 | * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) 256 | * 257 | * Compaction: 258 | * 259 | * Multiple code point ranges ("blocks") that are aligned on certain boundaries 260 | * (determined by the shifting/bit fields of code points) and 261 | * map to the same data values normally share a single subsequence of the data array. 262 | * Data blocks can also overlap partially. 263 | * (Depending on the builder code finding duplicate and overlapping blocks.) 264 | * 265 | * Iteration over same-value ranges: 266 | * 267 | * Range iteration (ucptrie_getRange()) walks the structure from a start code point 268 | * until some code point is found that maps to a different value; 269 | * the end of the returned range is just before that. 270 | * 271 | * The header.dataNullOffset (split across two header fields, high bits in header.options) 272 | * is the offset of a widely shared data block filled with one single value. 273 | * It helps quickly skip over large ranges of data with that value. 274 | * The builder must ensure that if the start of any data block (fast or small) 275 | * matches the dataNullOffset, then the whole block must be filled with the null value. 276 | * Special care must be taken if there is no fast null data block 277 | * but a small one, which is shorter, and it matches the *start* of some fast data block. 278 | * 279 | * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block 280 | * where all index entries point to the dataNullOffset. 281 | * If there is no such data or index-3 block, then these offsets are set to 282 | * values that cannot be reached (data offset out of range/reserved index offset), 283 | * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. 284 | */ 285 | 286 | #endif 287 | -------------------------------------------------------------------------------- /unicode/udata.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1999-2014, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ****************************************************************************** 10 | * file name: udata.h 11 | * encoding: UTF-8 12 | * tab size: 8 (not used) 13 | * indentation:4 14 | * 15 | * created on: 1999oct25 16 | * created by: Markus W. Scherer 17 | */ 18 | 19 | #ifndef __UDATA_H__ 20 | #define __UDATA_H__ 21 | 22 | #include 23 | 24 | #if U_SHOW_CPLUSPLUS_API 25 | #include "unicode/localpointer.h" 26 | #endif // U_SHOW_CPLUSPLUS_API 27 | 28 | U_CDECL_BEGIN 29 | 30 | /** 31 | * \file 32 | * \brief C API: Data loading interface 33 | * 34 | *

Information about data loading interface

35 | * 36 | * This API is used to find and efficiently load data for ICU and applications 37 | * using ICU. It provides an abstract interface that specifies a data type and 38 | * name to find and load the data. Normally this API is used by other ICU APIs 39 | * to load required data out of the ICU data library, but it can be used to 40 | * load data out of other places. 41 | * 42 | * See the User Guide Data Management chapter. 43 | */ 44 | 45 | #ifndef U_HIDE_INTERNAL_API 46 | /** 47 | * Character used to separate package names from tree names 48 | * @internal ICU 3.0 49 | */ 50 | #define U_TREE_SEPARATOR '-' 51 | 52 | /** 53 | * String used to separate package names from tree names 54 | * @internal ICU 3.0 55 | */ 56 | #define U_TREE_SEPARATOR_STRING "-" 57 | 58 | /** 59 | * Character used to separate parts of entry names 60 | * @internal ICU 3.0 61 | */ 62 | #define U_TREE_ENTRY_SEP_CHAR '/' 63 | 64 | /** 65 | * String used to separate parts of entry names 66 | * @internal ICU 3.0 67 | */ 68 | #define U_TREE_ENTRY_SEP_STRING "/" 69 | 70 | /** 71 | * Alias for standard ICU data 72 | * @internal ICU 3.0 73 | */ 74 | #define U_ICUDATA_ALIAS "ICUDATA" 75 | 76 | #endif /* U_HIDE_INTERNAL_API */ 77 | 78 | /** 79 | * UDataInfo contains the properties about the requested data. 80 | * This is meta data. 81 | * 82 | *

This structure may grow in the future, indicated by the 83 | * size field.

84 | * 85 | *

ICU data must be at least 8-aligned, and should be 16-aligned. 86 | * The UDataInfo struct begins 4 bytes after the start of the data item, 87 | * so it is 4-aligned. 88 | * 89 | *

The platform data property fields help determine if a data 90 | * file can be efficiently used on a given machine. 91 | * The particular fields are of importance only if the data 92 | * is affected by the properties - if there is integer data 93 | * with word sizes > 1 byte, char* text, or UChar* text.

94 | * 95 | *

The implementation for the udata_open[Choice]() 96 | * functions may reject data based on the value in isBigEndian. 97 | * No other field is used by the udata API implementation.

98 | * 99 | *

The dataFormat may be used to identify 100 | * the kind of data, e.g. a converter table.

101 | * 102 | *

The formatVersion field should be used to 103 | * make sure that the format can be interpreted. 104 | * It may be a good idea to check only for the one or two highest 105 | * of the version elements to allow the data memory to 106 | * get more or somewhat rearranged contents, for as long 107 | * as the using code can still interpret the older contents.

108 | * 109 | *

The dataVersion field is intended to be a 110 | * common place to store the source version of the data; 111 | * for data from the Unicode character database, this could 112 | * reflect the Unicode version.

113 | * 114 | * @stable ICU 2.0 115 | */ 116 | typedef struct { 117 | /** sizeof(UDataInfo) 118 | * @stable ICU 2.0 */ 119 | uint16_t size; 120 | 121 | /** unused, set to 0 122 | * @stable ICU 2.0*/ 123 | uint16_t reservedWord; 124 | 125 | /* platform data properties */ 126 | /** 0 for little-endian machine, 1 for big-endian 127 | * @stable ICU 2.0 */ 128 | uint8_t isBigEndian; 129 | 130 | /** see U_CHARSET_FAMILY values in utypes.h 131 | * @stable ICU 2.0*/ 132 | uint8_t charsetFamily; 133 | 134 | /** sizeof(UChar), one of { 1, 2, 4 } 135 | * @stable ICU 2.0*/ 136 | uint8_t sizeofUChar; 137 | 138 | /** unused, set to 0 139 | * @stable ICU 2.0*/ 140 | uint8_t reservedByte; 141 | 142 | /** data format identifier 143 | * @stable ICU 2.0*/ 144 | uint8_t dataFormat[4]; 145 | 146 | /** versions: [0] major [1] minor [2] milli [3] micro 147 | * @stable ICU 2.0*/ 148 | uint8_t formatVersion[4]; 149 | 150 | /** versions: [0] major [1] minor [2] milli [3] micro 151 | * @stable ICU 2.0*/ 152 | uint8_t dataVersion[4]; 153 | } UDataInfo; 154 | 155 | /* API for reading data -----------------------------------------------------*/ 156 | 157 | /** 158 | * Forward declaration of the data memory type. 159 | * @stable ICU 2.0 160 | */ 161 | typedef struct UDataMemory UDataMemory; 162 | 163 | /** 164 | * Callback function for udata_openChoice(). 165 | * @param context parameter passed into udata_openChoice(). 166 | * @param type The type of the data as passed into udata_openChoice(). 167 | * It may be NULL. 168 | * @param name The name of the data as passed into udata_openChoice(). 169 | * @param pInfo A pointer to the UDataInfo structure 170 | * of data that has been loaded and will be returned 171 | * by udata_openChoice() if this function 172 | * returns true. 173 | * @return true if the current data memory is acceptable 174 | * @stable ICU 2.0 175 | */ 176 | typedef UBool U_CALLCONV 177 | UDataMemoryIsAcceptable(void *context, 178 | const char *type, const char *name, 179 | const UDataInfo *pInfo); 180 | 181 | 182 | /** 183 | * Convenience function. 184 | * This function works the same as udata_openChoice 185 | * except that any data that matches the type and name 186 | * is assumed to be acceptable. 187 | * @param path Specifies an absolute path and/or a basename for the 188 | * finding of the data in the file system. 189 | * NULL for ICU data. 190 | * @param type A string that specifies the type of data to be loaded. 191 | * For example, resource bundles are loaded with type "res", 192 | * conversion tables with type "cnv". 193 | * This may be NULL or empty. 194 | * @param name A string that specifies the name of the data. 195 | * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL. 196 | * @return A pointer (handle) to a data memory object, or NULL 197 | * if an error occurs. Call udata_getMemory() 198 | * to get a pointer to the actual data. 199 | * 200 | * @see udata_openChoice 201 | * @stable ICU 2.0 202 | */ 203 | U_CAPI UDataMemory * U_EXPORT2 204 | udata_open(const char *path, const char *type, const char *name, 205 | UErrorCode *pErrorCode); 206 | 207 | /** 208 | * Data loading function. 209 | * This function is used to find and load efficiently data for 210 | * ICU and applications using ICU. 211 | * It provides an abstract interface that allows to specify a data 212 | * type and name to find and load the data. 213 | * 214 | *

The implementation depends on platform properties and user preferences 215 | * and may involve loading shared libraries (DLLs), mapping 216 | * files into memory, or fopen()/fread() files. 217 | * It may also involve using static memory or database queries etc. 218 | * Several or all data items may be combined into one entity 219 | * (DLL, memory-mappable file).

220 | * 221 | *

The data is always preceded by a header that includes 222 | * a UDataInfo structure. 223 | * The caller's isAcceptable() function is called to make 224 | * sure that the data is useful. It may be called several times if it 225 | * rejects the data and there is more than one location with data 226 | * matching the type and name.

227 | * 228 | *

If path==NULL, then ICU data is loaded. 229 | * Otherwise, it is separated into a basename and a basename-less directory string. 230 | * The basename is used as the data package name, and the directory is 231 | * logically prepended to the ICU data directory string.

232 | * 233 | *

For details about ICU data loading see the User Guide 234 | * Data Management chapter. (https://unicode-org.github.io/icu/userguide/icu_data/)

235 | * 236 | * @param path Specifies an absolute path and/or a basename for the 237 | * finding of the data in the file system. 238 | * NULL for ICU data. 239 | * @param type A string that specifies the type of data to be loaded. 240 | * For example, resource bundles are loaded with type "res", 241 | * conversion tables with type "cnv". 242 | * This may be NULL or empty. 243 | * @param name A string that specifies the name of the data. 244 | * @param isAcceptable This function is called to verify that loaded data 245 | * is useful for the client code. If it returns false 246 | * for all data items, then udata_openChoice() 247 | * will return with an error. 248 | * @param context Arbitrary parameter to be passed into isAcceptable. 249 | * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL. 250 | * @return A pointer (handle) to a data memory object, or NULL 251 | * if an error occurs. Call udata_getMemory() 252 | * to get a pointer to the actual data. 253 | * @stable ICU 2.0 254 | */ 255 | U_CAPI UDataMemory * U_EXPORT2 256 | udata_openChoice(const char *path, const char *type, const char *name, 257 | UDataMemoryIsAcceptable *isAcceptable, void *context, 258 | UErrorCode *pErrorCode); 259 | 260 | /** 261 | * Close the data memory. 262 | * This function must be called to allow the system to 263 | * release resources associated with this data memory. 264 | * @param pData The pointer to data memory object 265 | * @stable ICU 2.0 266 | */ 267 | U_CAPI void U_EXPORT2 268 | udata_close(UDataMemory *pData); 269 | 270 | /** 271 | * Get the pointer to the actual data inside the data memory. 272 | * The data is read-only. 273 | * 274 | * ICU data must be at least 8-aligned, and should be 16-aligned. 275 | * 276 | * @param pData The pointer to data memory object 277 | * @stable ICU 2.0 278 | */ 279 | U_CAPI const void * U_EXPORT2 280 | udata_getMemory(UDataMemory *pData); 281 | 282 | /** 283 | * Get the information from the data memory header. 284 | * This allows to get access to the header containing 285 | * platform data properties etc. which is not part of 286 | * the data itself and can therefore not be accessed 287 | * via the pointer that udata_getMemory() returns. 288 | * 289 | * @param pData pointer to the data memory object 290 | * @param pInfo pointer to a UDataInfo object; 291 | * its size field must be set correctly, 292 | * typically to sizeof(UDataInfo). 293 | * 294 | * *pInfo will be filled with the UDataInfo structure 295 | * in the data memory object. If this structure is smaller than 296 | * pInfo->size, then the size will be 297 | * adjusted and only part of the structure will be filled. 298 | * @stable ICU 2.0 299 | */ 300 | U_CAPI void U_EXPORT2 301 | udata_getInfo(UDataMemory *pData, UDataInfo *pInfo); 302 | 303 | /** 304 | * This function bypasses the normal ICU data loading process and 305 | * allows you to force ICU's system data to come out of a user-specified 306 | * area in memory. 307 | * 308 | * ICU data must be at least 8-aligned, and should be 16-aligned. 309 | * See https://unicode-org.github.io/icu/userguide/icudata 310 | * 311 | * The format of this data is that of the icu common data file, as is 312 | * generated by the pkgdata tool with mode=common or mode=dll. 313 | * You can read in a whole common mode file and pass the address to the start of the 314 | * data, or (with the appropriate link options) pass in the pointer to 315 | * the data that has been loaded from a dll by the operating system, 316 | * as shown in this code: 317 | * 318 | * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT []; 319 | * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool 320 | * UErrorCode status = U_ZERO_ERROR; 321 | * 322 | * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status); 323 | * 324 | * It is important that the declaration be as above. The entry point 325 | * must not be declared as an extern void*. 326 | * 327 | * Starting with ICU 4.4, it is possible to set several data packages, 328 | * one per call to this function. 329 | * udata_open() will look for data in the multiple data packages in the order 330 | * in which they were set. 331 | * The position of the linked-in or default-name ICU .data package in the 332 | * search list depends on when the first data item is loaded that is not contained 333 | * in the already explicitly set packages. 334 | * If data was loaded implicitly before the first call to this function 335 | * (for example, via opening a converter, constructing a UnicodeString 336 | * from default-codepage data, using formatting or collation APIs, etc.), 337 | * then the default data will be first in the list. 338 | * 339 | * This function has no effect on application (non ICU) data. See udata_setAppData() 340 | * for similar functionality for application data. 341 | * 342 | * @param data pointer to ICU common data 343 | * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR 344 | * @stable ICU 2.0 345 | */ 346 | U_CAPI void U_EXPORT2 347 | udata_setCommonData(const void *data, UErrorCode *err); 348 | 349 | 350 | /** 351 | * This function bypasses the normal ICU data loading process for application-specific 352 | * data and allows you to force the it to come out of a user-specified 353 | * pointer. 354 | * 355 | * ICU data must be at least 8-aligned, and should be 16-aligned. 356 | * See https://unicode-org.github.io/icu/userguide/icudata 357 | * 358 | * The format of this data is that of the icu common data file, like 'icudt26l.dat' 359 | * or the corresponding shared library (DLL) file. 360 | * The application must read in or otherwise construct an image of the data and then 361 | * pass the address of it to this function. 362 | * 363 | * 364 | * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if 365 | * data with the specified path that has already been opened, or 366 | * if setAppData with the same path has already been called. 367 | * Any such calls to setAppData will have no effect. 368 | * 369 | * 370 | * @param packageName the package name by which the application will refer 371 | * to (open) this data 372 | * @param data pointer to the data 373 | * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR 374 | * @see udata_setCommonData 375 | * @stable ICU 2.0 376 | */ 377 | U_CAPI void U_EXPORT2 378 | udata_setAppData(const char *packageName, const void *data, UErrorCode *err); 379 | 380 | /** 381 | * Possible settings for udata_setFileAccess() 382 | * @see udata_setFileAccess 383 | * @stable ICU 3.4 384 | */ 385 | typedef enum UDataFileAccess { 386 | /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */ 387 | UDATA_FILES_FIRST, 388 | /** An alias for the default access mode. @stable ICU 3.4 */ 389 | UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST, 390 | /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */ 391 | UDATA_ONLY_PACKAGES, 392 | /** ICU loads data from packages first, and only from single files 393 | if the data cannot be found in a package. @stable ICU 3.4 */ 394 | UDATA_PACKAGES_FIRST, 395 | /** ICU does not access the file system for data loading. @stable ICU 3.4 */ 396 | UDATA_NO_FILES, 397 | #ifndef U_HIDE_DEPRECATED_API 398 | /** 399 | * Number of real UDataFileAccess values. 400 | * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 401 | */ 402 | UDATA_FILE_ACCESS_COUNT 403 | #endif // U_HIDE_DEPRECATED_API 404 | } UDataFileAccess; 405 | 406 | /** 407 | * This function may be called to control how ICU loads data. It must be called 408 | * before any ICU data is loaded, including application data loaded with 409 | * ures/ResourceBundle or udata APIs. This function is not multithread safe. 410 | * The results of calling it while other threads are loading data are undefined. 411 | * @param access The type of file access to be used 412 | * @param status Error code. 413 | * @see UDataFileAccess 414 | * @stable ICU 3.4 415 | */ 416 | U_CAPI void U_EXPORT2 417 | udata_setFileAccess(UDataFileAccess access, UErrorCode *status); 418 | 419 | U_CDECL_END 420 | 421 | #if U_SHOW_CPLUSPLUS_API 422 | 423 | U_NAMESPACE_BEGIN 424 | 425 | /** 426 | * \class LocalUDataMemoryPointer 427 | * "Smart pointer" class, closes a UDataMemory via udata_close(). 428 | * For most methods see the LocalPointerBase base class. 429 | * 430 | * @see LocalPointerBase 431 | * @see LocalPointer 432 | * @stable ICU 4.4 433 | */ 434 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); 435 | 436 | U_NAMESPACE_END 437 | 438 | #endif // U_SHOW_CPLUSPLUS_API 439 | 440 | #endif -------------------------------------------------------------------------------- /unicode/udatamem.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1999-2010, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ******************************************************************************/ 10 | 11 | 12 | /*---------------------------------------------------------------------------------- 13 | * 14 | * UDataMemory A class-like struct that serves as a handle to a piece of memory 15 | * that contains some ICU data (resource, converters, whatever.) 16 | * 17 | * When an application opens ICU data (with udata_open, for example, 18 | * a UDataMemory * is returned. 19 | * 20 | *----------------------------------------------------------------------------------*/ 21 | #ifndef __UDATAMEM_H__ 22 | #define __UDATAMEM_H__ 23 | 24 | #include "udata.h" 25 | #include "ucmndata.h" 26 | 27 | struct UDataMemory { 28 | const commonDataFuncs *vFuncs; /* Function Pointers for accessing TOC */ 29 | 30 | const DataHeader *pHeader; /* Header of the memory being described by this */ 31 | /* UDataMemory object. */ 32 | const void *toc; /* For common memory, table of contents for */ 33 | /* the pieces within. */ 34 | UBool heapAllocated; /* True if this UDataMemory Object is on the */ 35 | /* heap and thus needs to be deleted when closed. */ 36 | 37 | void *mapAddr; /* For mapped or allocated memory, the start addr. */ 38 | /* Only non-null if a close operation should unmap */ 39 | /* the associated data. */ 40 | void *map; /* Handle, or other data, OS dependent. */ 41 | /* Only non-null if a close operation should unmap */ 42 | /* the associated data, and additional info */ 43 | /* beyond the mapAddr is needed to do that. */ 44 | int32_t length; /* Length of the data in bytes; -1 if unknown. */ 45 | }; 46 | 47 | U_CFUNC void UDatamemory_assign (UDataMemory *dest, UDataMemory *source); 48 | U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This); 49 | U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr); 50 | 51 | U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p); 52 | 53 | U_CAPI int32_t U_EXPORT2 54 | udata_getLength(const UDataMemory *pData); 55 | 56 | U_CAPI const void * U_EXPORT2 57 | udata_getRawMemory(const UDataMemory *pData); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /unicode/umapfile.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1999-2011, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ******************************************************************************/ 10 | 11 | /*---------------------------------------------------------------------------------- 12 | * 13 | * Memory mapped file wrappers for use by the ICU Data Implementation 14 | * 15 | * Porting note: The implementation of these functions is very platform specific. 16 | * Not all platforms can do real memory mapping. Those that can't 17 | * still must implement these functions, getting the data into memory using 18 | * whatever means are available. 19 | * 20 | * These functions are part of the ICU internal implementation, and 21 | * are not intended to be used directly by applications. 22 | * 23 | *----------------------------------------------------------------------------------*/ 24 | 25 | #ifndef __UMAPFILE_H__ 26 | #define __UMAPFILE_H__ 27 | 28 | #include 29 | #include "udata.h" 30 | #include "putilimp.h" 31 | 32 | U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status); 33 | U_CFUNC void uprv_unmapFile(UDataMemory *pData); 34 | 35 | /* MAP_NONE: no memory mapping, no file access at all */ 36 | #define MAP_NONE 0 37 | #define MAP_WIN32 1 38 | #define MAP_POSIX 2 39 | #define MAP_STDIO 3 40 | #define MAP_390DLL 4 41 | 42 | #if UCONFIG_NO_FILE_IO 43 | # define MAP_IMPLEMENTATION MAP_NONE 44 | #elif U_PLATFORM_USES_ONLY_WIN32_API 45 | # define MAP_IMPLEMENTATION MAP_WIN32 46 | #elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390 47 | # if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA) 48 | /* No memory mapping for 390 batch mode. Fake it using dll loading. */ 49 | # define MAP_IMPLEMENTATION MAP_390DLL 50 | # else 51 | # define MAP_IMPLEMENTATION MAP_POSIX 52 | # endif 53 | #else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */ 54 | # define MAP_IMPLEMENTATION MAP_STDIO 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /unicode/umutex.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ********************************************************************** 5 | * Copyright (C) 1997-2015, International Business Machines 6 | * Corporation and others. All Rights Reserved. 7 | ********************************************************************** 8 | * 9 | * File UMUTEX.H 10 | * 11 | * Modification History: 12 | * 13 | * Date Name Description 14 | * 04/02/97 aliu Creation. 15 | * 04/07/99 srl rewrite - C interface, multiple mutices 16 | * 05/13/99 stephen Changed to umutex (from cmutex) 17 | ****************************************************************************** 18 | */ 19 | 20 | #ifndef UMUTEX_H 21 | #define UMUTEX_H 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "putilimp.h" 33 | 34 | #if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H) 35 | // Support for including an alternate implementation of atomic & mutex operations has been withdrawn. 36 | // See issue ICU-20185. 37 | #error U_USER_ATOMICS and U_USER_MUTEX_H are not supported 38 | #endif 39 | 40 | // Export an explicit template instantiation of std::atomic. 41 | // When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class. 42 | // See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. 43 | // 44 | // Similar story for std::atomic, and the exported UMutex class. 45 | #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) 46 | #if defined(__clang__) || defined(_MSC_VER) 47 | #if defined(__clang__) 48 | // Suppress the warning that the explicit instantiation after explicit specialization has no effect. 49 | #pragma clang diagnostic push 50 | #pragma clang diagnostic ignored "-Winstantiation-after-specialization" 51 | #endif 52 | template struct U_COMMON_API std::atomic; 53 | template struct U_COMMON_API std::atomic; 54 | #if defined(__clang__) 55 | #pragma clang diagnostic pop 56 | #endif 57 | #elif defined(__GNUC__) 58 | // For GCC this class is already exported/visible, so no need for U_COMMON_API. 59 | template struct std::atomic; 60 | template struct std::atomic; 61 | #endif 62 | #endif 63 | 64 | 65 | U_NAMESPACE_BEGIN 66 | 67 | /**************************************************************************** 68 | * 69 | * Low Level Atomic Operations, ICU wrappers for. 70 | * 71 | ****************************************************************************/ 72 | 73 | typedef std::atomic u_atomic_int32_t; 74 | #define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val) 75 | 76 | inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { 77 | return var.load(std::memory_order_acquire); 78 | } 79 | 80 | inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { 81 | var.store(val, std::memory_order_release); 82 | } 83 | 84 | inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) { 85 | return var->fetch_add(1) + 1; 86 | } 87 | 88 | inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) { 89 | return var->fetch_sub(1) - 1; 90 | } 91 | 92 | 93 | /************************************************************************************************* 94 | * 95 | * UInitOnce Definitions. 96 | * 97 | *************************************************************************************************/ 98 | 99 | struct UInitOnce { 100 | u_atomic_int32_t fState; 101 | UErrorCode fErrCode; 102 | void reset() {fState = 0;} 103 | UBool isReset() {return umtx_loadAcquire(fState) == 0;} 104 | // Note: isReset() is used by service registration code. 105 | // Thread safety of this usage needs review. 106 | }; 107 | 108 | #define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR} 109 | 110 | 111 | U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &); 112 | U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &); 113 | 114 | template void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) { 115 | if (umtx_loadAcquire(uio.fState) == 2) { 116 | return; 117 | } 118 | if (umtx_initImplPreInit(uio)) { 119 | (obj->*fp)(); 120 | umtx_initImplPostInit(uio); 121 | } 122 | } 123 | 124 | 125 | // umtx_initOnce variant for plain functions, or static class functions. 126 | // No context parameter. 127 | inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) { 128 | if (umtx_loadAcquire(uio.fState) == 2) { 129 | return; 130 | } 131 | if (umtx_initImplPreInit(uio)) { 132 | (*fp)(); 133 | umtx_initImplPostInit(uio); 134 | } 135 | } 136 | 137 | // umtx_initOnce variant for plain functions, or static class functions. 138 | // With ErrorCode, No context parameter. 139 | inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) { 140 | if (U_FAILURE(errCode)) { 141 | return; 142 | } 143 | if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { 144 | // We run the initialization. 145 | (*fp)(errCode); 146 | uio.fErrCode = errCode; 147 | umtx_initImplPostInit(uio); 148 | } else { 149 | // Someone else already ran the initialization. 150 | if (U_FAILURE(uio.fErrCode)) { 151 | errCode = uio.fErrCode; 152 | } 153 | } 154 | } 155 | 156 | // umtx_initOnce variant for plain functions, or static class functions, 157 | // with a context parameter. 158 | template void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) { 159 | if (umtx_loadAcquire(uio.fState) == 2) { 160 | return; 161 | } 162 | if (umtx_initImplPreInit(uio)) { 163 | (*fp)(context); 164 | umtx_initImplPostInit(uio); 165 | } 166 | } 167 | 168 | // umtx_initOnce variant for plain functions, or static class functions, 169 | // with a context parameter and an error code. 170 | template void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) { 171 | if (U_FAILURE(errCode)) { 172 | return; 173 | } 174 | if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { 175 | // We run the initialization. 176 | (*fp)(context, errCode); 177 | uio.fErrCode = errCode; 178 | umtx_initImplPostInit(uio); 179 | } else { 180 | // Someone else already ran the initialization. 181 | if (U_FAILURE(uio.fErrCode)) { 182 | errCode = uio.fErrCode; 183 | } 184 | } 185 | } 186 | 187 | // UMutex should be constexpr-constructible, so that no initialization code 188 | // is run during startup. 189 | // This works on all C++ libraries except MS VS before VS2019. 190 | #if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \ 191 | (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142) 192 | // (VS std lib older than VS2017) || (VS std lib version < VS2019) 193 | # define UMUTEX_CONSTEXPR 194 | #else 195 | # define UMUTEX_CONSTEXPR constexpr 196 | #endif 197 | 198 | /** 199 | * UMutex - ICU Mutex class. 200 | * 201 | * This is the preferred Mutex class for use within ICU implementation code. 202 | * It is a thin wrapper over C++ std::mutex, with these additions: 203 | * - Static instances are safe, not triggering static construction or destruction, 204 | * and the associated order of construction or destruction issues. 205 | * - Plumbed into u_cleanup() for destructing the underlying std::mutex, 206 | * which frees any OS level resources they may be holding. 207 | * 208 | * Limitations: 209 | * - Static or global instances only. Cannot be heap allocated. Cannot appear as a 210 | * member of another class. 211 | * - No condition variables or other advanced features. If needed, you will need to use 212 | * std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp 213 | * 214 | * Typical Usage: 215 | * static UMutex myMutex; 216 | * 217 | * { 218 | * Mutex lock(myMutex); 219 | * ... // Do stuff that is protected by myMutex; 220 | * } // myMutex is released when lock goes out of scope. 221 | */ 222 | 223 | class U_COMMON_API UMutex { 224 | public: 225 | UMUTEX_CONSTEXPR UMutex() {} 226 | ~UMutex() = default; 227 | 228 | UMutex(const UMutex &other) = delete; 229 | UMutex &operator =(const UMutex &other) = delete; 230 | void *operator new(size_t) = delete; 231 | 232 | // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard 233 | void lock() { 234 | std::mutex *m = fMutex.load(std::memory_order_acquire); 235 | if (m == nullptr) { m = getMutex(); } 236 | m->lock(); 237 | } 238 | void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); } 239 | 240 | static void cleanup(); 241 | 242 | private: 243 | alignas(std::mutex) char fStorage[sizeof(std::mutex)] {}; 244 | std::atomic fMutex { nullptr }; 245 | 246 | /** All initialized UMutexes are kept in a linked list, so that they can be found, 247 | * and the underlying std::mutex destructed, by u_cleanup(). 248 | */ 249 | UMutex *fListLink { nullptr }; 250 | static UMutex *gListHead; 251 | 252 | /** Out-of-line function to lazily initialize a UMutex on first use. 253 | * Initial fast check is inline, in lock(). The returned value may never 254 | * be nullptr. 255 | */ 256 | std::mutex *getMutex(); 257 | }; 258 | 259 | 260 | /* Lock a mutex. 261 | * @param mutex The given mutex to be locked. Pass NULL to specify 262 | * the global ICU mutex. Recursive locks are an error 263 | * and may cause a deadlock on some platforms. 264 | */ 265 | U_CAPI void U_EXPORT2 umtx_lock(UMutex* mutex); 266 | 267 | /* Unlock a mutex. 268 | * @param mutex The given mutex to be unlocked. Pass NULL to specify 269 | * the global ICU mutex. 270 | */ 271 | U_CAPI void U_EXPORT2 umtx_unlock (UMutex* mutex); 272 | 273 | 274 | U_NAMESPACE_END 275 | 276 | #endif /* UMUTEX_H */ 277 | /*eof*/ -------------------------------------------------------------------------------- /unicode/uset_imp.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ******************************************************************************* 5 | * 6 | * Copyright (C) 2004-2007, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ******************************************************************************* 10 | * file name: uset_imp.h 11 | * encoding: UTF-8 12 | * tab size: 8 (not used) 13 | * indentation:4 14 | * 15 | * created on: 2004sep07 16 | * created by: Markus W. Scherer 17 | * 18 | * Internal USet definitions. 19 | */ 20 | 21 | #ifndef __USET_IMP_H__ 22 | #define __USET_IMP_H__ 23 | 24 | #include 25 | #include 26 | 27 | U_CDECL_BEGIN 28 | 29 | typedef void U_CALLCONV 30 | USetAdd(USet *set, UChar32 c); 31 | 32 | typedef void U_CALLCONV 33 | USetAddRange(USet *set, UChar32 start, UChar32 end); 34 | 35 | typedef void U_CALLCONV 36 | USetAddString(USet *set, const UChar *str, int32_t length); 37 | 38 | typedef void U_CALLCONV 39 | USetRemove(USet *set, UChar32 c); 40 | 41 | typedef void U_CALLCONV 42 | USetRemoveRange(USet *set, UChar32 start, UChar32 end); 43 | 44 | /** 45 | * Interface for adding items to a USet, to keep low-level code from 46 | * statically depending on the USet implementation. 47 | * Calls will look like sa->add(sa->set, c); 48 | */ 49 | struct USetAdder { 50 | USet *set; 51 | USetAdd *add; 52 | USetAddRange *addRange; 53 | USetAddString *addString; 54 | USetRemove *remove; 55 | USetRemoveRange *removeRange; 56 | }; 57 | typedef struct USetAdder USetAdder; 58 | 59 | U_CDECL_END 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /unicode/ustringtrie.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ******************************************************************************* 5 | * Copyright (C) 2010-2012, International Business Machines 6 | * Corporation and others. All Rights Reserved. 7 | ******************************************************************************* 8 | * file name: udicttrie.h 9 | * encoding: UTF-8 10 | * tab size: 8 (not used) 11 | * indentation:4 12 | * 13 | * created on: 2010dec17 14 | * created by: Markus W. Scherer 15 | */ 16 | 17 | #ifndef __USTRINGTRIE_H__ 18 | #define __USTRINGTRIE_H__ 19 | 20 | /** 21 | * \file 22 | * \brief C API: Helper definitions for dictionary trie APIs. 23 | */ 24 | 25 | #include 26 | 27 | 28 | /** 29 | * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods. 30 | * @see USTRINGTRIE_MATCHES 31 | * @see USTRINGTRIE_HAS_VALUE 32 | * @see USTRINGTRIE_HAS_NEXT 33 | * @stable ICU 4.8 34 | */ 35 | enum UStringTrieResult { 36 | /** 37 | * The input unit(s) did not continue a matching string. 38 | * Once current()/next() return USTRINGTRIE_NO_MATCH, 39 | * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH, 40 | * until the trie is reset to its original state or to a saved state. 41 | * @stable ICU 4.8 42 | */ 43 | USTRINGTRIE_NO_MATCH, 44 | /** 45 | * The input unit(s) continued a matching string 46 | * but there is no value for the string so far. 47 | * (It is a prefix of a longer string.) 48 | * @stable ICU 4.8 49 | */ 50 | USTRINGTRIE_NO_VALUE, 51 | /** 52 | * The input unit(s) continued a matching string 53 | * and there is a value for the string so far. 54 | * This value will be returned by getValue(). 55 | * No further input byte/unit can continue a matching string. 56 | * @stable ICU 4.8 57 | */ 58 | USTRINGTRIE_FINAL_VALUE, 59 | /** 60 | * The input unit(s) continued a matching string 61 | * and there is a value for the string so far. 62 | * This value will be returned by getValue(). 63 | * Another input byte/unit can continue a matching string. 64 | * @stable ICU 4.8 65 | */ 66 | USTRINGTRIE_INTERMEDIATE_VALUE 67 | }; 68 | 69 | /** 70 | * Same as (result!=USTRINGTRIE_NO_MATCH). 71 | * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. 72 | * @return true if the input bytes/units so far are part of a matching string/byte sequence. 73 | * @stable ICU 4.8 74 | */ 75 | #define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH) 76 | 77 | /** 78 | * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but 79 | * this macro evaluates result exactly once. 80 | * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. 81 | * @return true if there is a value for the input bytes/units so far. 82 | * @see BytesTrie::getValue 83 | * @see UCharsTrie::getValue 84 | * @stable ICU 4.8 85 | */ 86 | #define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE) 87 | 88 | /** 89 | * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but 90 | * this macro evaluates result exactly once. 91 | * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. 92 | * @return true if another input byte/unit can continue a matching string. 93 | * @stable ICU 4.8 94 | */ 95 | #define USTRINGTRIE_HAS_NEXT(result) ((result)&1) 96 | 97 | #endif /* __USTRINGTRIE_H__ */ -------------------------------------------------------------------------------- /uset.h: -------------------------------------------------------------------------------- 1 | #ifndef _MY_USET_H_ 2 | #define _MY_USET_H_ 3 | 4 | #include 5 | 6 | typedef struct USet USet; 7 | 8 | extern "C" { 9 | USet *uset_openEmpty(void); 10 | void uset_close(USet *); 11 | void uset_freeze(USet *); 12 | void uset_add(USet *, UChar32); 13 | UBool uset_contains(USet *, UChar32); 14 | } 15 | 16 | #endif --------------------------------------------------------------------------------