├── .gitignore
├── CharacterSet.h
├── CoreFoundationHack.h
├── CoreFoundationHack.xm
├── CoreTextHack.xm
├── EmojiAttributes.plist
├── EmojiSizeFix.xm
├── ICUBlocks.h
├── ICUHack.xm
├── LICENSE
├── Makefile
├── PSEmojiData.h
├── README.md
├── TextInputHack.xm
├── WebCoreHack.xm
├── WebCoreSupport
    ├── CharactersProperties.h
    ├── CoreGraphicsSPI.h
    ├── RefCounted.h
    ├── RefPtr.h
    ├── RenderText.h
    ├── StringImpl.h
    └── UAX.h
├── control
├── copyResources.sh
├── emojiprops.h
├── layout
    ├── DEBIAN
    │   ├── postinst
    │   └── postrm
    └── Library
    │   └── Application Support
    │       └── EmojiAttributes
    │           ├── Info.plist
    │           ├── emoji.bitmap
    │           └── uemoji.icu
├── unicode
    ├── cmemory.h
    ├── putilimp.h
    ├── ucln.h
    ├── ucln_cmn.h
    ├── ucmndata.h
    ├── ucptrie.h
    ├── ucptrie_impl.h
    ├── udata.h
    ├── udatamem.h
    ├── umapfile.h
    ├── umutex.h
    ├── uset_imp.h
    └── ustringtrie.h
└── uset.h


/.gitignore:
--------------------------------------------------------------------------------
1 | .theos
2 | .DS_Store
3 | packages


--------------------------------------------------------------------------------
/CoreFoundationHack.h:
--------------------------------------------------------------------------------
  1 | #import <CoreFoundation/CoreFoundation.h>
  2 | 
  3 | #if defined(__BIG_ENDIAN__)
  4 | #define __CF_BIG_ENDIAN__ 1
  5 | #define __CF_LITTLE_ENDIAN__ 0
  6 | #endif
  7 | 
  8 | #if defined(__LITTLE_ENDIAN__)
  9 | #define __CF_LITTLE_ENDIAN__ 1
 10 | #define __CF_BIG_ENDIAN__ 0
 11 | #endif
 12 | 
 13 | #define CF_INFO_BITS (!!(__CF_BIG_ENDIAN__) * 3)
 14 | #define CF_IS_OBJC(typeID, obj) (1)
 15 | 
 16 | #define CF_IS_SWIFT(type, obj) (0)
 17 | 
 18 | #define WHITE_SPACE_CHARACTER (0x0020)
 19 | #define ZERO_WIDTH_JOINER (0x200D)
 20 | 
 21 | enum {
 22 |     _kCFRuntimeNotATypeID = 0
 23 | };
 24 | 
 25 | static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
 26 | 
 27 | enum {
    |     // Flag values and masks for CFString objects. Several names share one numeric value
    |     // (every 0x060 entry, and both 0x020 "free" entries), so they cannot be told apart by value alone.
    |     // NOTE(review): presumably tested against CFRuntimeBase::_cfinfo[CF_INFO_BITS]; confirm in CoreFoundationHack.xm.
 28 |     __kCFFreeContentsWhenDoneMask = 0x020,
 29 |     __kCFFreeContentsWhenDone = 0x020,
 30 |     __kCFContentsMask = 0x060,
 31 |     __kCFHasInlineContents = 0x000,
 32 |     __kCFNotInlineContentsNoFree = 0x040,
 33 |     __kCFNotInlineContentsDefaultFree = 0x020,
 34 |     __kCFNotInlineContentsCustomFree = 0x060,
 35 |     __kCFHasContentsAllocatorMask = 0x060,
 36 |     __kCFHasContentsAllocator = 0x060,
 37 |     __kCFHasContentsDeallocatorMask = 0x060,
 38 |     __kCFHasContentsDeallocator = 0x060,
 39 |     __kCFIsMutableMask = 0x01,
 40 |     __kCFIsMutable = 0x01,
 41 |     __kCFIsUnicodeMask = 0x10,
 42 |     __kCFIsUnicode = 0x10,
 43 |     __kCFHasNullByteMask = 0x08,
 44 |     __kCFHasNullByte = 0x08,
 45 |     __kCFHasLengthByteMask = 0x04,
 46 |     __kCFHasLengthByte = 0x04,
 47 | };
 48 | 
 49 | struct __notInlineMutable {
    |     // Storage description used as the `notInlineMutable` member of the __CFString variants union.
    |     // NOTE(review): presumably mirrors CoreFoundation's private mutable-string layout; field order and widths must not change.
 50 |     void *buffer;
 51 |     CFIndex length;
 52 |     CFIndex capacity;
    |     // Four 1-bit flags followed by desiredCapacity: 4 + 60 bits on LP64, 4 + 28 bits otherwise.
 53 |     NSUInteger hasGap : 1;
 54 |     NSUInteger isFixedCapacity : 1;
 55 |     NSUInteger isExternalMutable : 1;
 56 |     NSUInteger capacityProvidedExternally : 1;
 57 | #if __LP64__
 58 |     unsigned long desiredCapacity : 60;
 59 | #else
 60 |     unsigned long desiredCapacity : 28;
 61 | #endif
 62 |     CFAllocatorRef contentsAllocator;
 63 | };
 64 | 
 65 | typedef struct __CFRuntimeBase {
    |     // Object header placed first in struct __CFString.
    |     // NOTE(review): presumably mirrors CoreFoundation's private runtime header; confirm against the targeted OS versions.
 66 |     uintptr_t _cfisa;
    |     // Four info bytes; CF_INFO_BITS (0 on little-endian, 3 on big-endian) is an index into this array.
 67 |     uint8_t _cfinfo[4];
 68 | #if __LP64__
    |     // Present only on 64-bit builds.
 69 |     uint32_t _rc;
 70 | #endif
 71 | } CFRuntimeBase;
 72 | 
 73 | struct __CFString {
    |     // CFString object layout: the runtime header followed by a union of four storage variants.
    |     // NOTE(review): which variant is live is presumably decided by the flag bits in base._cfinfo; confirm in CoreFoundationHack.xm.
 74 |     CFRuntimeBase base;
 75 |     union {
    |         // Variant that carries only a length (no buffer pointer).
 76 |         struct __inline1 {
 77 |             CFIndex length;
 78 |         } inline1;
    |         // External buffer with an explicit length and a deallocator.
 79 |         struct __notInlineImmutable1 {
 80 |             void *buffer;
 81 |             CFIndex length;
 82 |             CFAllocatorRef contentsDeallocator;
 83 |         } notInlineImmutable1;
    |         // External buffer without a stored length.
 84 |         struct __notInlineImmutable2 {
 85 |             void *buffer;
 86 |             CFAllocatorRef contentsDeallocator;
 87 |         } notInlineImmutable2;
 88 |         struct __notInlineMutable notInlineMutable;
 89 |     } variants;
 90 | };
 91 | 
 92 | enum {
    |     // Character-set identifiers. The first group counts up from 1; the second group restarts at 100.
    |     // NOTE(review): presumably the `charset` argument of CFUniCharGetBitmapPtrForPlane(); confirm at call sites.
 93 |     kCFUniCharControlCharacterSet = 1,
 94 |     kCFUniCharWhitespaceCharacterSet,
 95 |     kCFUniCharWhitespaceAndNewlineCharacterSet,
 96 |     kCFUniCharDecimalDigitCharacterSet,
 97 |     kCFUniCharLetterCharacterSet,
 98 |     kCFUniCharLowercaseLetterCharacterSet,
 99 |     kCFUniCharUppercaseLetterCharacterSet,
100 |     kCFUniCharNonBaseCharacterSet,
101 |     kCFUniCharCanonicalDecomposableCharacterSet,
    |     // Alias: same value as the canonical-decomposable set, so numbering continues from it.
102 |     kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet,
103 |     kCFUniCharAlphaNumericCharacterSet,
104 |     kCFUniCharPunctuationCharacterSet,
105 |     kCFUniCharIllegalCharacterSet,
106 |     kCFUniCharTitlecaseLetterCharacterSet,
107 |     kCFUniCharSymbolAndOperatorCharacterSet,
108 |     kCFUniCharNewlineCharacterSet,
109 | 
110 |     kCFUniCharCompatibilityDecomposableCharacterSet = 100,
111 |     kCFUniCharHFSPlusDecomposableCharacterSet,
112 |     kCFUniCharStrongRightToLeftCharacterSet,
113 |     kCFUniCharHasNonSelfLowercaseCharacterSet,
114 |     kCFUniCharHasNonSelfUppercaseCharacterSet,
115 |     kCFUniCharHasNonSelfTitlecaseCharacterSet,
116 |     kCFUniCharHasNonSelfCaseFoldingCharacterSet,
117 |     kCFUniCharHasNonSelfMirrorMappingCharacterSet,
118 |     kCFUniCharControlAndFormatterCharacterSet,
119 |     kCFUniCharCaseIgnorableCharacterSet,
120 |     kCFUniCharGraphemeExtendCharacterSet
121 | };
122 | 
123 | typedef enum {
124 |     kCFStringGraphemeCluster = 1,
125 |     kCFStringComposedCharacterCluster = 2,
126 |     kCFStringCursorMovementCluster = 3,
127 |     kCFStringBackwardDeletionCluster = 4
128 | } CFStringCharacterClusterType;
129 | 
130 | enum {
131 |     kCFUniCharCombiningProperty = 0,
132 |     kCFUniCharBidiProperty
133 | };
134 | 
135 | enum {
136 |     kCFStringHangulStateL,
137 |     kCFStringHangulStateV,
138 |     kCFStringHangulStateT,
139 |     kCFStringHangulStateLV,
140 |     kCFStringHangulStateLVT,
141 |     kCFStringHangulStateBreak
142 | };
143 | 
144 | typedef struct {
    |     // Buffer filled in by CFCharacterSetInitInlineBuffer(), declared just below.
    |     // NOTE(review): `flags` presumably holds the kCFCharacterSetIs*/NoBitmapAvailable bits defined later in this header; confirm.
145 |     CFCharacterSetRef cset;
146 |     uint32_t flags;
147 |     uint32_t rangeStart;
148 |     uint32_t rangeLimit;
149 |     const uint8_t *bitmap;
150 | } CFCharacterSetInlineBuffer;
151 | 
152 | CF_EXTERN_C_BEGIN
153 | CF_EXPORT void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer);
154 | CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
155 | CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
156 | CF_EXTERN_C_END
157 | 
158 | enum {
159 |     kCFCharacterSetIsCompactBitmap = (1UL << 0),
160 |     kCFCharacterSetNoBitmapAvailable = (1UL << 1),
161 |     kCFCharacterSetIsInverted = (1UL << 2)
162 | };
163 | 
164 | #define kCFUniCharBitShiftForByte (3)
165 | #define kCFUniCharBitShiftForMask (7)
166 | 
167 | #define MAX_CASE_MAPPING_BUF (8)
168 | #define ZERO_WIDTH_JOINER (0x200D)
169 | #define COMBINING_GRAPHEME_JOINER (0x034F)
170 | 
171 | #define HANGUL_CHOSEONG_START (0x1100)
172 | #define HANGUL_CHOSEONG_END (0x115F)
173 | #define HANGUL_JUNGSEONG_START (0x1160)
174 | #define HANGUL_JUNGSEONG_END (0x11A2)
175 | #define HANGUL_JONGSEONG_START (0x11A8)
176 | #define HANGUL_JONGSEONG_END (0x11F9)
177 | 
178 | #define HANGUL_SYLLABLE_START (0xAC00)
179 | #define HANGUL_SYLLABLE_END (0xD7AF)
180 | 
181 | #define HANGUL_JONGSEONG_COUNT (28)
182 | 
183 | #define MAX_TRANSCODING_LENGTH 4
184 | 
185 | static uint8_t __CFTranscodingHintLength[] = {
186 |     2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
187 | };
188 | 


--------------------------------------------------------------------------------
/CoreTextHack.xm:
--------------------------------------------------------------------------------
  1 | #define COMPRESSED_SET
  2 | #import <PSHeader/PS.h>
  3 | #import "CharacterSet.h"
  4 | #import "PSEmojiData.h"
  5 | #import "uset.h"
  6 | #import <substrate.h>
  7 | #import <HBLog.h>
  8 | #include <unicode/utf16.h>
  9 | 
 10 | #define CreateMutableDict(dict) CFDictionaryCreateMutableCopy(kCFAllocatorDefault, CFDictionaryGetCount(dict), dict)
 11 | 
 12 | extern "C" CFCharacterSetRef _CFCreateCharacterSetFromUSet(USet *);
 13 | 
 14 | %config(generator=MobileSubstrate)
 15 | 
 16 | %group CharacterSet
 17 | 
    | // Private CoreText entry points. They are resolved by symbol lookup in %ctor and stay NULL when a lookup fails.
 18 | CFCharacterSetRef (*CreateCharacterSetForFont)(CFStringRef const) = NULL;
 19 | CFCharacterSetRef (*CreateCharacterSetWithCompressedBitmapRepresentation)(const CFDataRef characterSet) = NULL;
 20 | CFDataRef (*XTCopyUncompressedBitmapRepresentation)(const UInt8 *, CFIndex);
    | 
    | // Replacement for CreateCharacterSetForFont.
    | // For "AppleColorEmoji" and ".AppleColorEmojiUI" it returns a character set built from the bundled
    | // bitmaps (compressedSet / uncompressedSet); every other font name is forwarded to the original.
    | // On iOS 11+ the compressed bitmap is tried first: via CreateCharacterSetWithCompressedBitmapRepresentation
    | // when that symbol was found, otherwise via XTCopyUncompressedBitmapRepresentation. If neither path
    | // produces data, the uncompressed bitmap is used.
    | // The returned set is owned by the caller (it comes from a Create function).
 21 | %hookf(CFCharacterSetRef, CreateCharacterSetForFont, CFStringRef const fontName) {
 22 |     if (CFStringEqual(fontName, CFSTR("AppleColorEmoji")) || CFStringEqual(fontName, CFSTR(".AppleColorEmojiUI"))) {
 23 |         if (IS_IOS_OR_NEWER(iOS_11_0)) {
    |             // kCFAllocatorNull: the CFData only wraps the static bytes and never frees them.
 24 |             CFDataRef compressedData = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, compressedSet, compressedSetLength, kCFAllocatorNull);
    |             if (compressedData) {
 25 |                 if (CreateCharacterSetWithCompressedBitmapRepresentation) {
 26 |                     CFCharacterSetRef decodedSet = CreateCharacterSetWithCompressedBitmapRepresentation(compressedData);
 27 |                     CFRelease(compressedData);
 28 |                     return decodedSet;
 29 |                 }
    |                 // The decompressor symbol may be missing too; only call it when the lookup succeeded.
    |                 CFDataRef inflatedData = NULL;
    |                 if (XTCopyUncompressedBitmapRepresentation)
 30 |                     inflatedData = XTCopyUncompressedBitmapRepresentation(CFDataGetBytePtr(compressedData), CFDataGetLength(compressedData));
 31 |                 CFRelease(compressedData);
 32 |                 if (inflatedData) {
 33 |                     CFCharacterSetRef ourSet = CFCharacterSetCreateWithBitmapRepresentation(kCFAllocatorDefault, inflatedData);
 34 |                     CFRelease(inflatedData);
 35 |                     return ourSet;
 36 |                 }
    |             }
 37 |         }
 38 |         CFDataRef uncompressedData = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, uncompressedSet, uncompressedSetLength, kCFAllocatorNull);
 39 |         CFCharacterSetRef ourSet = CFCharacterSetCreateWithBitmapRepresentation(kCFAllocatorDefault, uncompressedData);
    |         // Release our reference, exactly as the iOS 11+ path above does; previously this CFData leaked on every call.
    |         if (uncompressedData)
    |             CFRelease(uncompressedData);
 40 |         return ourSet;
 41 |     }
 42 |     return %orig(fontName);
 43 | }
 44 | 
 45 | %end
 46 | 
 47 | static CFMutableDictionaryRef ctFontInfo = NULL;
 48 | 
 49 | static CFMutableDictionaryRef getCTFontInfo(CFDictionaryRef dict) {
    |     // Builds, once, a patched mutable copy of the CTFontInfo dictionary and caches it in `ctFontInfo`.
    |     // The copy rewrites Attrs -> AppleColorEmoji: it sets NSCTFontFormatAttribute to 3, installs a
    |     // feature description under NSCTFontFeaturesAttribute, and sets NSCTFontSymbolicTrait inside
    |     // NSCTFontTraitsAttribute.
    |     // @param dict  Dictionary returned by the original CoreText function; may be NULL.
    |     // @return      The cached patched dictionary (not retained for the caller); NULL when `dict` is NULL
    |     //              and nothing is cached yet; `dict` itself when it lacks the expected nested entries.
 50 |     if (ctFontInfo == NULL) {
    |         if (dict == NULL)
    |             return NULL;
    |         // Validate the whole nesting before copying, so an unexpected layout is passed through instead of crashing.
    |         CFDictionaryRef srcAttrs = (CFDictionaryRef)CFDictionaryGetValue(dict, CFSTR("Attrs"));
    |         CFDictionaryRef srcEmoji = srcAttrs ? (CFDictionaryRef)CFDictionaryGetValue(srcAttrs, CFSTR("AppleColorEmoji")) : NULL;
    |         CFDictionaryRef srcTraits = srcEmoji ? (CFDictionaryRef)CFDictionaryGetValue(srcEmoji, CFSTR("NSCTFontTraitsAttribute")) : NULL;
    |         if (srcTraits == NULL)
    |             return (CFMutableDictionaryRef)dict;
 51 |         ctFontInfo = CFDictionaryCreateMutableCopy(kCFAllocatorDefault, CFDictionaryGetCount(dict), dict);
 53 |         CFMutableDictionaryRef attrs = CreateMutableDict(srcAttrs);
 55 |         CFMutableDictionaryRef ace = CreateMutableDict(srcEmoji);
 57 |         CFMutableDictionaryRef fontTraits = CreateMutableDict(srcTraits);
 58 |         SInt32 formatValue = 3;
 59 |         CFNumberRef formatRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &formatValue);
 60 |         CFDictionarySetValue(ace, CFSTR("NSCTFontFormatAttribute"), formatRef);
 61 |         CFRelease(formatRef);
 62 |         CFDictionarySetValue(ace, CFSTR("NSCTFontFeaturesAttribute"), (__bridge CFArrayRef)@[
 63 |             @{
 64 |                 @"CTFeatureTypeIdentifier": @(701),
 65 |                 @"CTFeatureTypeNameID": @(256),
 66 |                 @"CTFeatureTypeSelectors": @[
 67 |                     @{
 68 |                         @"CTFeatureSelectorIdentifier": @(100),
 69 |                         @"CTFeatureSelectorNameID": @(257)
 70 |                     },
 71 |                     @{
 72 |                         @"CTFeatureSelectorIdentifier": @(200),
 73 |                         @"CTFeatureSelectorNameID": @(258)
 74 |                     }
 75 |                 ]
 76 |             }
 77 |         ]);
 79 |         long long symbolicTraitValue = 3221234688;
 80 |         CFNumberRef symbolicTraitRef = CFNumberCreate(kCFAllocatorDefault, kCFNumberLongLongType, &symbolicTraitValue);
 81 |         CFDictionarySetValue(fontTraits, CFSTR("NSCTFontSymbolicTrait"), symbolicTraitRef);
 82 |         CFRelease(symbolicTraitRef);
    |         // Store the copies innermost-first. The traits copy used to be modified but never stored,
    |         // so the symbolic-trait change was silently dropped.
    |         CFDictionarySetValue(ace, CFSTR("NSCTFontTraitsAttribute"), fontTraits);
    |         CFDictionarySetValue(attrs, CFSTR("AppleColorEmoji"), ace);
 83 |         CFDictionarySetValue(ctFontInfo, CFSTR("Attrs"), attrs);
    |         // The parent dictionaries now hold these values (same pattern as formatRef above); drop our references.
    |         CFRelease(fontTraits);
    |         CFRelease(ace);
    |         CFRelease(attrs);
 84 |     }
 85 |     return ctFontInfo;
 86 | }
 87 | 
 88 | %group FontAttributes1
 89 | 
 90 | CFDictionaryRef (*CTFontGetPlistFromGSFontCacheB)(CFStringRef, bool);
 91 | %hookf(CFDictionaryRef, CTFontGetPlistFromGSFontCacheB, CFStringRef plist, bool directAccess) {
 92 |     // The original always runs first; only the CTFontInfo.plist result is swapped for the patched copy.
 93 |     CFDictionaryRef fetched = %orig(plist, directAccess);
 94 |     bool isFontInfo = CFStringEqual(plist, CFSTR("CTFontInfo.plist"));
 95 |     return isFontInfo ? getCTFontInfo(fetched) : fetched;
 96 | }
 97 | 
 98 | %end
 99 | 
100 | %group FontAttributes2
101 | 
102 | CFDictionaryRef (*CTFontGetPlistFromGSFontCache)(CFStringRef);
103 | %hookf(CFDictionaryRef, CTFontGetPlistFromGSFontCache, CFStringRef plist) {
104 |     // Single-argument variant: run the original, then substitute the patched copy for CTFontInfo.plist only.
105 |     CFDictionaryRef fetched = %orig(plist);
106 |     bool isFontInfo = CFStringEqual(plist, CFSTR("CTFontInfo.plist"));
107 |     return isFontInfo ? getCTFontInfo(fetched) : fetched;
108 | }
109 | 
110 | %end
111 | 
112 | #if __LP64__
113 | 
114 | static USet *unicodeSet = NULL;
115 | static CFCharacterSetRef characterSet = NULL;
116 | 
117 | %group EmojiPresentation
118 | 
    | // Both pointers are filled in by symbol lookup in %ctor.
119 | void (*IsDefaultEmojiPresentation)(void *) = NULL;
120 | CFMutableCharacterSetRef *DefaultEmojiPresentationSet;
121 | 
    | // Replaces the hooked function entirely: the original is never called and arg0 is ignored.
    | // It only stores the file-level `characterSet` into the location DefaultEmojiPresentationSet points at.
    | // NOTE(review): dereferences DefaultEmojiPresentationSet unconditionally, so this group must only be
    | // initialised when that symbol was found.
122 | %hookf(void, IsDefaultEmojiPresentation, void *arg0) {
123 |     *DefaultEmojiPresentationSet = (CFMutableCharacterSetRef)characterSet;
124 | }
125 | 
126 | %end
127 | 
128 | %group EmojiPresentationUSet
129 | 
130 | bool (*IsDefaultEmojiPresentationUSet)(UChar32) = NULL;
    | // Answers from the file-level `unicodeSet` (populated in %ctor) instead of calling the original.
131 | %hookf(bool, IsDefaultEmojiPresentationUSet, UChar32 c) {
132 |     return uset_contains(unicodeSet, c);
133 | }
134 | 
135 | %end
136 | 
137 | #endif
138 | 
139 | %ctor {
    |     // Resolves the private CoreText symbols and installs each hook group.
    |     // A group is initialised only when every symbol it hooks or dereferences was found, so a missing
    |     // symbol on an unexpected OS build disables that group instead of hooking a NULL address.
140 |     MSImageRef ct = MSGetImageByName(realPath2(@"/System/Library/Frameworks/CoreText.framework/CoreText"));
141 |     CreateCharacterSetForFont = (CFCharacterSetRef (*)(CFStringRef const))_PSFindSymbolReadable(ct, "__Z25CreateCharacterSetForFontPK10__CFString");
142 |     HBLogDebug(@"[CoreTextHack: CharacterSet] CreateCharacterSetForFont found: %d", CreateCharacterSetForFont != NULL);
143 |     XTCopyUncompressedBitmapRepresentation = (CFDataRef (*)(const UInt8 *, CFIndex))_PSFindSymbolCallable(ct, "__Z38XTCopyUncompressedBitmapRepresentationPKhm");
144 |     HBLogDebug(@"[CoreTextHack: CharacterSet] XTCopyUncompressedBitmapRepresentation found: %d", XTCopyUncompressedBitmapRepresentation != NULL);
145 |     CreateCharacterSetWithCompressedBitmapRepresentation = (CFCharacterSetRef (*)(const CFDataRef))_PSFindSymbolCallable(ct, "__Z52CreateCharacterSetWithCompressedBitmapRepresentationPK8__CFData");
146 |     HBLogDebug(@"[CoreTextHack: CharacterSet] CreateCharacterSetWithCompressedBitmapRepresentation found: %d", CreateCharacterSetWithCompressedBitmapRepresentation != NULL);
    |     // Same guard the FontAttributes groups below already use.
    |     if (CreateCharacterSetForFont) {
147 |         %init(CharacterSet);
    |     }
148 |     if (!IS_IOS_OR_NEWER(iOS_13_2)) {
149 |         CTFontGetPlistFromGSFontCacheB = (CFDictionaryRef (*)(CFStringRef, bool))_PSFindSymbolReadable(ct, "__Z29CTFontGetPlistFromGSFontCachePK10__CFStringb");
150 |         HBLogDebug(@"[CoreTextHack: FontAttributes] CTFontGetPlistFromGSFontCacheB found: %d", CTFontGetPlistFromGSFontCacheB != NULL);
151 |         if (CTFontGetPlistFromGSFontCacheB) {
152 |             %init(FontAttributes1);
153 |         }
154 |         CTFontGetPlistFromGSFontCache = (CFDictionaryRef (*)(CFStringRef))_PSFindSymbolReadable(ct, "__Z29CTFontGetPlistFromGSFontCachePK10__CFString");
155 |         HBLogDebug(@"[CoreTextHack: FontAttributes] CTFontGetPlistFromGSFontCache found: %d", CTFontGetPlistFromGSFontCache != NULL);
156 |         if (CTFontGetPlistFromGSFontCache) {
157 |             %init(FontAttributes2);
158 |         }
159 |     }
160 | #if __LP64__
    |     // Build the frozen set of default-emoji-presentation code points from the `presentation` table.
161 |     unicodeSet = uset_openEmpty();
162 |     for (int i = 0; i < presentationCount; ++i)
163 |         uset_add(unicodeSet, presentation[i]);
164 |     uset_freeze(unicodeSet);
165 |     if (IS_IOS_BETWEEN_EEX(iOS_11_0, iOS_12_1)) {
166 |         characterSet = _CFCreateCharacterSetFromUSet(unicodeSet);
    |         // NOTE(review): the Create call already returns an owned reference and %dtor releases only once,
    |         // so this extra retain presumably keeps the set alive for the pointer handed to CoreText; confirm.
    |         if (characterSet)
167 |             CFRetain(characterSet);
168 |         IsDefaultEmojiPresentation = (void (*)(void *))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjEN4$_138__invokeEPv");
169 |         if (IsDefaultEmojiPresentation == NULL)
170 |             IsDefaultEmojiPresentation = (void (*)(void *))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjEN4$_128__invokeEPv");
171 |         DefaultEmojiPresentationSet = (CFMutableCharacterSetRef (*))_PSFindSymbolReadable(ct, "__ZZL26IsDefaultEmojiPresentationjE28sDefaultEmojiPresentationSet");
172 |         HBLogDebug(@"[CoreTextHack: EmojiPresentation] IsDefaultEmojiPresentation found: %d", IsDefaultEmojiPresentation != NULL);
173 |         HBLogDebug(@"[CoreTextHack: EmojiPresentation] DefaultEmojiPresentationSet found: %d", DefaultEmojiPresentationSet != NULL);
    |         // The hook writes `characterSet` through DefaultEmojiPresentationSet, so all three must be valid.
    |         if (characterSet && IsDefaultEmojiPresentation && DefaultEmojiPresentationSet) {
174 |             %init(EmojiPresentation);
    |         }
175 |     } else if (IS_IOS_BETWEEN_EEX(iOS_12_1, iOS_15_4)) {
176 |         IsDefaultEmojiPresentationUSet = (bool (*)(UChar32))_PSFindSymbolReadable(ct, "__Z26IsDefaultEmojiPresentationj");
177 |         HBLogDebug(@"[CoreTextHack: EmojiPresentation] IsDefaultEmojiPresentation (Uset) found: %d", IsDefaultEmojiPresentationUSet != NULL);
178 |         if (IsDefaultEmojiPresentationUSet) {
179 |             %init(EmojiPresentationUSet);
180 |         }
181 |     }
182 | #endif
183 | }
184 | 
185 | #if __LP64__
186 | 
187 | %dtor {
    |     // Releases the two file-level objects created in %ctor; each may still be NULL, hence the checks.
188 |     if (characterSet)
189 |         CFRelease(characterSet);
190 |     if (unicodeSet)
191 |         uset_close(unicodeSet);
192 | }
193 | 
194 | #endif


--------------------------------------------------------------------------------
/EmojiAttributes.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>Filter</key>
 6 | 	<dict>
 7 | 		<key>Bundles</key>
 8 | 		<array>
 9 | 			<string>com.apple.UIKit</string>
10 | 			<string>com.apple.WebKit.WebContent</string>
11 | 		</array>
12 | 	</dict>
13 | </dict>
14 | </plist>


--------------------------------------------------------------------------------
/EmojiSizeFix.xm:
--------------------------------------------------------------------------------
  1 | #if !__arm64e__
  2 | 
  3 | #import <PSHeader/PS.h>
  4 | #import <CoreText/CoreText.h>
  5 | #import <HBLog.h>
  6 | #import <substrate.h>
  7 | #import "WebCoreSupport/CoreGraphicsSPI.h"
  8 | 
  9 | %config(generator=MobileSubstrate)
 10 | 
 11 | short iOSVer = 0;
 12 | 
 13 | CGFontRef cgFont = NULL;
 14 | 
 15 | BOOL (*CTFontIsAppleColorEmoji)(CTFontRef);
 16 | 
 17 | bool *findIsEmoji(void *arg0) {
 18 |     // Returns the address of one byte inside arg0; the offset depends on pointer width and on iOSVer (set in %ctor).
 19 |     int flagOffset = 0xC; // used when no version threshold below matches
 20 | #if __LP64__
 21 |     if (iOSVer >= 90)
 22 |         flagOffset = 0x2B;
 23 |     else if (iOSVer >= 70)
 24 |         flagOffset = 0x8;
 25 | #else
 26 |     if (iOSVer >= 90)
 27 |         flagOffset = 0x1F;
 28 |     else if (iOSVer >= 61)
 29 |         flagOffset = 0x8;
 30 | #endif
 31 |     return (bool *)((uint8_t *)arg0 + flagOffset); }
 32 | 
 33 | CTFontRef (*FontPlatformData_ctFont)(void *);
    | // Calls the original with the byte located by findIsEmoji() temporarily forced to NO,
    | // then restores the saved value before returning the font.
 34 | %hookf(CTFontRef, FontPlatformData_ctFont, void *arg0) {
 35 |     bool *isEmoji = findIsEmoji(arg0);
 36 |     bool forEmoji = *isEmoji;
 37 |     *isEmoji = NO;
 38 |     CTFontRef font = %orig;
 39 |     *isEmoji = forEmoji;
 40 |     return font;
 41 | }
 42 | 
 43 | %group iOS60
 44 | 
 45 | void (*platformInit)(void *);
    | // Same save / clear / restore pattern as the FontPlatformData_ctFont hook, but the byte sits at a
    | // fixed offset 0x34 from arg0. %ctor initialises this group only when iOSVer == 60 on 32-bit builds.
 46 | %hookf(void, platformInit, void *arg0) {
 47 |     bool *isEmoji = (bool *)((uint8_t *)arg0 + 0x34);
 48 |     bool forEmoji = *isEmoji;
 49 |     *isEmoji = NO;
 50 |     %orig;
 51 |     *isEmoji = forEmoji;
 52 | }
 53 | 
 54 | %end
 55 | 
 56 | %group iOS6
 57 | 
 58 | int (*CTFontGetWebKitEmojiRenderMode)(void);
    | // Always reports 0 instead of calling the original.
 59 | %hookf(int, CTFontGetWebKitEmojiRenderMode) {
 60 |     return 0;
 61 | }
 62 | 
    | // NOTE(review): `style` is not referenced anywhere in the visible part of this file.
 63 | CGFontRenderingStyle style = kCGFontRenderingStyleAntialiasing | kCGFontRenderingStyleSubpixelPositioning | kCGFontRenderingStyleSubpixelQuantization | kCGFontAntialiasingStyleUnfiltered;
 64 | 
 65 | float (*platformWidthForGlyph)(void *, CGGlyph);
    | // Width override:
    | //  - glyph value 0xFE0F gets width 0 (NOTE(review): 0xFE0F is the code point of VARIATION SELECTOR-16,
    | //    while `code` is typed as a glyph ID; confirm this comparison is intended);
    | //  - for the AppleColorEmoji font (detected via CTFontIsAppleColorEmoji when available, otherwise by
    | //    PostScript name) the CoreText horizontal advance plus 4.0 is returned;
    | //  - everything else goes to the original.
 66 | %hookf(float, platformWidthForGlyph, void *arg0, CGGlyph code) {
 67 |     if (code == 0xFE0F)
 68 |         return 0.0;
    |     // The FontPlatformData is read at arg0 + 0x30 (iOSVer >= 70) or arg0 + 0x28.
 69 |     CTFontRef font = iOSVer >= 70 ? FontPlatformData_ctFont((void *)((uint8_t *)arg0 + 0x30)) : FontPlatformData_ctFont((void *)((uint8_t *)arg0 + 0x28));
 70 |     BOOL isEmojiFont = CTFontIsAppleColorEmoji && CTFontIsAppleColorEmoji(font);
 71 |     if (!isEmojiFont) {
 72 |         CFStringRef fontName = CTFontCopyPostScriptName(font);
 73 |         isEmojiFont = CFStringEqual(fontName, CFSTR("AppleColorEmoji"));
 74 |         CFRelease(fontName);
 75 |     }
 76 |     if (isEmojiFont) {
 77 |         CGSize advance = CGSizeMake(0, 0);
 78 |         CTFontGetAdvancesForGlyphs(font, kCTFontOrientationHorizontal, &code, &advance, 1);
 79 |         return advance.width + 4.0;
 80 |     }
 81 |     return %orig;
 82 | }
 83 | 
 84 | %end
 85 | 
 86 | %ctor {
    |     // Runs only on iOS 6.0 up to (not including) 10.0. Derives the coarse iOSVer bucket, resolves the
    |     // WebCore / CoreText symbols, and installs each hook group only when every symbol that group
    |     // hooks or calls was found, so a failed lookup disables the group instead of hooking NULL.
 87 |     if (IS_IOS_BETWEEN_EEX(iOS_6_0, iOS_10_0)) {
 88 |         if (IS_IOS_OR_NEWER(iOS_9_0))
 89 |             iOSVer = 90;
 90 |         else if (IS_IOS_OR_NEWER(iOS_7_0))
 91 |             iOSVer = 70;
 92 |         else if (NSFoundationVersionNumber >= NSFoundationVersionNumber_iOS_6_1)
 93 |             iOSVer = 61;
 94 |         else
 95 |             iOSVer = 60;
 96 |         MSImageRef wcref = MSGetImageByName(realPath2(@"/System/Library/PrivateFrameworks/WebCore.framework/WebCore"));
    |         // Resolved before the 32-bit groups because the platformWidthForGlyph hook calls through this pointer.
    |         FontPlatformData_ctFont = (CTFontRef (*)(void *))MSFindSymbol(wcref, "__ZNK7WebCore16FontPlatformData6ctFontEv");
    |         HBLogDebug(@"[EmojiSizeFix] Found FontPlatformData_ctFont: %d", FontPlatformData_ctFont != NULL);
 97 | #if !__LP64__
 98 |         MSImageRef ctref = MSGetImageByName(realPath2(@"/System/Library/Frameworks/CoreText.framework/CoreText"));
 99 |         CTFontIsAppleColorEmoji = (BOOL (*)(CTFontRef))MSFindSymbol(ctref, "_CTFontIsAppleColorEmoji");
100 |         CTFontGetWebKitEmojiRenderMode = (int (*)(void))MSFindSymbol(ctref, "_CTFontGetWebKitEmojiRenderMode");
101 |         platformWidthForGlyph = (float (*)(void *, CGGlyph))MSFindSymbol(wcref, "__ZNK7WebCore4Font21platformWidthForGlyphEt");
102 |         if (platformWidthForGlyph == NULL)
103 |             platformWidthForGlyph = (float (*)(void *, CGGlyph))MSFindSymbol(wcref, "__ZNK7WebCore14SimpleFontData21platformWidthForGlyphEt");
104 |         platformInit = (void (*)(void *))MSFindSymbol(wcref, "__ZN7WebCore14SimpleFontData12platformInitEv");
105 |         HBLogDebug(@"[EmojiSizeFix] Found CTFontGetWebKitEmojiRenderMode: %d", CTFontGetWebKitEmojiRenderMode != NULL);
106 |         HBLogDebug(@"[EmojiSizeFix] Found platformWidthForGlyph: %d", platformWidthForGlyph != NULL);
107 |         HBLogDebug(@"[EmojiSizeFix] Found platformInit: %d", platformInit != NULL);
108 |         if (iOSVer < 70) {
    |             // The iOS6 group hooks two functions and its width hook calls FontPlatformData_ctFont.
    |             if (CTFontGetWebKitEmojiRenderMode && platformWidthForGlyph && FontPlatformData_ctFont) {
109 |                 %init(iOS6);
    |             }
110 |             if (iOSVer == 60 && platformInit) {
111 |                 %init(iOS60);
112 |             }
113 |         }
114 | #endif
    |         if (FontPlatformData_ctFont) {
117 |             %init;
    |         }
118 |     }
119 | }
120 | 
121 | #endif
122 | 
123 | #if !__LP64__
124 | 
125 | %dtor {
    |     // Releases the file-level cgFont if it was ever set.
    |     // NOTE(review): no visible code in this file assigns cgFont, so this may be dead code; confirm.
126 |     if (cgFont)
127 |         CFRelease(cgFont);
128 | }
129 | 
130 | #endif


--------------------------------------------------------------------------------
/ICUHack.xm:
--------------------------------------------------------------------------------
  1 | #import "ICUBlocks.h"
  2 | #import "emojiprops.h"
  3 | #include "unicode/ucptrie_impl.h"
  4 | #include "unicode/ucmndata.h"
  5 | #include "unicode/udatamem.h"
  6 | #include "unicode/cmemory.h"
  7 | #import <PSHeader/PS.h>
  8 | #import <theos/IOSMacros.h>
  9 | #import <libundirect/libundirect.h>
 10 | #import <HBLog.h>
 11 | 
 12 | #include <sys/mman.h>
 13 | #include <sys/stat.h>
 14 | 
 15 | #define UEMOJI_PATH PS_ROOT_PATH("/Library/Application Support/EmojiAttributes/uemoji.icu")
 16 | 
 17 | #define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
 18 | U_CAPI void U_EXPORT2 uprv_free(void *mem);
 19 | U_CAPI void * U_EXPORT2 uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
 20 | 
 21 | void (*ucptrie_close)(UCPTrie *trie);
 22 | int32_t (*ucptrie_internalSmallIndex)(const UCPTrie *trie, UChar32 c);
 23 | UCPTrie *(*ucptrie_openFromBinary)(UCPTrieType type, UCPTrieValueWidth valueWidth, const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode);
 24 | 
 25 | static UCPTrie *legacy_ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode) {
    |     // Builds a UCPTrie view over a serialized trie in `data`.
    |     // The returned trie's index and data pointers point INTO `data` (nothing is copied), so `data`
    |     // must stay valid for as long as the trie is used. Only the UCPTrie struct itself is heap-allocated;
    |     // free it with legacy_ucptrie_close().
    |     // @param type           Expected trie type, or a negative value to accept whatever the header says.
    |     // @param valueWidth     Expected value width, or a negative value to accept whatever the header says.
    |     // @param data           Serialized trie; rejected when U_POINTER_MASK_LSB(data, 3) is non-zero.
    |     // @param length         Number of bytes available at `data`.
    |     // @param pActualLength  Optional out-parameter: bytes actually occupied by the serialized trie.
    |     // @param pErrorCode     In/out ICU status; nothing is done when it already holds a failure.
    |     // @return               The new trie, or nullptr with *pErrorCode set on any failure.
 26 |     if (U_FAILURE(*pErrorCode)) {
 27 |         return nullptr;
 28 |     }
 29 | 
 30 |     if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
 31 |             type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
 32 |             valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
 33 |         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
 34 |         return nullptr;
 35 |     }
 36 | 
 37 |     if (length < (int32_t)sizeof(UCPTrieHeader)) {
 38 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
 39 |         return nullptr;
 40 |     }
 41 | 
 42 |     const UCPTrieHeader *header = (const UCPTrieHeader *)data;
 43 |     if (header->signature != UCPTRIE_SIG) {
 44 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
 45 |         return nullptr;
 46 |     }
 47 | 
    |     // The options field packs the trie type (bits 6-7), the value width, and the high bits of two lengths.
 48 |     int32_t options = header->options;
 49 |     int32_t typeInt = (options >> 6) & 3;
 50 |     int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
 51 |     if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
 52 |             (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
 53 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
 54 |         return nullptr;
 55 |     }
 56 |     UCPTrieType actualType = (UCPTrieType)typeInt;
 57 |     UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
    |     // A negative request means "any": adopt what the header declares.
 58 |     if (type < 0) {
 59 |         type = actualType;
 60 |     }
 61 |     if (valueWidth < 0) {
 62 |         valueWidth = actualValueWidth;
 63 |     }
 64 |     if (type != actualType || valueWidth != actualValueWidth) {
 65 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
 66 |         return nullptr;
 67 |     }
 68 | 
    |     // Decode the header into a stack copy first so nothing is allocated until the length check passes.
 69 |     UCPTrie tempTrie;
 70 |     uprv_memset(&tempTrie, 0, sizeof(tempTrie));
 71 |     tempTrie.indexLength = header->indexLength;
 72 |     tempTrie.dataLength =
 73 |         ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
 74 |     tempTrie.index3NullOffset = header->index3NullOffset;
 75 |     tempTrie.dataNullOffset =
 76 |         ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;
 77 | 
 78 |     tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
 79 |     tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
 80 |     tempTrie.type = type;
 81 |     tempTrie.valueWidth = valueWidth;
 82 | 
    |     // Total serialized size: header + 16-bit index entries + data entries of 2, 4 or 1 bytes each.
 83 |     int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
 84 |     if (valueWidth == UCPTRIE_VALUE_BITS_16) {
 85 |         actualLength += tempTrie.dataLength * 2;
 86 |     } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
 87 |         actualLength += tempTrie.dataLength * 4;
 88 |     } else {
 89 |         actualLength += tempTrie.dataLength;
 90 |     }
 91 |     if (length < actualLength) {
 92 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
 93 |         return nullptr;
 94 |     }
 95 | 
 96 |     UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
 97 |     if (trie == nullptr) {
 98 |         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
 99 |         return nullptr;
100 |     }
101 |     uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
102 | 
    |     // The index array starts right after the header; the data array follows the index array.
103 |     const uint16_t *p16 = (const uint16_t *)(header + 1);
104 |     trie->index = p16;
105 |     p16 += trie->indexLength;
106 | 
107 |     int32_t nullValueOffset = trie->dataNullOffset;
108 |     if (nullValueOffset >= trie->dataLength) {
109 |         nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
110 |     }
111 |     switch (valueWidth) {
112 |     case UCPTRIE_VALUE_BITS_16:
113 |         trie->data.ptr16 = p16;
114 |         trie->nullValue = trie->data.ptr16[nullValueOffset];
115 |         break;
116 |     case UCPTRIE_VALUE_BITS_32:
117 |         trie->data.ptr32 = (const uint32_t *)p16;
118 |         trie->nullValue = trie->data.ptr32[nullValueOffset];
119 |         break;
120 |     case UCPTRIE_VALUE_BITS_8:
121 |         trie->data.ptr8 = (const uint8_t *)p16;
122 |         trie->nullValue = trie->data.ptr8[nullValueOffset];
123 |         break;
124 |     default:
    |         // valueWidth was validated above, so this should be unreachable; still, do not leak the
    |         // struct allocated a few lines earlier.
    |         uprv_free(trie);
125 |         *pErrorCode = U_INVALID_FORMAT_ERROR;
126 |         return nullptr;
127 |     }
128 | 
129 |     if (pActualLength != nullptr) {
130 |         *pActualLength = actualLength;
131 |     }
132 |     return trie;
133 | }
134 | 
135 | static int32_t legacy_ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
    |     // Walks the multi-stage index of `trie` for code point `c` and returns the offset of its value
    |     // in the trie's data array. No range check on `c` is done here.
    |     // NOTE(review): presumably callers must pass a `c` below trie->highStart; confirm at call sites.
136 |     int32_t i1 = c >> UCPTRIE_SHIFT_1;
    |     // The first-stage table sits at a different position in the index for fast and small tries.
137 |     if (trie->type == UCPTRIE_TYPE_FAST) {
138 |         i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
139 |     } else {
140 |         i1 += UCPTRIE_SMALL_INDEX_LENGTH;
141 |     }
    |     // Stage 1 yields the start of a stage-2 block; stage 2 yields the stage-3 block descriptor.
142 |     int32_t i3Block = trie->index[
143 |         (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
144 |     int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
145 |     int32_t dataBlock;
146 |     if ((i3Block & 0x8000) == 0) {
    |         // High bit clear: stage-3 entries are plain 16-bit data block offsets.
147 |         dataBlock = trie->index[i3Block + i3];
148 |     } else {
    |         // High bit set: entries come in groups of nine index slots per eight values. The first slot
    |         // of a group holds two extra high bits for each of the eight entries that follow it.
149 |         i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
150 |         i3 &= 7;
    |         // Shift this entry's two high bits into positions 16-17, then OR in its low 16 bits.
151 |         dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000;
152 |         dataBlock |= trie->index[i3Block + i3];
153 |     }
154 |     return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
155 | }
156 | 
157 | static void legacy_ucptrie_close(UCPTrie *trie) {
    |     // Frees only the UCPTrie struct; the index/data it points at belong to the buffer it was opened from.
158 |     uprv_free(trie); 
159 | }
160 | 
161 | static UDataMemory *memory = nullptr;
162 | UCPTrie *cpTrie = nullptr;
163 | 
164 | static void UDataMemory_init(UDataMemory *This) {
    |     // Zeroes every field (including heapAllocated) and then marks the length as unknown (-1).
165 |     uprv_memset(This, 0, sizeof(UDataMemory));
166 |     This->length=-1;
167 | }
168 | 
169 | static UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) {
170 |     // Nothing is allocated when the caller already carries a failure code.
171 |     if (U_FAILURE(*pErr)) {
172 |         return NULL;
173 |     }
174 |     UDataMemory *instance = (UDataMemory *)uprv_malloc(sizeof(UDataMemory));
175 |     if (instance == NULL) {
176 |         *pErr = U_MEMORY_ALLOCATION_ERROR;
177 |         return NULL;
178 |     }
179 |     // Reset all fields (length becomes -1), then record that the struct lives on the heap.
180 |     UDataMemory_init(instance);
181 |     instance->heapAllocated = TRUE;
182 |     return instance;
183 | }
184 | 
185 | static void udata_open_custom(UErrorCode *status) {
    |     // Memory-maps uemoji.icu read-only and records the mapping in the file-level `memory`.
    |     // UEMOJI_PATH is tried first, then the fixed fallback path below.
    |     // On success `memory` describes the mapping (pHeader / mapAddr = start, map = one past the end).
    |     // On any failure *status is set to a failure code and `memory` is left NULL, so no half-initialised
    |     // instance is leaked or left behind for later code to trip over.
    |     // @param status  In/out ICU status; nothing is mapped when it already holds a failure.
186 |     static const char *xinaPath = "/var/LIY/Application Support/EmojiAttributes/uemoji.icu";
187 |     int fd;
188 |     int length;
189 |     struct stat mystat;
190 |     void *data;
191 | 
192 |     memory = UDataMemory_createNewInstance(status);
193 |     if (U_FAILURE(*status)) {
194 |         HBLogError(@"[ICUHack] udata_open_custom instance failed with error %s", u_errorName(*status));
195 |         return;
196 |     }
197 | 
    |     // NOTE(review): this second init also clears the heapAllocated flag that
    |     // UDataMemory_createNewInstance just set; kept as-is to preserve existing behaviour.
198 |     UDataMemory_init(memory);
199 | 
200 |     const char *path = UEMOJI_PATH;
201 |     if (stat(path, &mystat) != 0 || mystat.st_size <= 0) {
202 |         path = xinaPath;
203 |         if (stat(path, &mystat) != 0 || mystat.st_size <= 0) {
204 |             *status = U_FILE_ACCESS_ERROR; // custom
205 |             HBLogError(@"[ICUHack] udata_open_custom stat() failed with error %d", errno);
    |             uprv_free(memory);
    |             memory = nullptr;
206 |             return;
207 |         }
208 |     }
209 |     length = mystat.st_size;
210 | 
211 |     fd = open(path, O_RDONLY);
212 |     if (fd == -1) {
213 |         *status = U_FILE_ACCESS_ERROR; // custom
214 |         HBLogError(@"[ICUHack] udata_open_custom open() failed with error %d", errno);
    |         uprv_free(memory);
    |         memory = nullptr;
215 |         return;
216 |     }
217 | 
    |     // The mapping stays valid after the descriptor is closed.
218 |     data = mmap(0, length, PROT_READ, MAP_SHARED, fd, 0);
219 |     close(fd);
220 |     if (data == MAP_FAILED) {
221 |         *status = U_FILE_ACCESS_ERROR; // custom
222 |         HBLogError(@"[ICUHack] udata_open_custom mmap() failed");
    |         uprv_free(memory);
    |         memory = nullptr;
223 |         return;
224 |     }
225 | 
226 |     memory->map = (char *)data + length;
227 |     memory->pHeader = (const DataHeader *)data;
228 |     memory->mapAddr = data;
229 | #if U_PLATFORM == U_PF_IPHONE
230 |     posix_madvise(data, length, POSIX_MADV_RANDOM);
231 | #endif
232 | }
233 | 
/**
 * Maps uemoji.icu and opens its code point trie into the global `cpTrie`.
 *
 * @param errorCode In/out ICU error code; set to a failure when the file
 *                  cannot be mapped, the index table is too short, or the
 *                  trie cannot be opened.
 */
static void EmojiProps_load(UErrorCode &errorCode) {
    udata_open_custom(&errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    const uint8_t *base = (const uint8_t *)udata_getMemory(memory);
    const int32_t *indexes = (const int32_t *)base;

    // The trie offset is divided by 4 to obtain the number of int32 indexes
    // that precede the trie.
    const int32_t indexCount = indexes[IX_CPTRIE_OFFSET] / 4;
    if (indexCount <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
        errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes.
        HBLogError(@"[ICUHack] EmojiProps_load invalid format error");
        return;
    }

    // The trie occupies the bytes between its own offset and the next index entry.
    const int32_t trieStart = indexes[IX_CPTRIE_OFFSET];
    const int32_t trieLimit = indexes[IX_CPTRIE_OFFSET + 1];
    cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8,
                                    base + trieStart, trieLimit - trieStart, nullptr, &errorCode);
    if (U_FAILURE(errorCode)) {
        HBLogError(@"[ICUHack] ucptrie_openFromBinary failed");
    }
}
258 | 
#ifndef UCHAR_RGI_EMOJI
#define UCHAR_RGI_EMOJI 71
#endif

/**
 * Answers single-code-point emoji binary properties from the custom trie.
 *
 * @param c     Code point to test.
 * @param which Property to query; only values in
 *              [UCHAR_EMOJI, UCHAR_RGI_EMOJI] are considered.
 * @return true when the property's bit is set in the trie value for `c`;
 *         false for out-of-range or unsupported properties.
 */
static UBool EmojiProps_hasBinaryPropertyImpl(UChar32 c, UProperty which) {
    // Bit position inside the trie value for each property, indexed by
    // (which - UCHAR_EMOJI). -1 marks properties not handled by this function.
    // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
    static constexpr int8_t bitFlags[] = {
        BIT_EMOJI,                  // UCHAR_EMOJI=57
        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
        -1,                         // UCHAR_REGIONAL_INDICATOR=62
        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
    };

    const bool inTable = UCHAR_EMOJI <= which && which <= UCHAR_RGI_EMOJI;
    if (!inTable) {
        return false;
    }

    const int32_t bit = bitFlags[which - UCHAR_EMOJI];
    if (bit < 0) {
        return false;  // not a property that we support in this function
    }

    const uint8_t value = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c);
    return (value >> bit) & 1;
}
292 | 
// UTrie2 lookup macros. They follow the macros of the same names in ICU's
// utrie2.h and turn a code point into an offset in a trie's index/data array.

// Offset for a supplementary code point: the index-1 entry selected by
// (c >> UTRIE2_SHIFT_1) chooses an index-2 block, the index-2 entry chooses the
// data block, and the low bits of c select the entry inside that block.
#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
    (((int32_t)((trieIndex)[ \
        (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
                      ((c)>>UTRIE2_SHIFT_1)]+ \
        (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
    <<UTRIE2_INDEX_SHIFT)+ \
    ((c)&UTRIE2_DATA_MASK))
// Offset via a single index lookup at (offset + (c >> UTRIE2_SHIFT_2)).
#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \
    (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \
    <<UTRIE2_INDEX_SHIFT)+ \
    ((c)&UTRIE2_DATA_MASK))
// Offset for any code point, chosen by range:
//   c < 0xd800         -> direct lookup
//   0xd800..0xffff     -> direct lookup; 0xd800..0xdbff use the
//                         UTRIE2_LSCP_INDEX_2_OFFSET-based offset
//   c > 0x10ffff       -> asciiOffset + UTRIE2_BAD_UTF8_DATA_OFFSET
//   c >= highStart     -> the trie's highValueIndex
//   otherwise          -> supplementary lookup
#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
    ((uint32_t)(c)<0xd800 ? \
        _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
        (uint32_t)(c)<=0xffff ? \
            _UTRIE2_INDEX_RAW( \
                (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
                (trie)->index, c) : \
            (uint32_t)(c)>0x10ffff ? \
                (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
                (c)>=(trie)->highStart ? \
                    (trie)->highValueIndex : \
                    _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))
// Reads the value for c from the trie member named by `data`.
#define _UTRIE2_GET(trie, data, asciiOffset, c) \
    (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]
// 16-bit lookup: the values are read from the `index` array itself, with
// indexLength passed as the asciiOffset.
#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c))
319 | 
%group getUnicodeProperties

// Address of ICU's u_getUnicodeProperties(); filled in by %ctor.
uint32_t (*u_getUnicodeProperties)(UChar32, int32_t) = NULL;

// Replacement that answers from this tweak's propsVectors tables: looks up the
// vector row for `c` in propsVectorsTrie and returns the requested column,
// or 0 when `column` is at or past propsVectorsColumns.
%hookf(uint32_t, u_getUnicodeProperties, UChar32 c, int32_t column) {
    if (column < propsVectorsColumns) {
        const uint16_t row = UTRIE2_GET16(&propsVectorsTrie, c);
        return propsVectors[row + column];
    }
    return 0;
}

%end
331 | 
%group hasBinaryProperty

// Reports a property as set when either the custom emoji data or the
// original ICU implementation says so; the custom data is consulted first.
%hookf(UBool, u_hasBinaryProperty, UChar32 c, UProperty which) {
    if (EmojiProps_hasBinaryPropertyImpl(c, which)) {
        return true;
    }
    return %orig != 0;
}

%end
339 | 
%group inlineEmojiData

// Serves requests for the "uemoji.icu" data item from this tweak's own file.
// Every other request goes to the original udata_openChoice(), as do calls
// that arrive with a NULL or already-failed error code.
// Returns NULL when the custom file cannot be loaded, so callers never get a
// handle that has no mapping behind it.
%hookf(UDataMemory *, udata_openChoice, const char *path, const char *type, const char *name, UDataMemoryIsAcceptable *isAcceptable, void *context, UErrorCode *pErrorCode) {
    const bool wantsEmojiData = type && name && strcmp(type, "icu") == 0 && strcmp(name, "uemoji") == 0;
    if (!wantsEmojiData || pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return %orig;
    }
    udata_open_custom(pErrorCode);
    return U_FAILURE(*pErrorCode) ? NULL : memory;
}

%end
351 | 
// Constructor: locates ICU's private u_getUnicodeProperties() and installs the
// hooks that fit the running iOS version.
//  - iOS 15.4+: hook udata_openChoice() so ICU loads our uemoji.icu.
//  - older:     load uemoji.icu ourselves and hook u_hasBinaryProperty().
%ctor {
    MSImageRef ref = MSGetImageByName(realPath2(@"/usr/lib/libicucore.A.dylib"));
#ifdef __LP64__
#if TARGET_OS_SIMULATOR
    // Unique bytes (iOS 13.5): E03F01C8 89C0488D 0D15CC1B (offset: 100)
    // Unique bytes (iOS 12.4): 0583E03F 01C889C0 488D0DB4 (offset: 100)
    // Unique bytes (iOS 8.2) : 554889E5 31C083FE 027F5C81 (offset: 0)
    // Starting byte (iOS 13.5): 0x31
    // Starting byte (iOS 12.4): 0x55
    // Starting byte (iOS 8.2) : 0x55
    void *rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0xE0, 0x3F, 0x01, 0xC8, 0x89, 0xC0, 0x48, 0x8D, 0x0D, 0x15, 0xCC, 0x1B}, 12, 0x31);
    if (rp == NULL)
        rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x05, 0x83, 0xE0, 0x3F, 0x01, 0xC8, 0x89, 0xC0, 0x48, 0x8D, 0x0D, 0xB4}, 12, 0x55);
    if (rp == NULL)
        rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x55, 0x48, 0x89, 0xE5, 0x31, 0xC0, 0x83, 0xFE, 0x02, 0x7F, 0x5C, 0x81}, 12, 0x55);
#else
    // Unique bytes: 3F080071 6D000054 00008052 C0035FD6 (offset: 0)
    // Starting byte: 0x3F
    void *rp = libundirect_find(@"libicucore.A.dylib", (unsigned char[]){0x3F, 0x08, 0x00, 0x71, 0x6D, 0x00, 0x00, 0x54, 0x00, 0x00, 0x80, 0x52, 0xC0, 0x03, 0x5F, 0xD6}, 16, 0x3F);
#endif
#else
    // 32-bit: the target is taken to sit 0x16 bytes past u_isUAlphabetic.
    // Skip the hook when that symbol is missing, instead of hooking the
    // bogus address 0x16.
    const uint8_t *p = (const uint8_t *)MSFindSymbol(ref, "_u_isUAlphabetic");
    void *rp = p ? (void *)(p + 0x16) : NULL;
#endif
    u_getUnicodeProperties = (uint32_t (*)(UChar32, int32_t))rp;
    HBLogDebug(@"[ICUHack] u_getUnicodeProperties found %d", u_getUnicodeProperties != NULL);
    if (u_getUnicodeProperties) {
        %init(getUnicodeProperties);
    }
    if (IS_IOS_OR_NEWER(iOS_15_4)) {
        HBLogDebug(@"[ICUHack] Hooking inline emoji data");
        %init(inlineEmojiData);
    } else {
        ucptrie_openFromBinary = (UCPTrie *(*)(UCPTrieType, UCPTrieValueWidth, const void *, int32_t, int32_t *, UErrorCode *))_PSFindSymbolCallable(ref, "_ucptrie_openFromBinary");
        ucptrie_internalSmallIndex = (int32_t (*)(const UCPTrie *, UChar32))_PSFindSymbolCallable(ref, "_ucptrie_internalSmallIndex");
        ucptrie_close = (void (*)(UCPTrie *))_PSFindSymbolCallable(ref, "_ucptrie_close");
        // Log the lookup results before the fallbacks are assigned; afterwards
        // every pointer is non-NULL and the log would always print 1.
        HBLogDebug(@"[ICUHack] ucptrie_openFromBinary found: %d", ucptrie_openFromBinary != NULL);
        HBLogDebug(@"[ICUHack] ucptrie_internalSmallIndex found: %d", ucptrie_internalSmallIndex != NULL);
        HBLogDebug(@"[ICUHack] ucptrie_close found: %d", ucptrie_close != NULL);
        // Use the bundled implementations for symbols libicucore does not export.
        if (ucptrie_openFromBinary == NULL)
            ucptrie_openFromBinary = legacy_ucptrie_openFromBinary;
        if (ucptrie_internalSmallIndex == NULL)
            ucptrie_internalSmallIndex = legacy_ucptrie_internalSmallIndex;
        if (ucptrie_close == NULL)
            ucptrie_close = legacy_ucptrie_close;
        UErrorCode errorCode = U_ZERO_ERROR;
        EmojiProps_load(errorCode);
        if (U_FAILURE(errorCode)) {
            HBLogDebug(@"[ICUHack] Failed to load uemoji.icu because %s", u_errorName(errorCode));
            return;
        }
        HBLogDebug(@"[ICUHack] Hooking hasBinaryProperty");
        %init(hasBinaryProperty);
    }
}
407 | 
// Destructor: closes the mapped data file and the trie if they were opened.
%dtor {
    if (memory != nullptr) {
        udata_close(memory);
    }
    if (cpTrie != nullptr) {
        ucptrie_close(cpTrie);
    }
}


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Theos build script for the EmojiAttributes library.
PACKAGE_VERSION = 1.9.0~b3

# Pick the build target:
#   SIMULATOR=1       -> simulator build for arm64 and x86_64
#   rootless/roothide -> 16.5 / 15.0 target
#   otherwise         -> 14.5 / 5.0 target, built with the Xcode 11 toolchain
# (presumably TARGET is platform:compiler:sdk:deployment — confirm against Theos docs)
ifeq ($(SIMULATOR),1)
	TARGET = simulator:clang:latest:8.0
	ARCHS = arm64 x86_64
else
	ifeq ($(THEOS_PACKAGE_SCHEME),rootless)
		TARGET = iphone:clang:16.5:15.0
	else ifeq ($(THEOS_PACKAGE_SCHEME),roothide)
		TARGET = iphone:clang:16.5:15.0
	else
		TARGET = iphone:clang:14.5:5.0
		export PREFIX = $(THEOS)/toolchain/Xcode11.xctoolchain/usr/bin/
	endif
endif

include $(THEOS)/makefiles/common.mk

LIBRARY_NAME = EmojiAttributes
$(LIBRARY_NAME)_INSTALL_PATH = /Library/MobileSubstrate/DynamicLibraries/EmojiPort
# ICU and CoreText hacks are always built; the remaining hacks are added only
# when no package scheme is set.
$(LIBRARY_NAME)_FILES = ICUHack.xm CoreTextHack.xm
ifeq ($(THEOS_PACKAGE_SCHEME),)
$(LIBRARY_NAME)_FILES += CoreFoundationHack.xm TextInputHack.xm WebCoreHack.xm EmojiSizeFix.xm
endif
$(LIBRARY_NAME)_CCFLAGS = -std=c++11 -stdlib=libc++
$(LIBRARY_NAME)_EXTRA_FRAMEWORKS = CydiaSubstrate
$(LIBRARY_NAME)_LIBRARIES = icucore undirect
$(LIBRARY_NAME)_USE_SUBSTRATE = 1
$(LIBRARY_NAME)_GENERATOR = MobileSubstrate

include $(THEOS_MAKE_PATH)/library.mk

# Simulator only: `make setup` rebuilds from scratch and copies the dylib and
# its plist into /opt/simject.
ifeq ($(SIMULATOR),1)
setup:: clean all
	@rm -f /opt/simject/$(LIBRARY_NAME).dylib
	@cp -v $(THEOS_OBJ_DIR)/$(LIBRARY_NAME).dylib /opt/simject/$(LIBRARY_NAME).dylib
	@cp -v $(PWD)/$(LIBRARY_NAME).plist /opt/simject/$(LIBRARY_NAME).plist
endif
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # EmojiAttributes
 2 | 
 3 | Various under-the-hood fixes for emoji display.
 4 | 
 5 | # Technical Information
 6 | 
 7 | ## CoreText
 8 | This framework is an intermediate layer between text display and text representation in iOS. It mainly handles the character sets of supported fonts, including emoji.
 9 | 
10 | ### Character Set Addition
11 | The emoji character set is cached in bitmap format and is retrievable from `CreateCharacterSetForFont()`. The set changes from one iOS version to the next. We override it with the latest character set. To get the character set needed, we dump one from `libGSFontCache.dylib`, which is what [EmojiCategory](https://github.com/PoomSmart/EmojiCategory) does. Without this override, emojis can all be shadowed (blacked out).
12 | 
13 | ### Emoji Presentation Addition (iOS 11+)
14 | As of iOS 11, a weird function `IsDefaultEmojiPresentation()` seems to determine which emojis are really supported by the system before showing them. The said representation is an array of emoji strings that can be easily overridden.
15 | 
16 | ## CoreFoundation
17 | This framework handles displaying emojis in most native (UIKit) applications. `CFStringGetRangeOfCharacterClusterAtIndex()` consults the cached emoji character set to determine the index of the given character, taking into account that it can be one of the characters (cluster) of one single emoji. We override this character set as well. Without this workaround, unsupported emojis will be rendered as a blank or a "?" icon.
18 | 
19 | ## WebCore
20 | This framework does much of the work of displaying web content, including displaying emojis in web pages. Until some point in the past, Apple hardcoded all emoji code points here so that it could iterate through the characters of a web page and apply a compatible (emoji) font to them. For the present-day approach, read the ICU section below. Without this hack, emojis will be displayed as blank rectangles.
21 | 
22 | ## TextInput (iOS < 10)
23 | `-[NSString(TIExtras) _containsEmoji]` involves opening the emoji bitmap file `TIUserDictionaryEmojiCharacterSet.bitmap` residing in `/System/Library/PrivateFrameworks/TextInput.framework`. It simply needs to be replaced by the most recent bitmap so that applications that check for emoji substrings work correctly.
24 | 
25 | ## ICU
26 | Apple has transitioned to relying more on the ICU API when it has to deal with emojis. Instead of hardcoding emoji code points in the `CoreFoundation` framework, it directly consults ICU, which already has similar information. At a high level, ICU embeds "props" data related to emojis inside `libicucore.A.dylib`. EmojiAttributes does its best to redirect reads of that data to its own copy.
27 | 
28 | ## Emoji Size Fix (iOS 6 - 9)
29 | Removes the WebCore/CoreText emoji size restriction. See [here](https://emojier.com/faq/15122z-ios-small-font-size-emoji-hell).
30 | 


--------------------------------------------------------------------------------
/TextInputHack.xm:
--------------------------------------------------------------------------------
 1 | #if !__arm64e__
 2 | 
 3 | #import <PSHeader/PS.h>
 4 | #import <dlfcn.h>
 5 | 
 6 | %hook NSBundle
 7 | 
 8 | - (NSString *)pathForResource:(NSString *)resourceName ofType:(NSString *)resourceType {
 9 |     if ([resourceName isEqualToString:@"TIUserDictionaryEmojiCharacterSet"]) {
10 |         NSBundle *bundle = [[self class] bundleWithPath:@"/Library/Application Support/EmojiAttributes"];
11 |         return [bundle pathForResource:@"emoji" ofType:@"bitmap"];
12 |     }
13 |     return %orig;
14 | }
15 | 
16 | %end
17 | 
// Constructor: the NSBundle hook is installed only on iOS versions before 10.
// The TextInput framework is loaded first, then the hooks are initialized.
%ctor {
    if (!IS_IOS_OR_NEWER(iOS_10_0)) {
        dlopen(realPath2(@"/System/Library/PrivateFrameworks/TextInput.framework/TextInput"), RTLD_LAZY);
        %init;
    }
}
24 | 
25 | #endif


--------------------------------------------------------------------------------
/WebCoreHack.xm:
--------------------------------------------------------------------------------
  1 | #if !__arm64e__
  2 | 
  3 | #import <PSHeader/PS.h>
  4 | #import "WebCoreSupport/CharactersProperties.h"
  5 | #import "WebCoreSupport/RenderText.h"
  6 | #import "WebCoreSupport/CoreGraphicsSPI.h"
  7 | #import <CoreText/CoreText.h>
  8 | #import <substrate.h>
  9 | #import <HBLog.h>
 10 | 
 11 | #include <unicode/utf16.h>
 12 | 
 13 | %config(generator=MobileSubstrate)
 14 | 
// Text code paths returned by the characterRangeCodePath hook below.
// NOTE(review): presumably the enumerator order must match WebCore's own
// CodePath enum, since the values cross the hook boundary — confirm.
enum CodePath {
    Auto, Simple, Complex, SimpleWithGlyphOverflow
};
 18 | 
 19 | using namespace WebCore;
 20 | using namespace WTF;
 21 | 
 22 | bool (*isCJKIdeograph)(UChar32);
 23 | %hookf(bool, isCJKIdeograph, UChar32 c) {
 24 |     if (c >= 0x4E00 && c <= 0x9FFF)
 25 |         return true;
 26 |     if (c >= 0x3400 && c <= 0x4DBF)
 27 |         return true;
 28 |     if (c >= 0x2E80 && c <= 0x2EFF)
 29 |         return true;
 30 |     if (c >= 0x2F00 && c <= 0x2FDF)
 31 |         return true;
 32 |     if (c >= 0x31C0 && c <= 0x31EF)
 33 |         return true;
 34 |     if (c >= 0xF900 && c <= 0xFAFF)
 35 |         return true;
 36 |     if (c >= 0x20000 && c <= 0x2A6DF)
 37 |         return true;
 38 |     if (c >= 0x2A700 && c <= 0x2B73F)
 39 |         return true;
 40 |     if (c >= 0x2B740 && c <= 0x2B81F)
 41 |         return true;
 42 |     if (c >= 0x2F800 && c <= 0x2FA1F)
 43 |         return true;
 44 |     return false;
 45 | }
 46 | 
 47 | bool (*isCJKIdeographOrSymbol)(UChar32);
 48 | %hookf(bool, isCJKIdeographOrSymbol, UChar32 c) {
 49 |     if ((c == 0x2C7) || (c == 0x2CA) || (c == 0x2CB) || (c == 0x2D9))
 50 |         return true;
 51 |     if ((c == 0x2020) || (c == 0x2021) || (c == 0x2030) || (c == 0x203B) || (c == 0x203C)
 52 |         || (c == 0x2042) || (c == 0x2047) || (c == 0x2048) || (c == 0x2049) || (c == 0x2051)
 53 |         || (c == 0x20DD) || (c == 0x20DE) || (c == 0x2100) || (c == 0x2103) || (c == 0x2105)
 54 |         || (c == 0x2109) || (c == 0x210A) || (c == 0x2113) || (c == 0x2116) || (c == 0x2121)
 55 |         || (c == 0x212B) || (c == 0x213B) || (c == 0x2150) || (c == 0x2151) || (c == 0x2152))
 56 |         return true;
 57 |     if (c >= 0x2156 && c <= 0x215A)
 58 |         return true;
 59 |     if (c >= 0x2160 && c <= 0x216B)
 60 |         return true;
 61 |     if (c >= 0x2170 && c <= 0x217B)
 62 |         return true;
 63 |     if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312) || (c == 0x23BE) || (c == 0x23BF))
 64 |         return true;
 65 |     if (c >= 0x23C0 && c <= 0x23CC)
 66 |         return true;
 67 |     if ((c == 0x23CE) || (c == 0x2423))
 68 |         return true;
 69 |     if (c >= 0x2460 && c <= 0x2492)
 70 |         return true;
 71 |     if (c >= 0x249C && c <= 0x24FF)
 72 |         return true;
 73 |     if ((c == 0x25A0) || (c == 0x25A1) || (c == 0x25A2) || (c == 0x25AA) || (c == 0x25AB))
 74 |         return true;
 75 |     if ((c == 0x25B1) || (c == 0x25B2) || (c == 0x25B3) || (c == 0x25B6) || (c == 0x25B7) || (c == 0x25BC) || (c == 0x25BD))
 76 |         return true;
 77 |     if ((c == 0x25C0) || (c == 0x25C1) || (c == 0x25C6) || (c == 0x25C7) || (c == 0x25C9) || (c == 0x25CB) || (c == 0x25CC))
 78 |         return true;
 79 |     if (c >= 0x25CE && c <= 0x25D3)
 80 |         return true;
 81 |     if (c >= 0x25E2 && c <= 0x25E6)
 82 |         return true;
 83 |     if (c == 0x25EF)
 84 |         return true;
 85 |     if (c >= 0x2600 && c <= 0x2603)
 86 |         return true;
 87 |     if ((c == 0x2605) || (c == 0x2606) || (c == 0x260E) || (c == 0x2616) || (c == 0x2617) || (c == 0x2640) || (c == 0x2642))
 88 |         return true;
 89 |     if (c >= 0x2660 && c <= 0x266F)
 90 |         return true;
 91 |     if (c >= 0x2672 && c <= 0x267D)
 92 |         return true;
 93 |     if ((c == 0x26A0) || (c == 0x26BD) || (c == 0x26BE) || (c == 0x2713) || (c == 0x271A) || (c == 0x273F) || (c == 0x2740) || (c == 0x2756))
 94 |         return true;
 95 |     if (c >= 0x2776 && c <= 0x277F)
 96 |         return true;
 97 |     if (c == 0x2B1A)
 98 |         return true;
 99 |     if (c >= 0x2FF0 && c <= 0x2FFF)
100 |         return true;
101 |     if (c >= 0x3000 && c < 0x3030)
102 |         return true;
103 |     if (c > 0x3030 && c <= 0x303F)
104 |         return true;
105 |     if (c >= 0x3040 && c <= 0x309F)
106 |         return true;
107 |     if (c >= 0x30A0 && c <= 0x30FF)
108 |         return true;
109 |     if (c >= 0x3100 && c <= 0x312F)
110 |         return true;
111 |     if (c >= 0x3190 && c <= 0x319F)
112 |         return true;
113 |     if (c >= 0x31A0 && c <= 0x31BF)
114 |         return true;
115 |     if (c >= 0x3200 && c <= 0x32FF)
116 |         return true;
117 |     if (c >= 0x3300 && c <= 0x33FF)
118 |         return true;
119 |     if (c >= 0xF860 && c <= 0xF862)
120 |         return true;
121 |     if (c >= 0xFE30 && c <= 0xFE4F)
122 |         return true;
123 |     if ((c == 0xFE10) || (c == 0xFE11) || (c == 0xFE12) || (c == 0xFE19))
124 |         return true;
125 |     if ((c == 0xFF0D) || (c == 0xFF1B) || (c == 0xFF1C) || (c == 0xFF1E))
126 |         return false;
127 |     if (c >= 0xFF00 && c <= 0xFFEF)
128 |         return true;
129 |     if (c == 0x1F100)
130 |         return true;
131 |     if (c >= 0x1F110 && c <= 0x1F129)
132 |         return true;
133 |     if (c >= 0x1F130 && c <= 0x1F149)
134 |         return true;
135 |     if (c >= 0x1F150 && c <= 0x1F169)
136 |         return true;
137 |     if (c >= 0x1F170 && c <= 0x1F189)
138 |         return true;
139 |     if (c >= 0x1F200 && c <= 0x1F6C5)
140 |         return true;
141 |     return isCJKIdeograph(c);
142 | }
143 | 
// Resolved elsewhere; called below to fetch the text of a RenderText.
String (*RenderText_originalText)(void *);
int (*RenderText_previousOffsetForBackwardDeletion)(void *, int);

// Replacement for RenderText::previousOffsetForBackwardDeletion().
// Given the UTF-16 offset `current`, returns the offset that a single backward
// delete should move to. The first loop walks backwards over emoji sequences
// (ZWJ groups, skin-tone modifiers, regional-indicator pairs) and combining
// marks; the second part extends the span over a Hangul syllable.
%hookf(int, RenderText_previousOffsetForBackwardDeletion, void *arg0, int current) {
    String m_text = RenderText_originalText(arg0);
    StringImpl& text = *m_text.impl();
    // What the previously visited (i.e. following) character was.
    bool sawRegionalIndicator = false;
    bool sawEmojiGroupCandidate = false;
    bool sawEmojiFitzpatrickModifier = false;
    while (current > 0) {
        UChar32 character;
        // Steps `current` back by one code point and reads it.
        U16_PREV(text, 0, current, character);
        if (sawEmojiGroupCandidate) {
            sawEmojiGroupCandidate = false;
            // A ZWJ before a group candidate keeps the sequence going.
            if (character == zeroWidthJoiner)
                continue;
            // Otherwise this character is not part of the group: step forward
            // again so it is not deleted.
            U16_FWD_1_UNSAFE(text, current);
            break;
        }
        if (sawEmojiFitzpatrickModifier) {
            // A second modifier in a row is left alone.
            if (isEmojiFitzpatrickModifier(character)) {
                U16_FWD_1_UNSAFE(text, current);
                break;
            }
            // Anything but a variation selector ends the sequence here, with
            // this character included in the deletion.
            if (!isVariationSelector(character))
                break;
        }
        if (sawRegionalIndicator) {
            // Two regional indicators in a row are deleted together.
            if (isEmojiRegionalIndicator(character))
                break;
            // Not a pair: step forward again so this character is kept.
            U16_FWD_1_UNSAFE(text, current);
        }
        if (isInArmenianToLimbuRange(character))
            break;
        if (isEmojiRegionalIndicator(character)) {
            sawRegionalIndicator = true;
            continue;
        }
        if (isEmojiFitzpatrickModifier(character)) {
            sawEmojiFitzpatrickModifier = true;
            continue;
        }
        if (isEmojiGroupCandidate(character)) {
            sawEmojiGroupCandidate = true;
            continue;
        }
        // Keep walking back over marks and the two halfwidth katakana sound
        // marks (U+FF9E, U+FF9F); stop at anything else.
        if (!isMark(character) && character != 0xFF9E && character != 0xFF9F)
            break;
    }
    if (current <= 0)
        return current;
    // Hangul handling: classify the character at `current`, then walk
    // backwards while the preceding characters still combine with it.
    UChar character = text[current];
    if ((character >= hangulChoseongStart && character <= hangulJongseongEnd) || (character >= hangulSyllableStart && character <= hangulSyllableEnd)) {
        HangulState state;
        if (character < hangulJungseongStart)
            state = HangulState::L;
        else if (character < hangulJongseongStart)
            state = HangulState::V;
        else if (character < hangulSyllableStart)
            state = HangulState::T;
        else
            state = isHangulLVT(character) ? HangulState::LVT : HangulState::LV;
        // Only Hangul jamo and precomposed syllables are considered.
        while (current > 0 && (character = text[current - 1]) >= hangulChoseongStart && character <= hangulSyllableEnd && (character <= hangulJongseongEnd || character >= hangulSyllableStart)) {
            switch (state) {
                case HangulState::V:
                    if (character <= hangulChoseongEnd)
                        state = HangulState::L;
                    else if (character >= hangulSyllableStart && character <= hangulSyllableEnd && !isHangulLVT(character))
                        state = HangulState::LV;
                    else if (character > hangulJungseongEnd)
                        state = HangulState::Break;
                    break;
                case HangulState::T:
                    if (character >= hangulJungseongStart && character <= hangulJungseongEnd)
                        state = HangulState::V;
                    else if (character >= hangulSyllableStart && character <= hangulSyllableEnd)
                        state = isHangulLVT(character) ? HangulState::LVT : HangulState::LV;
                    else if (character < hangulJungseongStart)
                        state = HangulState::Break;
                    break;
                default:
                    // L, LV and LVT can only be preceded by another leading consonant.
                    state = (character < hangulJungseongStart) ? HangulState::L : HangulState::Break;
                    break;
            }
            if (state == HangulState::Break)
                break;
            --current;
        }
    }
    return current;
}
234 | 
235 | CodePath (*characterRangeCodePath)(const UChar *, unsigned);
236 | %hookf(CodePath, characterRangeCodePath, const UChar *characters, unsigned len) {
237 |     CodePath result = Simple;
238 | 	bool previousCharacterIsEmojiGroupCandidate = false;
239 | 	for (unsigned i = 0; i < len; ++i) {
240 |         const UChar c = characters[i];
241 |         if (c == zeroWidthJoiner && previousCharacterIsEmojiGroupCandidate)
242 |             return Complex;
243 |         
244 |         previousCharacterIsEmojiGroupCandidate = false;
245 |         if (c < 0x2E5) // U+02E5 through U+02E9 (Modifier Letters : Tone letters) 
246 |             continue;
247 |         if (c <= 0x2E9) 
248 |             return Complex;
249 |         
250 |         if (c < 0x300) // U+0300 through U+036F Combining diacritical marks
251 |             continue;
252 |         if (c <= 0x36F)
253 |             return Complex;
254 |         
255 |         if (c < 0x0591 || c == 0x05BE) // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
256 |             continue;
257 |         if (c <= 0x05CF)
258 |             return Complex;
259 |         
260 |         // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic,
261 |         // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
262 |         // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
263 |         if (c < 0x0600) 
264 |             continue;
265 |         if (c <= 0x109F)
266 |             return Complex;
267 |         
268 |         // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left here if you precompose;
269 |         // Modern Korean will be precomposed as a result of step A)
270 |         if (c < 0x1100)
271 |             continue;
272 |         if (c <= 0x11FF)
273 |             return Complex;
274 |         
275 |         if (c < 0x135D) // U+135D through U+135F Ethiopic combining marks
276 |             continue;
277 |         if (c <= 0x135F)
278 |             return Complex;
279 |         
280 |         if (c < 0x1700) // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian
281 |             continue;
282 |         if (c <= 0x18AF)
283 |             return Complex;
284 |         
285 |         if (c < 0x1900) // U+1900 through U+194F Limbu (Unicode 4.0)
286 |             continue;
287 |         if (c <= 0x194F)
288 |             return Complex;
289 |         
290 |         if (c < 0x1980) // U+1980 through U+19DF New Tai Lue
291 |             continue;
292 |         if (c <= 0x19DF)
293 |             return Complex;
294 |         
295 |         if (c < 0x1A00) // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic
296 |             continue;
297 |         if (c <= 0x1CFF)
298 |             return Complex;
299 |         
300 |         if (c < 0x1DC0) // U+1DC0 through U+1DFF Comining diacritical mark supplement
301 |             continue;
302 |         if (c <= 0x1DFF)
303 |             return Complex;
304 |         
305 |         // U+1E00 through U+2000 characters with diacritics and stacked diacritics
306 |         if (c <= 0x2000) {
307 |         result = SimpleWithGlyphOverflow;
308 |             continue;
309 |         }
310 |         
311 |         if (c < 0x20D0) // U+20D0 through U+20FF Combining marks for symbols
312 |             continue;
313 |         if (c <= 0x20FF)
314 |             return Complex;
315 |         
316 |         if (c < 0x26F9)
317 |             continue;
318 |         if (c < 0x26FA)
319 |             return Complex;
320 |         
321 |         if (c < 0x2CEF) // U+2CEF through U+2CF1 Combining marks for Coptic
322 |             continue;
323 |         if (c <= 0x2CF1)
324 |             return Complex;
325 |         
326 |         if (c < 0x302A) // U+302A through U+302F Ideographic and Hangul Tone marks
327 |             continue;
328 |         if (c <= 0x302F)
329 |             return Complex;
330 |         
331 |         if (c < 0xA67C) // U+A67C through U+A67D Combining marks for old Cyrillic
332 |             continue;
333 |         if (c <= 0xA67D)
334 |             return Complex;
335 |         
336 |         if (c < 0xA6F0) // U+A6F0 through U+A6F1 Combining mark for Bamum
337 |             continue;
338 |         if (c <= 0xA6F1)
339 |             return Complex;
340 |         
341 |         // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
342 |         // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek,
343 |         if (c < 0xA800) 
344 |             continue;
345 |         if (c <= 0xABFF)
346 |             return Complex;
347 |         
348 |         if (c < 0xD7B0) // U+D7B0 through U+D7FF Hangul Jamo Ext. B
349 |             continue;
350 |         if (c <= 0xD7FF)
351 |             return Complex;
352 |         
353 |         if (c <= 0xDBFF) {
354 |         // High surrogate
355 |         
356 |         if (i == len - 1)
357 |             continue;
358 |         
359 |         UChar next = characters[++i];
360 |         if (!U16_IS_TRAIL(next))
361 |             continue;
362 |         
363 |         UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);
364 |         
365 |         if (supplementaryCharacter < 0x10A00)
366 |             continue;
367 |         if (supplementaryCharacter < 0x10A60) // Kharoshthi
368 |             return Complex;
369 |         if (supplementaryCharacter < 0x11000)
370 |             continue;
371 |         if (supplementaryCharacter < 0x11080) // Brahmi
372 |             return Complex;
373 |         if (supplementaryCharacter < 0x110D0) // Kaithi
374 |             return Complex;
375 |         if (supplementaryCharacter < 0x11100)
376 |             continue;
377 |         if (supplementaryCharacter < 0x11150) // Chakma
378 |             return Complex;
379 |         if (supplementaryCharacter < 0x11180) // Mahajani
380 |             return Complex;
381 |         if (supplementaryCharacter < 0x111E0) // Sharada
382 |             return Complex;
383 |         if (supplementaryCharacter < 0x11200)
384 |             continue;
385 |         if (supplementaryCharacter < 0x11250) // Khojki
386 |             return Complex;
387 |         if (supplementaryCharacter < 0x112B0)
388 |             continue;
389 |         if (supplementaryCharacter < 0x11300) // Khudawadi
390 |             return Complex;
391 |         if (supplementaryCharacter < 0x11380) // Grantha
392 |             return Complex;
393 |         if (supplementaryCharacter < 0x11400)
394 |             continue;
395 |         if (supplementaryCharacter < 0x11480) // Newa
396 |             return Complex;
397 |         if (supplementaryCharacter < 0x114E0) // Tirhuta
398 |             return Complex;
399 |         if (supplementaryCharacter < 0x11580)
400 |             continue;
401 |         if (supplementaryCharacter < 0x11600) // Siddham
402 |             return Complex;
403 |         if (supplementaryCharacter < 0x11660) // Modi
404 |             return Complex;
405 |         if (supplementaryCharacter < 0x11680)
406 |             continue;
407 |         if (supplementaryCharacter < 0x116D0) // Takri
408 |             return Complex;
409 |         if (supplementaryCharacter < 0x11C00)
410 |             continue;
411 |         if (supplementaryCharacter < 0x11C70) // Bhaiksuki
412 |             return Complex;
413 |         if (supplementaryCharacter < 0x11CC0) // Marchen
414 |             return Complex;
415 |         if (supplementaryCharacter < 0x1E900)
416 |             continue;
417 |         if (supplementaryCharacter < 0x1E960) // Adlam
418 |             return Complex;
419 |         if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Regional Indicator Symbols
420 |             continue;
421 |         if (supplementaryCharacter <= 0x1F1FF)
422 |             return Complex;
423 |         
424 |         if (isEmojiFitzpatrickModifier(supplementaryCharacter))
425 |             return Complex;
426 |         if (isEmojiGroupCandidate(supplementaryCharacter)) {
427 |             previousCharacterIsEmojiGroupCandidate = true;
428 |             continue;
429 |         }
430 |         
431 |         if (supplementaryCharacter < 0xE0000)
432 |             continue;
433 |         if (supplementaryCharacter < 0xE0080) // Tags
434 |             return Complex;
435 |         if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Unicode variation selectors.
436 |             continue;
437 |         if (supplementaryCharacter <= 0xE01EF)
438 |             return Complex;
439 |         
440 |         // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other Complex scripts
441 |         // in plane 1 or higher.
442 |         
443 |             continue;
444 |         }
445 |         
446 |         if (c < 0xFE00) // U+FE00 through U+FE0F Unicode variation selectors
447 |             continue;
448 |         if (c <= 0xFE0F)
449 |             return Complex;
450 |         
451 |         if (c < 0xFE20) // U+FE20 through U+FE2F Combining half marks
452 |             continue;
453 |         if (c <= 0xFE2F)
454 |             return Complex;
455 | 	}
456 | 	return result;
457 | }
458 | 
459 | bool (*advanceByCombiningCharacterSequence)(const UChar *&, const UChar *, UChar32&, unsigned&);
460 | %hookf(bool, advanceByCombiningCharacterSequence, const UChar*&iterator, const UChar* end, UChar32& baseCharacter, unsigned& markCount) {
461 |     // Decode the base code point and move the caller's iterator past it.
462 |     markCount = 0;
463 |     unsigned baseLength = 0;
464 |     unsigned available = end - iterator;
465 |     U16_NEXT(iterator, baseLength, available, baseCharacter);
466 |     iterator += baseLength;
467 |     if (U_IS_SURROGATE(baseCharacter))
468 |         return false; // the decoded base is still a surrogate code unit
469 |     const bool baseIsGroupCandidate = isEmojiGroupCandidate(baseCharacter);
470 |     bool awaitingSecondIndicator = isEmojiRegionalIndicator(baseCharacter);
471 |     bool previousWasJoiner = false;
472 |     // Absorb following code points that attach to the base; markCount grows by their UTF-16 length.
473 |     while (iterator < end) {
474 |         UChar32 candidate;
475 |         unsigned candidateLength = 0;
476 |         U16_NEXT(iterator, candidateLength, static_cast<unsigned>(end - iterator), candidate);
477 |         // Variation selectors and Fitzpatrick modifiers always attach.
478 |         bool attaches = isVariationSelector(candidate) || isEmojiFitzpatrickModifier(candidate);
479 |         if (awaitingSecondIndicator && isEmojiRegionalIndicator(candidate)) {
480 |             awaitingSecondIndicator = false; // at most one indicator pairs with the base
481 |             attaches = true;
482 |         }
483 |         // A group candidate directly after a joiner continues the sequence.
484 |         attaches = attaches || (previousWasJoiner && isEmojiGroupCandidate(candidate));
485 |         // A joiner only counts when the base itself was a group candidate.
486 |         previousWasJoiner = baseIsGroupCandidate && candidate == zeroWidthJoiner;
487 |         attaches = attaches || previousWasJoiner;
488 |         // Anything else must match U_GC_M_MASK (a mark) to stay in the sequence.
489 |         if (!attaches && !(U_GET_GC_MASK(candidate) & U_GC_M_MASK))
490 |             break;
491 |         markCount += candidateLength;
492 |         iterator += candidateLength;
493 |     }
494 |     return true;
495 | }
496 | 
497 | %ctor {
498 |     if (IS_IOS_OR_NEWER(iOS_10_0)) // nothing below runs on iOS 10 or newer
499 |         return;
500 |     MSImageRef ref = MSGetImageByName(realPath2(@"/System/Library/PrivateFrameworks/WebCore.framework/WebCore")); // NOTE(review): ref is not checked before the lookups below
501 |     isCJKIdeograph = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore11FontCascade14isCJKIdeographEi"); // try the FontCascade:: symbol first
502 |     if (isCJKIdeograph == NULL) // otherwise fall back to the Font:: symbol
503 |         isCJKIdeograph = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore4Font14isCJKIdeographEi");
504 |     HBLogDebug(@"[WebCoreHack] Found isCJKIdeograph: %d", isCJKIdeograph != NULL); // lookup results are only logged, never enforced
505 |     isCJKIdeographOrSymbol = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22isCJKIdeographOrSymbolEi"); // same FontCascade-then-Font fallback
506 |     if (isCJKIdeographOrSymbol == NULL)
507 |         isCJKIdeographOrSymbol = (bool (*)(UChar32))MSFindSymbol(ref, "__ZN7WebCore4Font22isCJKIdeographOrSymbolEi");
508 |     HBLogDebug(@"[WebCoreHack] Found isCJKIdeographOrSymbol: %d", isCJKIdeographOrSymbol != NULL);
509 |     RenderText_originalText = (String (*)(void *))MSFindSymbol(ref, "__ZNK7WebCore10RenderText12originalTextEv"); // single candidate name, no fallback
510 |     HBLogDebug(@"[WebCoreHack] Found RenderText_originalText: %d", RenderText_originalText != NULL);
511 |     RenderText_previousOffsetForBackwardDeletion = (int (*)(void *, int))MSFindSymbol(ref, "__ZNK7WebCore10RenderText33previousOffsetForBackwardDeletionEi"); // single candidate name, no fallback
512 |     HBLogDebug(@"[WebCoreHack] Found RenderText_previousOffsetForBackwardDeletion: %d", RenderText_previousOffsetForBackwardDeletion != NULL);
513 |     characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22characterRangeCodePathEPKDsj"); // three candidate manglings are tried in order
514 |     if (characterRangeCodePath == NULL)
515 |         characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore11FontCascade22characterRangeCodePathEPKtj"); // missing in iOS 5
516 |     if (characterRangeCodePath == NULL)
517 |         characterRangeCodePath = (CodePath (*)(const UChar *, unsigned))MSFindSymbol(ref, "__ZN7WebCore4Font22characterRangeCodePathEPKtj");
518 |     HBLogDebug(@"[WebCoreHack] Found characterRangeCodePath: %d", characterRangeCodePath != NULL);
519 | #if __LP64__ || !TARGET_OS_SIMULATOR // this lookup is compiled out only for 32-bit simulator builds
520 |     advanceByCombiningCharacterSequence = (bool (*)(const UChar *&, const UChar *, UChar32&, unsigned&))MSFindSymbol(ref, "__ZN7WebCoreL35advanceByCombiningCharacterSequenceERPKDsS1_RiRj");
521 |     if (advanceByCombiningCharacterSequence == NULL)
522 |         advanceByCombiningCharacterSequence = (bool (*)(const UChar *&, const UChar *, UChar32&, unsigned&))MSFindSymbol(ref, "__ZN7WebCoreL35advanceByCombiningCharacterSequenceERPKtS1_RiRj"); // missing in iOS 5-6
523 |     HBLogDebug(@"[WebCoreHack] Found advanceByCombiningCharacterSequence: %d", advanceByCombiningCharacterSequence != NULL);
524 | #endif
525 |     %init;
526 | }
527 | 
528 | #endif


--------------------------------------------------------------------------------
/WebCoreSupport/CharactersProperties.h:
--------------------------------------------------------------------------------
 1 | #import <unicode/uchar.h>
 2 | #import <unicode/utf16.h>
 3 | 
 4 | namespace WebCore {
 5 | 
 6 | // True when ICU places the code point in one of the symbol, dingbat,
 7 | // emoticon or pictograph blocks compared against below.
 8 | static inline bool isEmojiGroupCandidate(UChar32 character) {
 9 |     const int block = static_cast<int>(ublock_getCode(character));
10 |     return block == UBLOCK_MISCELLANEOUS_SYMBOLS
11 |         || block == UBLOCK_DINGBATS
12 |         || block == UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
13 |         || block == UBLOCK_EMOTICONS
14 |         || block == UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
15 |         || block == UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
16 |         || block == UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A;
17 | }
18 | 
19 | // True for U+1F3FB through U+1F3FF inclusive.
20 | static inline bool isEmojiFitzpatrickModifier(UChar32 character){
21 |     return !(character < 0x1F3FB || character > 0x1F3FF);
22 | }
23 | 
24 | // True for U+FE00 through U+FE0F inclusive.
25 | inline bool isVariationSelector(UChar32 character){
26 |     return !(character < 0xFE00 || character > 0xFE0F);
27 | }
28 | 
29 | }
30 | 
31 | const UChar zeroWidthJoiner = 0x200D; // U+200D ZERO WIDTH JOINER
32 | const UChar hangulChoseongStart = 0x1100; // first and last code unit of the choseong range
33 | const UChar hangulChoseongEnd = 0x115F;
34 | const UChar hangulJungseongStart = 0x1160; // first and last code unit of the jungseong range
35 | const UChar hangulJungseongEnd = 0x11A2;
36 | const UChar hangulJongseongStart = 0x11A8; // first and last code unit of the jongseong range
37 | const UChar hangulJongseongEnd = 0x11F9;
38 | const UChar hangulSyllableStart = 0xAC00; // first and last code unit of the syllable range
39 | const UChar hangulSyllableEnd = 0xD7AF;
40 | const UChar hangulJongseongCount = 28; // modulus applied to the syllable offset by isHangulLVT
41 | 
42 | enum class HangulState { // not used in this header; presumably a state for Hangul sequence scanning — confirm at the use site
43 |     L, V, T, LV, LVT, Break
44 | };
45 | 
46 | static inline bool isHangulLVT(UChar character){ // true when the offset from hangulSyllableStart is not a multiple of hangulJongseongCount
47 |     return ((character - hangulSyllableStart) % hangulJongseongCount) != 0;
48 | }
49 | 
50 | static inline bool isMark(UChar32 character){ // true when the general-category mask intersects U_GC_M_MASK
51 |     return (U_GET_GC_MASK(character) & U_GC_M_MASK) != 0;
52 | }
53 | 
54 | inline bool isEmojiRegionalIndicator(UChar32 character) { // U+1F1E6 through U+1F1FF inclusive
55 |     return !(character < 0x1F1E6 || character > 0x1F1FF);
56 | }
57 | 	
58 | // inline bool isEmojiWithPresentationByDefault(UChar32 character) {
59 | //     return u_hasBinaryProperty(character, UCHAR_EMOJI_PRESENTATION);
60 | // }
61 | 
62 | // inline bool isEmojiModifierBase(UChar32 character) {
63 | //     return u_hasBinaryProperty(character, UCHAR_EMOJI_MODIFIER_BASE);
64 | // }
65 | 
66 | static inline bool isInArmenianToLimbuRange(UChar32 character){ // half-open range [U+0530, U+1950)
67 |     return !(character < 0x0530) && !(character >= 0x1950);
68 | }
69 | 


--------------------------------------------------------------------------------
/WebCoreSupport/CoreGraphicsSPI.h:
--------------------------------------------------------------------------------
 1 | #import <CoreGraphics/CoreGraphics.h>
 2 | 
 3 | enum { // bit flags stored in a CGFontRenderingStyle (typedef below)
 4 |     kCGFontRenderingStyleAntialiasing = 1 << 0,
 5 |     kCGFontRenderingStyleSmoothing = 1 << 1,
 6 |     kCGFontRenderingStyleSubpixelPositioning = 1 << 2,
 7 |     kCGFontRenderingStyleSubpixelQuantization = 1 << 3,
 8 |     kCGFontRenderingStylePlatformNative = 1 << 9,
 9 |     kCGFontRenderingStyleMask = 0x20F, // bitwise OR of the five flags above (0x1|0x2|0x4|0x8|0x200)
10 | };
11 | typedef uint32_t CGFontRenderingStyle;
12 | 
13 | enum { // values stored in a CGFontAntialiasingStyle (typedef below); only bit 7 is used
14 |     kCGFontAntialiasingStyleUnfiltered = 0 << 7, // evaluates to 0
15 |     kCGFontAntialiasingStyleFilterLight = 1 << 7, // evaluates to 0x80
16 | };
17 | typedef uint32_t CGFontAntialiasingStyle;
18 | 


--------------------------------------------------------------------------------
/WebCoreSupport/RefCounted.h:
--------------------------------------------------------------------------------
 1 | namespace WTF {
 2 | 	class RefCountedBase { // intrusive reference count; a new object starts with one reference
 3 | 		public:
 4 | 			void ref() const { m_refCount += 1; }
 5 | 			bool derefBase() const { // true => the last reference was dropped, caller must destroy the object
 6 | 				if (m_refCount == 1)
 7 | 					return true; // the stored count is left untouched in this case
 8 | 				--m_refCount;
 9 | 				return false;
10 | 			}
11 | 		protected:
12 | 			RefCountedBase() : m_refCount(1) { }
13 | 		private:
14 | 			mutable unsigned m_refCount;
15 | 	};
16 | 	template<typename T> class RefCounted : public RefCountedBase { // adds deletion through the concrete type T
17 | 		public:
18 | 			void deref() const {
19 | 				if (!derefBase())
20 | 					return;
21 | 				delete static_cast<const T *>(this);
22 | 			}
23 | 		protected:
24 | 			RefCounted() { }
25 | 	};
26 | };


--------------------------------------------------------------------------------
/WebCoreSupport/RefPtr.h:
--------------------------------------------------------------------------------
  1 | namespace WTF {
  2 |     // PassRefPtr is only forward-declared here; members that use it need its definition from elsewhere.
  3 |     template <typename T> class PassRefPtr;
  4 |     // RefPtr<T>: holds a T* and calls ref()/deref() on the pointee as copies are made and destroyed.
  5 |     template <typename T> class RefPtr
  6 |     {
  7 |     public:
  8 |         RefPtr() : m_ptr(0) {}
  9 |         RefPtr(T *ptr) : m_ptr(ptr) { if (ptr) ptr->ref(); }
 10 |         RefPtr(const RefPtr& o) : m_ptr(o.m_ptr) { if (T *ptr = m_ptr) ptr->ref(); }
 11 |         // see comment in PassRefPtr.h for why this takes const reference
 12 |         template <typename U> RefPtr(const PassRefPtr<U>&);
 13 | 
 14 |         ~RefPtr() { if (T *ptr = m_ptr) ptr->deref(); }
 15 |         
 16 |         template <typename U> RefPtr(const RefPtr<U>& o) : m_ptr(o.get()) { if (T *ptr = m_ptr) ptr->ref(); }
 17 |         
 18 |         T *get() const { return m_ptr; }
 19 |         // NOTE(review): adoptRef is not declared in this header — confirm where it comes from.
 20 |         PassRefPtr<T> release() { PassRefPtr<T> tmp = adoptRef(m_ptr); m_ptr = 0; return tmp; }
 21 | 
 22 |         T& operator*() const { return *m_ptr; }
 23 |         T *operator->() const { return m_ptr; }
 24 |         
 25 |         bool operator!() const { return !m_ptr; }
 26 |     
 27 |         // This conversion operator allows implicit conversion to bool but not to other integer types.
 28 |         typedef T * (RefPtr::*UnspecifiedBoolType)() const;
 29 |         operator UnspecifiedBoolType() const { return m_ptr ? &RefPtr::get : 0; }
 30 |         
 31 |         RefPtr& operator=(const RefPtr&);
 32 |         RefPtr& operator=(T *);
 33 |         RefPtr& operator=(const PassRefPtr<T>&);
 34 |         template <typename U> RefPtr& operator=(const RefPtr<U>&);
 35 |         template <typename U> RefPtr& operator=(const PassRefPtr<U>&);
 36 | 
 37 |         void swap(RefPtr&);
 38 | 
 39 |     private:
 40 |         T *m_ptr;
 41 |     };
 42 |     // Takes over the pointer returned by o.release() without calling ref() on it.
 43 |     template <typename T> template <typename U> inline RefPtr<T>::RefPtr(const PassRefPtr<U>& o)
 44 |         : m_ptr(o.release())
 45 |     {
 46 |     }
 47 |     // Copy assignment: the new pointee is ref'd before the old one is deref'd, so self-assignment is safe.
 48 |     template <typename T> inline RefPtr<T>& RefPtr<T>::operator=(const RefPtr<T>& o)
 49 |     {
 50 |         T* optr = o.get();
 51 |         if (optr)
 52 |             optr->ref();
 53 |         T* ptr = m_ptr;
 54 |         m_ptr = optr;
 55 |         if (ptr)
 56 |             ptr->deref();
 57 |         return *this;
 58 |     }
 59 |     // Converting copy assignment from RefPtr<U>; same ref-then-deref order as above.
 60 |     template <typename T> template <typename U> inline RefPtr<T>& RefPtr<T>::operator=(const RefPtr<U>& o)
 61 |     {
 62 |         T* optr = o.get();
 63 |         if (optr)
 64 |             optr->ref();
 65 |         T* ptr = m_ptr;
 66 |         m_ptr = optr;
 67 |         if (ptr)
 68 |             ptr->deref();
 69 |         return *this;
 70 |     }
 71 |     // Assignment from a raw pointer; same ref-then-deref order as above.
 72 |     template <typename T> inline RefPtr<T>& RefPtr<T>::operator=(T* optr)
 73 |     {
 74 |         if (optr)
 75 |             optr->ref();
 76 |         T* ptr = m_ptr;
 77 |         m_ptr = optr;
 78 |         if (ptr)
 79 |             ptr->deref();
 80 |         return *this;
 81 |     }
 82 |     // Assignment from PassRefPtr: stores o.release() without an extra ref(), then derefs the old pointee.
 83 |     template <typename T> inline RefPtr<T>& RefPtr<T>::operator=(const PassRefPtr<T>& o)
 84 |     {
 85 |         T* ptr = m_ptr;
 86 |         m_ptr = o.release();
 87 |         if (ptr)
 88 |             ptr->deref();
 89 |         return *this;
 90 |     }
 91 |     // Converting variant of the PassRefPtr assignment above.
 92 |     template <typename T> template <typename U> inline RefPtr<T>& RefPtr<T>::operator=(const PassRefPtr<U>& o)
 93 |     {
 94 |         T* ptr = m_ptr;
 95 |         m_ptr = o.release();
 96 |         if (ptr)
 97 |             ptr->deref();
 98 |         return *this;
 99 |     }
100 |     // Exchanges the raw pointers only; no ref()/deref() calls are made.
101 |     template <class T> inline void RefPtr<T>::swap(RefPtr<T>& o)
102 |     {
103 |         std::swap(m_ptr, o.m_ptr); // NOTE(review): this header includes nothing itself; std::swap must already be declared by the includer
104 |     }
105 | 
106 |     template <class T> inline void swap(RefPtr<T>& a, RefPtr<T>& b)
107 |     {
108 |         a.swap(b);
109 |     }
110 |     // The comparison operators below compare the raw pointers returned by get().
111 |     template <typename T, typename U> inline bool operator==(const RefPtr<T>& a, const RefPtr<U>& b)
112 |     { 
113 |         return a.get() == b.get(); 
114 |     }
115 | 
116 |     template <typename T, typename U> inline bool operator==(const RefPtr<T>& a, U* b)
117 |     { 
118 |         return a.get() == b; 
119 |     }
120 |     
121 |     template <typename T, typename U> inline bool operator==(T* a, const RefPtr<U>& b) 
122 |     {
123 |         return a == b.get(); 
124 |     }
125 |     
126 |     template <typename T, typename U> inline bool operator!=(const RefPtr<T>& a, const RefPtr<U>& b)
127 |     { 
128 |         return a.get() != b.get(); 
129 |     }
130 | 
131 |     template <typename T, typename U> inline bool operator!=(const RefPtr<T>& a, U* b)
132 |     {
133 |         return a.get() != b; 
134 |     }
135 | 
136 |     template <typename T, typename U> inline bool operator!=(T* a, const RefPtr<U>& b)
137 |     { 
138 |         return a != b.get(); 
139 |     }
140 |     // Cast helpers: each builds a new RefPtr<T> (which takes its own reference) from the cast raw pointer.
141 |     template <typename T, typename U> inline RefPtr<T> static_pointer_cast(const RefPtr<U>& p)
142 |     { 
143 |         return RefPtr<T>(static_cast<T *>(p.get())); 
144 |     }
145 | 
146 |     template <typename T, typename U> inline RefPtr<T> const_pointer_cast(const RefPtr<U>& p)
147 |     { 
148 |         return RefPtr<T>(const_cast<T *>(p.get())); 
149 |     }
150 |     // Returns the raw pointer held by p.
151 |     template <typename T> inline T* getPtr(const RefPtr<T>& p)
152 |     {
153 |         return p.get();
154 |     }
155 | 
156 | } // namespace WTF
157 | 
158 | using WTF::RefPtr;
159 | using WTF::static_pointer_cast;
160 | using WTF::const_pointer_cast;


--------------------------------------------------------------------------------
/WebCoreSupport/RenderText.h:
--------------------------------------------------------------------------------
 1 | #import "StringImpl.h"
 2 | 
 3 | using namespace WTF;
 4 | 
 5 | namespace WebCore {
 6 | 
 7 | class InlineTextBox;
 8 | // Minimal local declaration: one String member and an accessor for its StringImpl.
 9 | class RenderText { // NOTE(review): presumably stands in for the real WebCore::RenderText — confirm the layout assumption
10 | 	private:
11 | 		String m_text;
12 | 	public:
13 | 		StringImpl* text() const { return m_text.impl(); }
14 | 	};
15 | } // namespace WebCore


--------------------------------------------------------------------------------
/WebCoreSupport/StringImpl.h:
--------------------------------------------------------------------------------
  1 | #include <unicode/uchar.h>
  2 | #include <unicode/utf16.h>
  3 | #include <objc/objc.h>
  4 | #import "RefPtr.h"
  5 | #import "RefCounted.h"
  6 | 
  7 | typedef unsigned char LChar;
  8 | 
  9 | namespace WTF {
 10 | 	class StringImplBase { // one word of reference count plus flag bits, followed by a length
 11 | 	public:
 12 |     	bool isStringImpl() { return (m_refCountAndFlags & s_refCountInvalidForStringImpl) != s_refCountInvalidForStringImpl; } // false only when both bits of s_refCountInvalidForStringImpl are set
 13 |     	unsigned length() const { return m_length; }
 14 |     	void ref() { m_refCountAndFlags += s_refCountIncrement; } // the count sits above the low seven flag bits
 15 |     protected:
 16 | 		enum BufferOwnership {
 17 | 			BufferInternal,
 18 | 			BufferOwned,
 19 | 			BufferSubstring,
 20 | 			BufferShared,
 21 | 		};
 22 | 
 23 | 		StringImplBase() { } // leaves both members uninitialised
 24 | 		// Bit layout of m_refCountAndFlags: count in the bits of s_refCountMask, flags in the low seven bits.
 25 | 		static const unsigned s_refCountMask = 0xFFFFFF80;
 26 | 		static const unsigned s_refCountIncrement = 0x80;
 27 | 		static const unsigned s_refCountFlagStatic = 0x40;
 28 | 		static const unsigned s_refCountFlagHasTerminatingNullCharacter = 0x20;
 29 | 		static const unsigned s_refCountFlagIsAtomic = 0x10;
 30 | 		static const unsigned s_refCountFlagShouldReportedCost = 0x8;
 31 | 		static const unsigned s_refCountFlagIsIdentifier = 0x4;
 32 | 		static const unsigned s_refCountMaskBufferOwnership = 0x3;
 33 | 		static const unsigned s_refCountInvalidForStringImpl = s_refCountFlagStatic | s_refCountFlagShouldReportedCost;
 34 | 		// NOTE(review): member order presumably mirrors an external layout — do not reorder without confirming.
 35 | 		unsigned m_refCountAndFlags;
 36 | 		unsigned m_length;
 37 | 	};
 38 | };
 39 | 
 40 | namespace WTF {
 41 | 	class StringImpl : public StringImplBase { // adds character-buffer pointers and hash/flag bits to the base
 42 | 	public:
 43 | 		CFStringRef createCFString();
 44 | 		UChar operator[](unsigned i) { return m_data[i]; } // no bounds check
 45 | 		//operator NSString*();
 46 | 		const UChar* characters() const { return m_data; }
 47 | 		bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } // tests bit 3 of m_hashAndFlags
 48 | 		const LChar* characters8() const { return m_data8; }
 49 | 		const UChar* characters16() const { return m_data16; }
 50 | 		void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } // deletes once the count bits and the static flag are all zero
 51 | 		unsigned length() const { return m_length; } // reads the m_length declared below, which hides the base member
 52 | 	private:
 53 | 		static const unsigned s_hashFlag8BitBuffer = 1u << 3;
 54 | 		const UChar* m_data;
 55 | 		unsigned m_refCount; // NOTE(review): never read in this header; ref()/deref() use m_refCountAndFlags
 56 | 		unsigned m_length;
 57 | 		union { // m_data8 and m_data16 share storage; is8Bit() tells which one applies
 58 | 			const LChar* m_data8;
 59 | 			const UChar* m_data16;
 60 | 		};
 61 | 		mutable unsigned m_hashAndFlags;
 62 | 	};
 63 | 	// The functions below are declared here only, apart from the inline argument-swapping overloads.
 64 | 	bool equal(const StringImpl*, const StringImpl*);
 65 | 	bool equal(const StringImpl*, const char*);
 66 | 	inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
 67 | 
 68 | 	bool equalIgnoringCase(StringImpl*, StringImpl*);
 69 | 	bool equalIgnoringCase(StringImpl*, const char*);
 70 | 	inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
 71 | 	bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
 72 | 	inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
 73 | 	
 74 | 	int codePointCompare(const StringImpl*, const StringImpl*);
 75 | };
 76 | 
 77 | template<typename Type> // true when the address in ptr is a multiple of Type's alignment
 78 | bool isPointerTypeAlignmentOkay(Type* ptr)
 79 | {
 80 |     const intptr_t address = reinterpret_cast<intptr_t>(ptr); return address % __alignof__(Type) == 0;
 81 | }
 82 | 
 83 | // reinterpret_cast wrappers for untyped pointers; the overloads differ only in the constness of ptr.
 84 | template<typename TypePtr>
 85 | TypePtr reinterpret_cast_ptr(void* ptr) {
 86 |     TypePtr typed = reinterpret_cast<TypePtr>(ptr);
 87 |     return typed;
 88 | }
 89 | template<typename TypePtr>
 90 | TypePtr reinterpret_cast_ptr(const void* ptr) {
 91 |     TypePtr typed = reinterpret_cast<TypePtr>(ptr);
 92 |     return typed;
 93 | }
 94 | 
 95 | namespace WTF {
 96 | 	class CStringBuffer : public RefCounted<CStringBuffer> { // ref-counted header; the characters are addressed just past the object
 97 | 		public:
 98 | 			const char* data() { return mutableData(); }
 99 | 			size_t length() const { return m_length; }
100 | 		private:
101 | 			friend class CString;
102 | 			CStringBuffer(size_t length) : m_length(length) { }
103 | 		char* mutableData() { return reinterpret_cast_ptr<char*>(this + 1); } // this + 1 is the first byte after the object
104 | 	const size_t m_length;
105 | 	};
106 | 	class CString { // holds a RefPtr to a CStringBuffer
107 | 		public:
108 | 			CString(CStringBuffer* buffer) : m_buffer(buffer) { }
109 | 			const char *data() { return m_buffer ? m_buffer->data() : 0; } // returns 0 when there is no buffer
110 | 		private:
111 | 			RefPtr<CStringBuffer> m_buffer;
112 | 	};
113 | };
114 | 
115 | namespace WTF {
116 | 	// Conversion modes accepted by String::utf8() below.
117 | 	typedef enum {
118 | 		LenientConversion,
119 | 		StrictConversion,
120 | 		StrictConversionReplacingUnpairedSurrogatesWithFFFD,
121 | 	} ConversionMode;
122 | 	// String: a RefPtr<StringImpl> wrapper; a null m_impl represents the null string.
123 | 	class String {
124 | 	public:
125 | 		String() { }
126 | 		String(StringImpl* impl) : m_impl(impl) { }
127 | 		String(RefPtr<StringImpl> impl) : m_impl(impl) { }
128 | 		void swap(String& o) { m_impl.swap(o.m_impl); }
129 | 		bool isEmpty() const { return !m_impl || !m_impl->length(); }
130 | 		bool isNull() const { return !m_impl; }
131 | 		bool is8Bit() const { return m_impl->is8Bit(); } // no null check: must not be called on a null string
132 | 		StringImpl* impl() const { return m_impl.get(); }
133 | 		unsigned length() const {
134 |         	if (!m_impl)
135 |             	return 0;
136 |         	return m_impl->length();
137 |     	}
138 | 		const LChar* characters8() const {
139 | 			if (!m_impl)
140 | 				return 0;
141 | 			return m_impl->characters8();
142 | 		}
143 | 		const UChar* characters16() const {
144 | 			if (!m_impl)
145 | 				return 0;
146 | 			return m_impl->characters16();
147 | 		}
148 |     	const UChar* characters() const {
149 |         	if (!m_impl)
150 |             	return 0;
151 |         	return m_impl->characters();
152 |     	}
153 | 		UChar operator[](unsigned index) const { // returns 0 for a null string or an out-of-range index
154 |         	if (!m_impl || index >= m_impl->length())
155 |             	return 0;
156 |         	return m_impl->characters()[index];
157 |     	}
158 |     	static String number(short);
159 | 		static String number(unsigned short);
160 | 		static String number(int);
161 | 		static String number(unsigned);
162 | 		static String number(long);
163 | 		static String number(unsigned long);
164 | 		static String number(long long);
165 | 		static String number(unsigned long long);
166 | 		static String number(double);
167 | 		// The number(), append(), insert() and utf8() members are declared only; no definitions appear in this header.
168 | 		void append(const String&);
169 | 		void append(char);
170 | 		void append(UChar);
171 | 		void append(const UChar*, unsigned length);
172 | 		void insert(const String&, unsigned pos);
173 | 		void insert(const UChar*, unsigned length, unsigned pos);
174 | 		
175 | 		CString utf8(ConversionMode = LenientConversion) const;
176 | 	private:
177 |     	RefPtr<StringImpl> m_impl;
178 | 	};
179 | };
180 | 
181 | namespace WTF {
182 | 	class AtomicString { // wraps a single String and forwards to it
183 | 		public:
184 | 			bool isEmpty() const { return m_string.isEmpty(); };
185 | 			const String& string() const { return m_string; };
186 | 		private:
187 | 			String m_string;
188 | 	};
189 | };
190 | 
191 | namespace WTF {
192 | 	class StringBuilder { // the UChar/LChar appends, shrinkToFit and reifyString are declared only
193 | 		public:
194 | 			void append(const UChar*, unsigned);
195 | 			void append(const LChar*, unsigned);
196 | 			void append(const char* characters, unsigned length) { append(reinterpret_cast<const LChar*>(characters), length); }
197 | 			void append(const char* characters) { // a null pointer is ignored; otherwise strlen supplies the length
198 | 				if (characters)
199 | 					append(characters, strlen(characters));
200 | 			}
201 | 			String toString() { // shrinks first, then reifies only if m_string is still null
202 | 				shrinkToFit();
203 | 				if (m_string.isNull())
204 | 					reifyString();
205 | 				return m_string;
206 | 			}
207 | 		private:
208 | 			void shrinkToFit();
209 | 			void reifyString() const;
210 | 			mutable String m_string; // mutable so the const reifyString() can fill it
211 | 			unsigned m_length; // NOTE(review): not read by any inline code in this header
212 | 	};
213 | };
214 | 
215 | namespace WebCore {
216 | 	class TextBreakIterator; // forward declaration only; the type is opaque in this header
217 | };
218 | 
219 | namespace WTF {
220 | 	class StringView { // non-owning view: a character pointer plus a length word
221 | 		public:
222 | 			StringView();
223 | 			StringView(const String&);
224 | 			StringView(const StringImpl&);
225 | 			StringView(const StringImpl*);
226 | 			class UpconvertedCharacters;
227 | 			const UChar* characters16() const;
228 | 			bool is8Bit() const;
229 | 			UpconvertedCharacters upconvertedCharacters() const;
230 | 		private:
231 | 			const void *m_characters { nullptr };
232 | 			unsigned m_length { 0 }; // the top bit doubles as the 16-bit marker (is16BitStringFlag)
233 | 	};
234 | 	static const unsigned is16BitStringFlag = 1u << 31;
235 | 	inline bool StringView::is8Bit() const { // 8-bit unless the marker bit is set in m_length
236 | 		return !(m_length & is16BitStringFlag);
237 | 	}
238 | 	class StringView::UpconvertedCharacters { // exposes a view's characters as const UChar*
239 | 		public:
240 | 			explicit UpconvertedCharacters(const StringView&);
241 | 			operator const UChar*() const { return m_characters; };
242 | 			const UChar* get() const { return m_characters; };
243 | 		private:
244 | 			//Vector<UChar, 32> m_upconvertedCharacters;
245 | 			const UChar* m_characters { nullptr }; // stays null for 8-bit views instead of being left indeterminate
246 | 	};
247 | 	inline const UChar* StringView::characters16() const { // reinterprets the stored pointer; does not check is8Bit()
248 | 		return static_cast<const UChar*>(m_characters);
249 | 	}
250 | 	inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const {
251 | 		return UpconvertedCharacters(*this);
252 | 	}
253 | 	inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string) { // no upconversion is done: only 16-bit views yield a pointer
254 | 		if (!string.is8Bit()) {
255 | 			m_characters = string.characters16();
256 | 			return;
257 | 		}
258 | 	}
259 | 	inline StringView::StringView(const String& string) { // NOTE(review): no branch stores the string's characters or length, so the view is always empty — confirm this stub is intended
260 | 		if (!string.impl()) {
261 | 			m_characters = nullptr;
262 | 			m_length = 0;
263 | 			return;
264 | 		}
265 | 		if (string.is8Bit()) {
266 | 			return;
267 | 		}
268 | 	}
269 | };


--------------------------------------------------------------------------------
/WebCoreSupport/UAX.h:
--------------------------------------------------------------------------------
  1 | #define ADDITIONAL_EMOJI_SUPPORT 1
  2 | 
  3 | enum LineBreakIteratorMode { // NOTE(review): presumably selects among the uax14AssignmentsCustom* rule sets below — confirm at the use site
  4 |     LineBreakIteratorModeUAX14,
  5 |     LineBreakIteratorModeUAX14Loose,
  6 |     LineBreakIteratorModeUAX14Normal,
  7 |     LineBreakIteratorModeUAX14Strict,
  8 | };
  9 | 
 10 | static const char* uax14Prologue = // the three "!!" option lines, concatenated into one string
 11 | 	    "!!chain;"
 12 | 	    "!!LBCMNoChain;"
 13 | 	    "!!lookAheadHardBreak;";
 14 | 
 15 | 	static const char* uax14AssignmentsBefore = // defines $CJ, which the rule strings below reference
 16 | 	    // explicitly enumerate $CJ since ICU versions prior to 49 don't support :LineBreak=Conditional_Japanese_Starter:
 17 | 	    "$CJ = ["
 18 | 	#if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) // NOTE(review): false whenever MINOR < 9 regardless of MAJOR; confirm that versions such as 50.1 should take the enumerated branch
 19 | 	    ":LineBreak=Conditional_Japanese_Starter:"
 20 | 	#else
 21 | 	    "\\u3041\\u3043\\u3045\\u3047\\u3049\\u3063\\u3083\\u3085\\u3087\\u308E\\u3095\\u3096\\u30A1\\u30A3\\u30A5\\u30A7"
 22 | 	    "\\u30A9\\u30C3\\u30E3\\u30E5\\u30E7\\u30EE\\u30F5\\u30F6\\u30FC"
 23 | 	    "\\u31F0\\u31F1\\u31F2\\u31F3\\u31F4\\u31F5\\u31F6\\u31F7\\u31F8\\u31F9\\u31FA\\u31FB\\u31FC\\u31FD\\u31FE\\u31FF"
 24 | 	    "\\uFF67\\uFF68\\uFF69\\uFF6A\\uFF6B\\uFF6C\\uFF6D\\uFF6E\\uFF6F\\uFF70"
 25 | 	#endif
 26 | 	    "];";
 27 | 
 28 | 	static const char* uax14AssignmentsCustomLooseCJK = // *_SUB sets are subtracted from their classes in uax14AssignmentsAfter; *_ADD sets are added
 29 | 	    "$BA_SUB = [\\u2010\\u2013];"
 30 | 	    "$EX_SUB = [\\u0021\\u003F\\uFF01\\uFF1F];"
 31 | 	    "$ID_SUB = '';"
 32 | 	    "$IN_SUB = [\\u2025\\u2026];"
 33 | 	    "$IS_SUB = [\\u003A\\u003B];"
 34 | 	    "$NS_SUB = [\\u203C\\u2047\\u2048\\u2049\\u3005\\u301C\\u303B\\u309D\\u309E\\u30A0\\u30FB\\u30FD\\u30FE\\uFF1A\\uFF1B\\uFF65];"
 35 | 	    "$PO_SUB = [\\u0025\\u00A2\\u00B0\\u2030\\u2032\\u2033\\u2103\\uFF05\\uFFE0];"
 36 | 	    "$PR_SUB = [\\u0024\\u00A3\\u00A5\\u20AC\\u2116\\uFF04\\uFFE1\\uFFE5];"
 37 | 	    "$ID_ADD = [$CJ $BA_SUB $EX_SUB $IN_SUB $IS_SUB $NS_SUB $PO_SUB $PR_SUB];" // every subtracted set, plus $CJ, is added to $ID
 38 | 	    "$NS_ADD = '';";
 39 | 
 40 | 	static const char* uax14AssignmentsCustomLooseNonCJK = // only $IN_SUB and $NS_SUB are non-empty in this variant
 41 | 	    "$BA_SUB = '';"
 42 | 	    "$EX_SUB = '';"
 43 | 	    "$ID_SUB = '';"
 44 | 	    "$IN_SUB = [\\u2025\\u2026];"
 45 | 	    "$IS_SUB = '';"
 46 | 	    "$NS_SUB = [\\u3005\\u303B\\u309D\\u309E\\u30FD\\u30FE];"
 47 | 	    "$PO_SUB = '';"
 48 | 	    "$PR_SUB = '';"
 49 | 	    "$ID_ADD = [$CJ $IN_SUB $NS_SUB];" // the two subtracted sets, plus $CJ, are added to $ID
 50 | 	    "$NS_ADD = '';";
 51 | 
 52 | 	static const char* uax14AssignmentsCustomNormalCJK = // only $BA_SUB and $NS_SUB are non-empty in this variant
 53 | 	    "$BA_SUB = [\\u2010\\u2013];"
 54 | 	    "$EX_SUB = '';"
 55 | 	    "$IN_SUB = '';"
 56 | 	    "$ID_SUB = '';"
 57 | 	    "$IS_SUB = '';"
 58 | 	    "$NS_SUB = [\\u301C\\u30A0];"
 59 | 	    "$PO_SUB = '';"
 60 | 	    "$PR_SUB = '';"
 61 | 	    "$ID_ADD = [$CJ $BA_SUB $NS_SUB];" // the two subtracted sets, plus $CJ, are added to $ID
 62 | 	    "$NS_ADD = '';";
 63 | 
 64 | 	static const char* uax14AssignmentsCustomNormalNonCJK = // every *_SUB set is empty; only $CJ is added to $ID
 65 | 	    "$BA_SUB = '';"
 66 | 	    "$EX_SUB = '';"
 67 | 	    "$ID_SUB = '';"
 68 | 	    "$IN_SUB = '';"
 69 | 	    "$IS_SUB = '';"
 70 | 	    "$NS_SUB = '';"
 71 | 	    "$PO_SUB = '';"
 72 | 	    "$PR_SUB = '';"
 73 | 	    "$ID_ADD = [$CJ];"
 74 | 	    "$NS_ADD = '';";
 75 | 
 76 | 	static const char* uax14AssignmentsCustomStrictCJK = // every *_SUB set is empty; $CJ is added to $NS instead of $ID
 77 | 	    "$BA_SUB = '';"
 78 | 	    "$EX_SUB = '';"
 79 | 	    "$ID_SUB = '';"
 80 | 	    "$IN_SUB = '';"
 81 | 	    "$IS_SUB = '';"
 82 | 	    "$NS_SUB = '';"
 83 | 	    "$PO_SUB = '';"
 84 | 	    "$PR_SUB = '';"
 85 | 	    "$ID_ADD = '';"
 86 | 	    "$NS_ADD = [$CJ];";
 87 | 
 88 | 	#define uax14AssignmentsCustomStrictNonCJK      uax14AssignmentsCustomStrictCJK // strict rules are shared between CJK and non-CJK
 89 | 	#define uax14AssignmentsCustomDefaultCJK        uax14AssignmentsCustomNormalCJK // default for CJK is the normal rule set
 90 | 	#define uax14AssignmentsCustomDefaultNonCJK     uax14AssignmentsCustomStrictNonCJK // default for non-CJK is the strict rule set
 91 | 
 92 | 	static const char* uax14AssignmentsAfter = // Variable definitions for the line-break rules: one set per LineBreak property value (adjusted by the $*_SUB / $*_ADD overrides), emoji helper sets, and the "<class> $CM*" combinations used by uax14Forward.
 93 | 	    "$AI = [:LineBreak = Ambiguous:];"
 94 | 	    "$AL = [:LineBreak = Alphabetic:];"
 95 | 	    "$BA = [[:LineBreak = Break_After:] - $BA_SUB];"
 96 | 	    "$BB = [:LineBreak = Break_Before:];"
 97 | 	    "$BK = [:LineBreak = Mandatory_Break:];"
 98 | 	    "$B2 = [:LineBreak = Break_Both:];"
 99 | 	    "$CB = [:LineBreak = Contingent_Break:];"
100 | 	    "$CL = [:LineBreak = Close_Punctuation:];"
101 | 	    "$CM = [:LineBreak = Combining_Mark:];"
102 | 	    "$CP = [:LineBreak = Close_Parenthesis:];"
103 | 	    "$CR = [:LineBreak = Carriage_Return:];"
104 | 	    "$EX = [[:LineBreak = Exclamation:] - $EX_SUB];"
105 | 	    "$GL = [:LineBreak = Glue:];"
106 | 	#if (U_ICU_VERSION_MAJOR_NUM >= 4) && (U_ICU_VERSION_MINOR_NUM >= 9) // NOTE(review): both parts must hold, so versions such as 49.1 or 55.1 (minor < 9) take the #else branch. Confirm this is intended before changing it: the fallback avoids the Hebrew_Letter property value, which an older runtime ICU may not accept.
107 | 	    "$HL = [:LineBreak = Hebrew_Letter:];"
108 | 	#else
109 | 	    "$HL = [[:Hebrew:] & [:Letter:]];" // fallback definition of $HL built from script and general category
110 | 	#endif
111 | 	    "$HY = [:LineBreak = Hyphen:];"
112 | 	    "$H2 = [:LineBreak = H2:];"
113 | 	    "$H3 = [:LineBreak = H3:];"
114 | 	    "$ID = [[[[:LineBreak = Ideographic:] - $CJ] $ID_ADD] - $ID_SUB];" // $CJ is defined outside this string; the per-mode $ID_ADD decides whether it is re-added
115 | 	    "$IN = [[:LineBreak = Inseparable:] - $IN_SUB];"
116 | 	    "$IS = [[:LineBreak = Infix_Numeric:] - $IS_SUB];"
117 | 	    "$JL = [:LineBreak = JL:];"
118 | 	    "$JV = [:LineBreak = JV:];"
119 | 	    "$JT = [:LineBreak = JT:];"
120 | 	    "$LF = [:LineBreak = Line_Feed:];"
121 | 	    "$NL = [:LineBreak = Next_Line:];"
122 | 	    "$NS = [[[[:LineBreak = Nonstarter:] - $CJ] $NS_ADD] - $NS_SUB];"
123 | 	    "$NU = [:LineBreak = Numeric:];"
124 | 	    "$OP = [:LineBreak = Open_Punctuation:];"
125 | 	    "$PO = [[:LineBreak = Postfix_Numeric:] - $PO_SUB];"
126 | 	    "$PR = [[:LineBreak = Prefix_Numeric:] - $PR_SUB];"
127 | 	    "$QU = [:LineBreak = Quotation:];"
128 | 	    "$RI = [\\U0001F1E6-\\U0001F1FF];" // given as an explicit code point range rather than a property query
129 | 	    "$SA = [:LineBreak = Complex_Context:];"
130 | 	    "$SG = [:LineBreak = Surrogate:];"
131 | 	    "$SP = [:LineBreak = Space:];"
132 | 	    "$SY = [:LineBreak = Break_Symbols:];"
133 | 	    "$WJ = [:LineBreak = Word_Joiner:];"
134 | 	    "$XX = [:LineBreak = Unknown:];"
135 | 	    "$ZW = [:LineBreak = ZWSpace:];"
136 | 	    "$ZWJ = \\u200D;"
137 | 	    "$EmojiVar = \\uFE0F;"
138 | 	#if ADDITIONAL_EMOJI_SUPPORT
139 | 	    "$EmojiForSeqs = [\\u2764 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F48B \\U0001F5E8];"
140 | 	    "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918] ;" // Emoji that take Fitzpatrick modifiers
141 | 	#else
142 | 	    "$EmojiForSeqs = [\\u2764 \\U0001F466-\\U0001F469 \\U0001F48B];"
143 | 	    "$EmojiForMods = [\\u261D \\u270A-\\u270C \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3C7 \\U0001F3CA \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0] ;" // Emoji that take Fitzpatrick modifiers
144 | 	#endif
145 | 	    "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];"
146 | 	    "$dictionary = [:LineBreak = Complex_Context:];" // same set as $SA above
147 | 	    "$ALPlus = [$AL $AI $SA $SG $XX];" // AI, SA, SG and XX are grouped with AL
148 | 	    "$ALcm = $ALPlus $CM*;" // every "<X>cm" below is class X followed by any number of combining marks
149 | 	    "$BAcm = $BA $CM*;"
150 | 	    "$BBcm = $BB $CM*;"
151 | 	    "$B2cm = $B2 $CM*;"
152 | 	    "$CLcm = $CL $CM*;"
153 | 	    "$CPcm = $CP $CM*;"
154 | 	    "$EXcm = $EX $CM*;"
155 | 	    "$GLcm = $GL $CM*;"
156 | 	    "$HLcm = $HL $CM*;"
157 | 	    "$HYcm = $HY $CM*;"
158 | 	    "$H2cm = $H2 $CM*;"
159 | 	    "$H3cm = $H3 $CM*;"
160 | 	    "$IDcm = $ID $CM*;"
161 | 	    "$INcm = $IN $CM*;"
162 | 	    "$IScm = $IS $CM*;"
163 | 	    "$JLcm = $JL $CM*;"
164 | 	    "$JVcm = $JV $CM*;"
165 | 	    "$JTcm = $JT $CM*;"
166 | 	    "$NScm = $NS $CM*;"
167 | 	    "$NUcm = $NU $CM*;"
168 | 	    "$OPcm = $OP $CM*;"
169 | 	    "$POcm = $PO $CM*;"
170 | 	    "$PRcm = $PR $CM*;"
171 | 	    "$QUcm = $QU $CM*;"
172 | 	    "$RIcm = $RI $CM*;"
173 | 	    "$SYcm = $SY $CM*;"
174 | 	    "$WJcm = $WJ $CM*;";
175 | 
176 | 	static const char* uax14Forward = // Rules for the "!!forward" section of the break-rule set; relies on the variables defined in uax14AssignmentsAfter.
177 | 	    "!!forward;"
178 | 	    "$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM];" // characters a following $CM may attach to
179 | 	    "$CANT_CM = [$SP $BK $CR $LF $NL $ZW $CM];" // complement of $CAN_CM
180 | 	    "$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];"
181 | 	    "$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $ALPlus];"
182 | 	    "$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];"
183 | 	    "$LB4Breaks = [$BK $CR $LF $NL];"
184 | 	    "$LB4NonBreaks = [^$BK $CR $LF $NL];"
185 | 	    "$LB8Breaks = [$LB4Breaks $ZW];"
186 | 	    "$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];"
187 | 	    "$LB18NonBreaks = [$LB8NonBreaks - [$SP]];"
188 | 	    "$LB18Breaks = [$LB8Breaks $SP];"
189 | 	    "$LB20NonBreaks = [$LB18NonBreaks - $CB];"
190 | 	    "$ALPlus $CM+;" // a base character keeps its trailing combining marks (this rule and the ones below)
191 | 	    "$BA $CM+;"
192 | 	    "$BB $CM+;"
193 | 	    "$B2 $CM+;"
194 | 	    "$CL $CM+;"
195 | 	    "$CP $CM+;"
196 | 	    "$EX $CM+;"
197 | 	    "$GL $CM+;"
198 | 	    "$HL $CM+;"
199 | 	    "$HY $CM+;"
200 | 	    "$H2 $CM+;"
201 | 	    "$H3 $CM+;"
202 | 	    "$ID $CM+;"
203 | 	    "$IN $CM+;"
204 | 	    "$IS $CM+;"
205 | 	    "$JL $CM+;"
206 | 	    "$JV $CM+;"
207 | 	    "$JT $CM+;"
208 | 	    "$NS $CM+;"
209 | 	    "$NU $CM+;"
210 | 	    "$OP $CM+;"
211 | 	    "$PO $CM+;"
212 | 	    "$PR $CM+;"
213 | 	    "$QU $CM+;"
214 | 	    "$RI $CM+;"
215 | 	    "$SY $CM+;"
216 | 	    "$WJ $CM+;"
217 | 	    "$CR $LF {100};" // {100} is the rule status value attached to matches ending in a hard line break
218 | 	    "$LB4NonBreaks? $LB4Breaks {100};"
219 | 	    "$CAN_CM $CM* $LB4Breaks {100};"
220 | 	    "$CM+ $LB4Breaks {100};"
221 | 	    "$LB4NonBreaks [$SP $ZW];"
222 | 	    "$CAN_CM $CM* [$SP $ZW];"
223 | 	    "$CM+ [$SP $ZW];"
224 | 	    "$EmojiForSeqs $EmojiVar? $EmojiMods? $ZWJ $EmojiForSeqs;" // keep ZWJ-joined emoji pairs together
225 | 	    "$CAN_CM $CM+;"
226 | 	    "$CM+;"
227 | 	    "$CAN_CM $CM* $WJcm;"
228 | 	    "$LB8NonBreaks $WJcm;"
229 | 	    "$CM+ $WJcm;"
230 | 	    "$WJcm $CANT_CM;"
231 | 	    "$WJcm $CAN_CM $CM*;"
232 | 	    "$GLcm $CAN_CM $CM*;"
233 | 	    "$GLcm $CANT_CM;"
234 | 	    "[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;"
235 | 	    "$CM+ $GLcm;" // companion of the rule above for a run of $CM with no base; previously the literal text "GLcm" because the '$' sigil was missing
236 | 	    "$LB8NonBreaks $CL;"
237 | 	    "$CAN_CM $CM* $CL;"
238 | 	    "$CM+ $CL;"
239 | 	    "$LB8NonBreaks $CP;"
240 | 	    "$CAN_CM $CM* $CP;"
241 | 	    "$CM+ $CP;"
242 | 	    "$LB8NonBreaks $EX;"
243 | 	    "$CAN_CM $CM* $EX;"
244 | 	    "$CM+ $EX;"
245 | 	    "$LB8NonBreaks $IS;"
246 | 	    "$CAN_CM $CM* $IS;"
247 | 	    "$CM+ $IS;"
248 | 	    "$LB8NonBreaks $SY;"
249 | 	    "$CAN_CM $CM* $SY;"
250 | 	    "$CM+ $SY;"
251 | 	    "$OPcm $SP* $CAN_CM $CM*;"
252 | 	    "$OPcm $SP* $CANT_CM;"
253 | 	    "$OPcm $SP+ $CM+ $AL_FOLLOW?;"
254 | 	    "$QUcm $SP* $OPcm;"
255 | 	    "($CLcm | $CPcm) $SP* $NScm;"
256 | 	    "$B2cm $SP* $B2cm;"
257 | 	    "$LB18NonBreaks $CM* $QUcm;"
258 | 	    "$CM+ $QUcm;"
259 | 	    "$QUcm .?;"
260 | 	    "$QUcm $LB18NonBreaks $CM*;"
261 | 	    "$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); "
262 | 	    "$BBcm [^$CB];"
263 | 	    "$BBcm $LB20NonBreaks $CM*;"
264 | 	    "$HLcm ($HYcm | $BAcm) [^$CB]?;"
265 | 	    "$SYcm $HLcm;"
266 | 	    "($ALcm | $HLcm) $INcm;"
267 | 	    "$CM+ $INcm;"
268 | 	    "$EXcm $INcm;"
269 | 	    "$IDcm $INcm;"
270 | 	    "$INcm $INcm;"
271 | 	    "$NUcm $INcm;"
272 | 	    "$IDcm $POcm;"
273 | 	    "$ALcm $NUcm;"
274 | 	    "$HLcm $NUcm;"
275 | 	    "$CM+ $NUcm;"
276 | 	    "$NUcm $ALcm;"
277 | 	    "$NUcm $HLcm;"
278 | 	    "$PRcm $IDcm;"
279 | 	    "$PRcm ($ALcm | $HLcm);"
280 | 	    "$POcm ($ALcm | $HLcm);"
281 | 	    "($PRcm | $POcm)? ($OPcm | $HYcm)? $NUcm ($NUcm | $SYcm | $IScm)* ($CLcm | $CPcm)? ($PRcm | $POcm)?;" // keeps a whole numeric expression (prefix, sign/bracket, digits, separators, suffix) unbroken
282 | 	    "$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);"
283 | 	    "($JVcm | $H2cm) ($JVcm | $JTcm);"
284 | 	    "($JTcm | $H3cm) $JTcm;"
285 | 	    "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;"
286 | 	    "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;"
287 | 	    "$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);"
288 | 	    "($ALcm | $HLcm) ($ALcm | $HLcm);"
289 | 	    "$CM+ ($ALcm | $HLcm);"
290 | 	    "$IScm ($ALcm | $HLcm);"
291 | 	    "($ALcm | $HLcm | $NUcm) $OPcm;"
292 | 	    "$CM+ $OPcm;"
293 | 	    "$CPcm ($ALcm | $HLcm | $NUcm);"
294 | 	#if ADDITIONAL_EMOJI_SUPPORT
295 | 	    "$RIcm $RIcm;" // keep a pair of regional-indicator code points together
296 | 	#endif
297 | 	    "$EmojiForMods $EmojiVar? $EmojiMods;"; // keep a modifier-capable emoji together with its $EmojiMods modifier
298 | 
299 | 	static const char* uax14Reverse = // Rules for the "!!reverse" section: the forward patterns written back-to-front, using the variables from uax14AssignmentsAfter and the $CAN_CM / $LB* sets defined in uax14Forward.
300 | 	    "!!reverse;"
301 | 	    "$CM+ $ALPlus;" // combining marks followed (in reverse order) by their base, one rule per class
302 | 	    "$CM+ $BA;"
303 | 	    "$CM+ $BB;"
304 | 	    "$CM+ $B2;"
305 | 	    "$CM+ $CL;"
306 | 	    "$CM+ $CP;"
307 | 	    "$CM+ $EX;"
308 | 	    "$CM+ $GL;"
309 | 	    "$CM+ $HL;"
310 | 	    "$CM+ $HY;"
311 | 	    "$CM+ $H2;"
312 | 	    "$CM+ $H3;"
313 | 	    "$CM+ $ID;"
314 | 	    "$CM+ $IN;"
315 | 	    "$CM+ $IS;"
316 | 	    "$CM+ $JL;"
317 | 	    "$CM+ $JV;"
318 | 	    "$CM+ $JT;"
319 | 	    "$CM+ $NS;"
320 | 	    "$CM+ $NU;"
321 | 	    "$CM+ $OP;"
322 | 	    "$CM+ $PO;"
323 | 	    "$CM+ $PR;"
324 | 	    "$CM+ $QU;"
325 | 	#if ADDITIONAL_EMOJI_SUPPORT // NOTE(review): the corresponding forward rule "$RI $CM+;" in uax14Forward is not guarded by this flag; confirm the asymmetry is intended
326 | 	    "$CM+ $RI;"
327 | 	#endif
328 | 	    "$CM+ $SY;"
329 | 	    "$CM+ $WJ;"
330 | 	    "$CM+;"
331 | 	    "$AL_FOLLOW $CM+ / ([$BK $CR $LF $NL $ZW {eof}] | $SP+ $CM+ $SP | $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));"
332 | 	    "[$PR] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];"
333 | 	    "$LB4Breaks [$LB4NonBreaks-$CM];"
334 | 	    "$LB4Breaks $CM+ $CAN_CM;"
335 | 	    "$LF $CR;"
336 | 	    "[$SP $ZW] [$LB4NonBreaks-$CM];"
337 | 	    "[$SP $ZW] $CM+ $CAN_CM;"
338 | 	    "$EmojiForSeqs $ZWJ $EmojiMods? $EmojiVar? $EmojiForSeqs;" // reverse of the forward ZWJ emoji-sequence rule
339 | 	    "$CM+ $CAN_CM;"
340 | 	    "$CM* $WJ $CM* $CAN_CM;"
341 | 	    "$CM* $WJ [$LB8NonBreaks-$CM];"
342 | 	    "$CANT_CM $CM* $WJ;"
343 | 	    "$CM* $CAN_CM $CM* $WJ;"
344 | 	    "$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];"
345 | 	    "$CANT_CM $CM* $GL;"
346 | 	    "$CM* $CAN_CM $CM* $GL;"
347 | 	    "$CL $CM+ $CAN_CM;"
348 | 	    "$CP $CM+ $CAN_CM;"
349 | 	    "$EX $CM+ $CAN_CM;"
350 | 	    "$IS $CM+ $CAN_CM;"
351 | 	    "$SY $CM+ $CAN_CM;"
352 | 	    "$CL [$LB8NonBreaks-$CM];"
353 | 	    "$CP [$LB8NonBreaks-$CM];"
354 | 	    "$EX [$LB8NonBreaks-$CM];"
355 | 	    "$IS [$LB8NonBreaks-$CM];"
356 | 	    "$SY [$LB8NonBreaks-$CM];"
357 | 	    "[$CL $CP $EX $IS $SY] $CM+ $SP+ $CM* $OP; "
358 | 	    "$CM* $CAN_CM $SP* $CM* $OP;"
359 | 	    "$CANT_CM $SP* $CM* $OP;"
360 | 	    "$AL_FOLLOW? $CM+ $SP $SP* $CM* $OP;"
361 | 	    "$AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;"
362 | 	    "$CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP;"
363 | 	    "$SY $CM $SP+ $OP;"
364 | 	    "$CM* $OP $SP* $CM* $QU;"
365 | 	    "$CM* $NS $SP* $CM* ($CL | $CP);"
366 | 	    "$CM* $B2 $SP* $CM* $B2;"
367 | 	    "$CM* $QU $CM* $CAN_CM;"
368 | 	    "$CM* $QU $LB18NonBreaks;"
369 | 	    "$CM* $CAN_CM $CM* $QU;"
370 | 	    "$CANT_CM $CM* $QU;"
371 | 	    "$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];"
372 | 	    "$CM* [$LB20NonBreaks-$CM] $CM* $BB;"
373 | 	    "[^$CB] $CM* $BB;"
374 | 	    "[^$CB] $CM* ($HY | $BA) $CM* $HL;"
375 | 	    "$CM* $HL $CM* $SY;"
376 | 	    "$CM* $IN $CM* ($ALPlus | $HL);"
377 | 	    "$CM* $IN $CM* $EX;"
378 | 	    "$CM* $IN $CM* $ID;"
379 | 	    "$CM* $IN $CM* $IN;"
380 | 	    "$CM* $IN $CM* $NU;"
381 | 	    "$CM* $PO $CM* $ID;"
382 | 	    "$CM* $NU $CM* ($ALPlus | $HL);"
383 | 	    "$CM* ($ALPlus | $HL) $CM* $NU;"
384 | 	    "$CM* $ID $CM* $PR;"
385 | 	    "$CM* ($ALPlus | $HL) $CM* $PR;"
386 | 	    "$CM* ($ALPlus | $HL) $CM* $PO;"
387 | 	    "($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;" // reverse of the forward numeric-expression rule
388 | 	    "$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;"
389 | 	    "$CM* ($JT | $JV) $CM* ($H2 | $JV);"
390 | 	    "$CM* $JT $CM* ($H3 | $JT);"
391 | 	    "$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);"
392 | 	    "$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);"
393 | 	    "$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;"
394 | 	    "$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);"
395 | 	    "$CM* ($ALPlus | $HL) $CM* $IS;"
396 | 	    "$CM* $OP $CM* ($ALPlus | $HL | $NU);"
397 | 	    "$CM* ($ALPlus | $HL | $NU) $CM* $CP;"
398 | 	#if ADDITIONAL_EMOJI_SUPPORT
399 | 	    "$CM* $RI $CM* $RI;"
400 | 	#endif
401 | 	    "$EmojiMods $EmojiVar? $EmojiForMods;"; // reverse of the forward emoji-modifier rule
402 | 
403 | 	static const char* uax14SafeForward = // Rules for the "!!safe_forward" section of the break-rule set; uses variables from uax14AssignmentsAfter.
404 | 	    "!!safe_forward;"
405 | 	    "[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];" // a run of the listed classes, then one character outside them (note $SP is in the first set only)
406 | 	    "$dictionary $dictionary;";
407 | 
408 | 	static const char* uax14SafeReverse = // Rules for the "!!safe_reverse" section of the break-rule set; uses variables from uax14AssignmentsAfter.
409 | 	    "!!safe_reverse;"
410 | 	    "$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];" // combining marks, then any character that is neither a mark, a hard break, ZW nor a space
411 | 	    "$CM+ $SP / .;"
412 | 	    "$SP+ $CM* $OP;"
413 | 	    "$SP+ $CM* $QU;"
414 | 	    "$SP+ $CM* ($CL | $CP);"
415 | 	    "$SP+ $CM* $B2;"
416 | 	    "$CM* ($HY | $BA) $CM* $HL;"
417 | 	    "($CM* ($IS | $SY))+ $CM* $NU;"
418 | 	    "($CL | $CP) $CM* ($NU | $IS | $SY);"
419 | 	    "$dictionary $dictionary;";
420 | 


--------------------------------------------------------------------------------
/control:
--------------------------------------------------------------------------------
 1 | Package: com.ps.emojiattributes
 2 | Name: EmojiAttributes
 3 | Depends: firmware (>= 5.1), mobilesubstrate, com.opa334.libundirect (>= 1.1.6)
 4 | Conflicts: com.ps.flexmoji
 5 | Version: 1.0.0
 6 | Architecture: iphoneos-arm
 7 | Description: Various under-the-hood fixes for emoji display.
 8 | Maintainer: PoomSmart
 9 | Author: PoomSmart
10 | Section: Tweaks
11 | Depiction: https://poomsmart.github.io/repo/depictions/emojiattributes.html
12 | SileoDepiction: https://poomsmart.github.io/repo/sileodepictions/emojiattributes.json
13 | 


--------------------------------------------------------------------------------
/copyResources.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copies emoji.bitmap into an iOS Simulator runtime's TextInput.framework. Unnecessary as of iOS 10.
 3 | # Usage: copyResources.sh <runtime version>   (the value is substituted into "iOS <version>.simruntime")
 4 | if [ -z "$1" ]; then
 5 |   echo "Runtime version required" >&2
 6 |   exit 1
 7 | fi
 8 | 
 9 | EA_RUNTIME_ROOT="/Library/Developer/CoreSimulator/Profiles/Runtimes/iOS ${1}.simruntime/Contents/Resources/RuntimeRoot"
10 | EA_BITMAP_NAME=emoji.bitmap
11 | # The bitmap ships under layout/Library/Application Support/EmojiAttributes in this repository.
12 | sudo cp -v "${PWD}/layout/Library/Application Support/EmojiAttributes/${EA_BITMAP_NAME}" "${EA_RUNTIME_ROOT}/System/Library/PrivateFrameworks/TextInput.framework/"
13 | 


--------------------------------------------------------------------------------
/emojiprops.h:
--------------------------------------------------------------------------------
 1 | // © 2021 and later: Unicode, Inc. and others.
 2 | // License & terms of use: https://www.unicode.org/copyright.html
 3 | 
 4 | // emojiprops.h
 5 | // created: 2021sep03 Markus W. Scherer
 6 | 
 7 | #ifndef __EMOJIPROPS_H__
 8 | #define __EMOJIPROPS_H__
 9 | 
10 | #include <unicode/uchar.h>
11 | #include <unicode/utypes.h>
12 | 
13 | enum {
14 |     // Byte offsets from the start of the data, after the generic header,
15 |     // in ascending order.
16 |     // UCPTrie=CodePointTrie, follows the indexes
17 |     IX_CPTRIE_OFFSET,
18 |     IX_RESERVED1,
19 |     IX_RESERVED2,
20 |     IX_RESERVED3,
21 | 
22 |     // UCharsTrie=CharsTrie
23 |     IX_BASIC_EMOJI_TRIE_OFFSET,
24 |     IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET,
25 |     IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET,
26 |     IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET,
27 |     IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET,
28 |     IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET,
29 |     IX_RESERVED10,
30 |     IX_RESERVED11,
31 |     IX_RESERVED12,
32 |     IX_TOTAL_SIZE,
33 | 
34 |     // Not initially byte offsets.
35 |     IX_RESERVED14,
36 |     IX_RESERVED15,
37 |     IX_COUNT  // 16
38 | };
39 | 
40 | // Properties in the code point trie.
41 | enum {
42 |     // https://www.unicode.org/reports/tr51/#Emoji_Properties
43 |     BIT_EMOJI,
44 |     BIT_EMOJI_PRESENTATION,
45 |     BIT_EMOJI_MODIFIER,
46 |     BIT_EMOJI_MODIFIER_BASE,
47 |     BIT_EMOJI_COMPONENT,
48 |     BIT_EXTENDED_PICTOGRAPHIC,
49 |     // https://www.unicode.org/reports/tr51/#Emoji_Sets
50 |     BIT_BASIC_EMOJI
51 | };
52 | 
53 | #endif  // __EMOJIPROPS_H__


--------------------------------------------------------------------------------
/layout/DEBIAN/postinst:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | killall -9 kbd com.apple.WebKit.WebContent || true # SIGKILL the keyboard daemon and WebKit content processes (presumably so they relaunch with the tweak loaded); "|| true" keeps the exit status zero when killall fails, e.g. nothing was running
4 | 


--------------------------------------------------------------------------------
/layout/DEBIAN/postrm:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | killall -9 kbd com.apple.WebKit.WebContent || true # SIGKILL the keyboard daemon and WebKit content processes (presumably so they relaunch without the tweak); "|| true" keeps the exit status zero when killall fails, e.g. nothing was running
4 | 


--------------------------------------------------------------------------------
/layout/Library/Application Support/EmojiAttributes/Info.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>CFBundleDevelopmentRegion</key>
 6 | 	<string>English</string>
 7 | 	<key>CFBundleExecutable</key>
 8 | 	<string>EmojiAttributes</string>
 9 | 	<key>CFBundleIdentifier</key>
10 | 	<string>com.ps.emojiattributes</string>
11 | 	<key>CFBundleInfoDictionaryVersion</key>
12 | 	<string>6.0</string>
13 | 	<key>CFBundlePackageType</key>
14 | 	<string>BNDL</string>
15 | 	<key>CFBundleShortVersionString</key>
16 | 	<string>1.0.0</string>
17 | 	<key>CFBundleSignature</key>
18 | 	<string>????</string>
19 | 	<key>CFBundleVersion</key>
20 | 	<string>1.0</string>
21 | 	<key>NSPrincipalClass</key>
22 | 	<string>EmojiAttributes</string>
23 | </dict>
24 | </plist>
25 | 


--------------------------------------------------------------------------------
/layout/Library/Application Support/EmojiAttributes/emoji.bitmap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PoomSmart/EmojiAttributes/f2031a851ee0857d22db475b0429ebcb7cce23e1/layout/Library/Application Support/EmojiAttributes/emoji.bitmap


--------------------------------------------------------------------------------
/layout/Library/Application Support/EmojiAttributes/uemoji.icu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PoomSmart/EmojiAttributes/f2031a851ee0857d22db475b0429ebcb7cce23e1/layout/Library/Application Support/EmojiAttributes/uemoji.icu


--------------------------------------------------------------------------------
/unicode/cmemory.h:
--------------------------------------------------------------------------------
1 | #include <string.h> // memcpy, used by uprv_memcpy below
2 | 
3 | #define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask)) // casts ptr to uintptr_t and ANDs it with mask; uintptr_t is not declared by this header, so the includer must provide it (e.g. via <stdint.h>)
4 | 
5 | #define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) // forwards to memcpy; U_STANDARD_CPP_NAMESPACE is not defined here and must come from a header included earlier (presumably ICU's unicode/utypes.h)


--------------------------------------------------------------------------------
/unicode/putilimp.h:
--------------------------------------------------------------------------------
  1 | // © 2016 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | /*
  4 | ******************************************************************************
  5 | *
  6 | *   Copyright (C) 1997-2016, International Business Machines
  7 | *   Corporation and others.  All Rights Reserved.
  8 | *
  9 | ******************************************************************************
 10 | *
 11 | *  FILE NAME : putilimp.h
 12 | *
 13 | *   Date        Name        Description
 14 | *   10/17/04    grhoten     Move internal functions from putil.h to this file.
 15 | ******************************************************************************
 16 | */
 17 | 
 18 | #ifndef PUTILIMP_H
 19 | #define PUTILIMP_H
 20 | 
 21 | #include <unicode/utypes.h>
 22 | #include <unicode/putil.h>
 23 | 
 24 | /**
 25 |  * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
 26 |  * Nearly all CPUs and compilers implement a right-shift of a signed integer
 27 |  * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
 28 |  * into the vacated bits (sign extension).
 29 |  * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
 30 |  *
 31 |  * This can be useful for storing a signed value in the upper bits
 32 |  * and another bit field in the lower bits.
 33 |  * The signed value can be retrieved by simple right-shifting.
 34 |  *
 35 |  * This is consistent with the Java language.
 36 |  *
 37 |  * However, the C standard allows compilers to implement a right-shift of a signed integer
 38 |  * as a Logical Shift Right which copies a 0 into the vacated bits.
 39 |  * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
 40 |  *
 41 |  * Code that depends on the natural behavior should be guarded with this macro,
 42 |  * with an alternate path for unusual platforms.
 43 |  * @internal
 44 |  */
 45 | #ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
 46 |     /* Use the predefined value. */
 47 | #else
 48 |     /*
 49 |      * Nearly all CPUs & compilers implement a right-shift of a signed integer
 50 |      * as an Arithmetic Shift Right (with sign extension).
 51 |      */
 52 | #   define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
 53 | #endif
 54 | 
 55 | /** Define this to 1 if your platform supports IEEE 754 floating point,
 56 |    to 0 if it does not. */
 57 | #ifndef IEEE_754
 58 | #   define IEEE_754 1
 59 | #endif
 60 | 
 61 | /**
 62 |  * uintptr_t is an optional part of the standard definitions in stdint.h.
 63 |  * The opengroup.org documentation for stdint.h says
 64 |  * "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
 65 |  * otherwise, they are optional."
 66 |  * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
 67 |  *
 68 |  * Do not use ptrdiff_t since it is signed. size_t is unsigned.
 69 |  */
 70 | /* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
 71 | #if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
 72 | typedef size_t uintptr_t;
 73 | #endif
 74 | 
 75 | /*===========================================================================*/
 76 | /** @{ Information about POSIX support                                       */
 77 | /*===========================================================================*/
 78 | 
 79 | #ifdef U_HAVE_NL_LANGINFO_CODESET
 80 |     /* Use the predefined value. */
 81 | #elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX
 82 | #   define U_HAVE_NL_LANGINFO_CODESET 0
 83 | #else
 84 | #   define U_HAVE_NL_LANGINFO_CODESET 1
 85 | #endif
 86 | 
 87 | #ifdef U_NL_LANGINFO_CODESET
 88 |     /* Use the predefined value. */
 89 | #elif !U_HAVE_NL_LANGINFO_CODESET
 90 | #   define U_NL_LANGINFO_CODESET -1
 91 | #elif U_PLATFORM == U_PF_OS400
 92 |    /* not defined */
 93 | #else
 94 | #   define U_NL_LANGINFO_CODESET CODESET
 95 | #endif
 96 | 
 97 | #if defined(U_TZSET) || defined(U_HAVE_TZSET)
 98 |     /* Use the predefined value. */
 99 | #elif U_PLATFORM_USES_ONLY_WIN32_API
100 |     // UWP doesn't support tzset or environment variables for tz
101 | #if U_PLATFORM_HAS_WINUWP_API == 0
102 | #   define U_TZSET _tzset
103 | #endif
104 | #elif U_PLATFORM == U_PF_OS400
105 |    /* not defined */
106 | #else
107 | #   define U_TZSET tzset
108 | #endif
109 | 
110 | #if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE)
111 |     /* Use the predefined value. */
112 | #elif U_PLATFORM == U_PF_ANDROID
113 | #   define U_TIMEZONE timezone
114 | #elif defined(__UCLIBC__)
115 |     // uClibc does not have __timezone or _timezone.
116 | #elif defined(_NEWLIB_VERSION)
117 | #   define U_TIMEZONE _timezone
118 | #elif defined(__GLIBC__)
119 |     // glibc
120 | #   define U_TIMEZONE __timezone
121 | #elif U_PLATFORM_IS_LINUX_BASED
122 |     // not defined
123 | #elif U_PLATFORM_USES_ONLY_WIN32_API
124 | #   define U_TIMEZONE _timezone
125 | #elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
126 |    /* not defined */
127 | #elif U_PLATFORM == U_PF_OS400
128 |    /* not defined */
129 | #elif U_PLATFORM == U_PF_IPHONE
130 |    /* not defined */
131 | #else
132 | #   define U_TIMEZONE timezone
133 | #endif
134 | 
135 | #if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
136 |     /* Use the predefined value. */
137 | #elif U_PLATFORM_USES_ONLY_WIN32_API
138 |     /* not usable on all windows platforms */
139 | #if U_PLATFORM_HAS_WINUWP_API == 0
140 | #   define U_TZNAME _tzname
141 | #endif
142 | #elif U_PLATFORM == U_PF_OS400
143 |    /* not defined */
144 | #else
145 | #   define U_TZNAME tzname
146 | #endif
147 | 
148 | #ifdef U_HAVE_MMAP
149 |     /* Use the predefined value. */
150 | #elif U_PLATFORM_USES_ONLY_WIN32_API
151 | #   define U_HAVE_MMAP 0
152 | #else
153 | #   define U_HAVE_MMAP 1
154 | #endif
155 | 
156 | #ifdef U_HAVE_POPEN
157 |     /* Use the predefined value. */
158 | #elif U_PLATFORM_USES_ONLY_WIN32_API
159 | #   define U_HAVE_POPEN 0
160 | #elif U_PLATFORM == U_PF_OS400
161 | #   define U_HAVE_POPEN 0
162 | #else
163 | #   define U_HAVE_POPEN 1
164 | #endif
165 | 
166 | /**
167 |  * \def U_HAVE_DIRENT_H
168 |  * Defines whether dirent.h is available.
169 |  * @internal
170 |  */
171 | #ifdef U_HAVE_DIRENT_H
172 |     /* Use the predefined value. */
173 | #elif U_PLATFORM_USES_ONLY_WIN32_API
174 | #   define U_HAVE_DIRENT_H 0
175 | #else
176 | #   define U_HAVE_DIRENT_H 1
177 | #endif
178 | 
179 | /** @} */
180 | 
181 | /*===========================================================================*/
182 | /** @{ Programs used by ICU code                                             */
183 | /*===========================================================================*/
184 | 
185 | /**
186 |  * \def U_MAKE_IS_NMAKE
187 |  * Defines whether the "make" program is Windows nmake.
188 |  */
189 | #ifdef U_MAKE_IS_NMAKE
190 |     /* Use the predefined value. */
191 | #elif U_PLATFORM == U_PF_WINDOWS
192 | #   define U_MAKE_IS_NMAKE 1
193 | #else
194 | #   define U_MAKE_IS_NMAKE 0
195 | #endif
196 | 
197 | /** @} */
198 | 
199 | /*==========================================================================*/
200 | /* Platform utilities                                                       */
201 | /*==========================================================================*/
202 | 
203 | /**
204 |  * Platform utilities isolates the platform dependencies of the
205 |  * library.  For each platform which this code is ported to, these
206 |  * functions may have to be re-implemented.
207 |  */
208 | 
209 | /**
210 |  * Floating point utility to determine if a double is Not a Number (NaN).
211 |  * @internal
212 |  */
213 | U_CAPI UBool   U_EXPORT2 uprv_isNaN(double d);
214 | /**
215 |  * Floating point utility to determine if a double has an infinite value.
216 |  * @internal
217 |  */
218 | U_CAPI UBool   U_EXPORT2 uprv_isInfinite(double d);
219 | /**
220 |  * Floating point utility to determine if a double has a positive infinite value.
221 |  * @internal
222 |  */
223 | U_CAPI UBool   U_EXPORT2 uprv_isPositiveInfinity(double d);
224 | /**
225 |  * Floating point utility to determine if a double has a negative infinite value.
226 |  * @internal
227 |  */
228 | U_CAPI UBool   U_EXPORT2 uprv_isNegativeInfinity(double d);
229 | /**
230 |  * Floating point utility that returns a Not a Number (NaN) value.
231 |  * @internal
232 |  */
233 | U_CAPI double  U_EXPORT2 uprv_getNaN(void);
234 | /**
235 |  * Floating point utility that returns an infinite value.
236 |  * @internal
237 |  */
238 | U_CAPI double  U_EXPORT2 uprv_getInfinity(void);
239 | 
240 | /**
241 |  * Floating point utility to truncate a double.
242 |  * @internal
243 |  */
244 | U_CAPI double  U_EXPORT2 uprv_trunc(double d);
245 | /**
246 |  * Floating point utility to calculate the floor of a double.
247 |  * @internal
248 |  */
249 | U_CAPI double  U_EXPORT2 uprv_floor(double d);
250 | /**
251 |  * Floating point utility to calculate the ceiling of a double.
252 |  * @internal
253 |  */
254 | U_CAPI double  U_EXPORT2 uprv_ceil(double d);
255 | /**
256 |  * Floating point utility to calculate the absolute value of a double.
257 |  * @internal
258 |  */
259 | U_CAPI double  U_EXPORT2 uprv_fabs(double d);
260 | /**
261 |  * Floating point utility to calculate the fractional and integer parts of a double.
262 |  * @internal
263 |  */
264 | U_CAPI double  U_EXPORT2 uprv_modf(double d, double* pinteger);
265 | /**
266 |  * Floating point utility to calculate the remainder of a double divided by another double.
267 |  * @internal
268 |  */
269 | U_CAPI double  U_EXPORT2 uprv_fmod(double d, double y);
270 | /**
271 |  * Floating point utility to calculate d to the power of exponent (d^exponent).
272 |  * @internal
273 |  */
274 | U_CAPI double  U_EXPORT2 uprv_pow(double d, double exponent);
275 | /**
276 |  * Floating point utility to calculate 10 to the power of exponent (10^exponent).
277 |  * @internal
278 |  */
279 | U_CAPI double  U_EXPORT2 uprv_pow10(int32_t exponent);
280 | /**
281 |  * Floating point utility to calculate the maximum value of two doubles.
282 |  * @internal
283 |  */
284 | U_CAPI double  U_EXPORT2 uprv_fmax(double d, double y);
285 | /**
286 |  * Floating point utility to calculate the minimum value of two doubles.
287 |  * @internal
288 |  */
289 | U_CAPI double  U_EXPORT2 uprv_fmin(double d, double y);
290 | /**
291 |  * Private utility to calculate the maximum value of two integers.
292 |  * @internal
293 |  */
294 | U_CAPI int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
295 | /**
296 |  * Private utility to calculate the minimum value of two integers.
297 |  * @internal
298 |  */
299 | U_CAPI int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
300 | 
301 | #if U_IS_BIG_ENDIAN
302 | #   define uprv_isNegative(number) (*((signed char *)&(number))<0)
303 | #else
304 | #   define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
305 | #endif
306 | 
307 | /**
308 |  * Return the largest positive number that can be represented by an integer
309 |  * type of arbitrary bit length.
310 |  * @internal
311 |  */
312 | U_CAPI double  U_EXPORT2 uprv_maxMantissa(void);
313 | 
314 | /**
315 |  * Floating point utility to calculate the logarithm of a double.
316 |  * @internal
317 |  */
318 | U_CAPI double  U_EXPORT2 uprv_log(double d);
319 | 
320 | /**
321 |  * Does common notion of rounding e.g. uprv_floor(x + 0.5);
322 |  * @param x the double number
323 |  * @return the rounded double
324 |  * @internal
325 |  */
326 | U_CAPI double  U_EXPORT2 uprv_round(double x);
327 | 
328 | /**
329 |  * Adds the signed integers a and b, storing the result in res.
330 |  * Checks for signed integer overflow.
331 |  * Similar to the GCC/Clang extension __builtin_add_overflow
332 |  *
333 |  * @param a The first operand.
334 |  * @param b The second operand.
335 |  * @param res a + b
336 |  * @return true if overflow occurred; false if no overflow occurred.
337 |  * @internal
338 |  */
339 | U_CAPI UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);
340 | 
341 | /**
342 |  * Multiplies the signed integers a and b, storing the result in res.
343 |  * Checks for signed integer overflow.
344 |  * Similar to the GCC/Clang extension __builtin_mul_overflow
345 |  *
346 |  * @param a The first multiplicand.
347 |  * @param b The second multiplicand.
348 |  * @param res a * b
349 |  * @return true if overflow occurred; false if no overflow occurred.
350 |  * @internal
351 |  */
352 | U_CAPI UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res);
353 | 
354 | #if 0
355 | /**
356 |  * Returns the number of digits after the decimal point in a double number x.
357 |  *
358 |  * @param x the double number
359 |  * @return the number of digits after the decimal point in a double number x.
360 |  * @internal
361 |  */
362 | /*U_CAPI int32_t  U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
363 | #endif
364 | 
365 | #if !U_CHARSET_IS_UTF8
366 | /**
367 |  * Please use ucnv_getDefaultName() instead.
368 |  * Return the default codepage for this platform and locale.
369 |  * This function can call setlocale() on Unix platforms. Please read the
370 |  * platform documentation on setlocale() before calling this function.
371 |  * @return the default codepage for this platform
372 |  * @internal
373 |  */
374 | U_CAPI const char*  U_EXPORT2 uprv_getDefaultCodepage(void);
375 | #endif
376 | 
377 | /**
378 |  * Please use uloc_getDefault() instead.
379 |  * Return the default locale ID string by querying the system, or
380 |  *     zero if one cannot be found.
381 |  * This function can call setlocale() on Unix platforms. Please read the
382 |  * platform documentation on setlocale() before calling this function.
383 |  * @return the default locale ID string
384 |  * @internal
385 |  */
386 | U_CAPI const char*  U_EXPORT2 uprv_getDefaultLocaleID(void);
387 | 
388 | /**
389 |  * Time zone utilities
390 |  *
391 |  * Wrappers for C runtime library functions relating to timezones.
392 |  * The t_tzset() function (similar to tzset) uses the current setting
393 |  * of the environment variable TZ to assign values to three global
394 |  * variables: daylight, timezone, and tzname. These variables have the
395 |  * following meanings, and are declared in &lt;time.h&gt;.
396 |  *
397 |  *   daylight   Nonzero if daylight-saving-time zone (DST) is specified
398 |  *              in TZ; otherwise, 0. Default value is 1.
399 |  *   timezone   Difference in seconds between coordinated universal
400 |  *              time and local time. E.g., -28,800 for PST (GMT-8hrs)
401 |  *   tzname(0)  Three-letter time-zone name derived from TZ environment
402 |  *              variable. E.g., "PST".
403 |  *   tzname(1)  Three-letter DST zone name derived from TZ environment
404 |  *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
405 |  *              tzname(1) is an empty string.
406 |  *
407 |  * Notes: For example, to set the TZ environment variable to correspond
408 |  * to the current time zone in Germany, you can use one of the
409 |  * following statements:
410 |  *
411 |  *   set TZ=GST1GDT
412 |  *   set TZ=GST+1GDT
413 |  *
414 |  * If the TZ value is not set, t_tzset() attempts to use the time zone
415 |  * information specified by the operating system. Under Windows NT
416 |  * and Windows 95, this information is specified in the Control Panel's
417 |  * Date/Time application.
418 |  * @internal
419 |  */
420 | U_CAPI void     U_EXPORT2 uprv_tzset(void);
421 | 
422 | /**
423 |  * Difference in seconds between coordinated universal
424 |  * time and local time. E.g., -28,800 for PST (GMT-8hrs)
425 |  * @return the difference in seconds between coordinated universal time and local time.
426 |  * @internal
427 |  */
428 | U_CAPI int32_t  U_EXPORT2 uprv_timezone(void);
429 | 
430 | /**
431 |  *   tzname(0)  Three-letter time-zone name derived from TZ environment
432 |  *              variable. E.g., "PST".
433 |  *   tzname(1)  Three-letter DST zone name derived from TZ environment
434 |  *              variable.  E.g., "PDT". If DST zone is omitted from TZ,
435 |  *              tzname(1) is an empty string.
436 |  * @internal
437 |  */
438 | U_CAPI const char* U_EXPORT2 uprv_tzname(int n);
439 | 
440 | /**
441 |  * Reset the global tzname cache.
442 |  * @internal
443 |  */
444 | U_CAPI void uprv_tzname_clear_cache(void);
445 | 
446 | /**
447 |  * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
448 |  * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions.
449 |  * @return the UTC time measured in milliseconds
450 |  * @internal
451 |  */
452 | U_CAPI UDate U_EXPORT2 uprv_getUTCtime(void);
453 | 
454 | /**
455 |  * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
456 |  * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that
457 |  * exposes time to the end user.
458 |  * @return the UTC time measured in milliseconds
459 |  * @internal
460 |  */
461 | U_CAPI UDate U_EXPORT2 uprv_getRawUTCtime(void);
462 | 
463 | /**
464 |  * Determine whether a pathname is absolute or not, as defined by the platform.
465 |  * @param path Pathname to test
466 |  * @return true if the path is absolute
467 |  * @internal (ICU 3.0)
468 |  */
469 | U_CAPI UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
470 | 
471 | /**
472 |  * Use U_MAX_PTR instead of this function.
473 |  * @param base pointer to test
474 |  * @return the largest possible pointer greater than the base
475 |  * @internal (ICU 3.8)
476 |  */
477 | U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
478 | 
479 | /**
480 |  * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
481 |  * In fact, buffer sizes must not exceed 2GB so that the difference between
482 |  * the buffer limit and the buffer start can be expressed in an int32_t.
483 |  *
484 |  * The definition of U_MAX_PTR must fulfill the following conditions:
485 |  * - return the largest possible pointer greater than base
486 |  * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
487 |  * - avoid wrapping around at high addresses
488 |  * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
489 |  *
490 |  * @param base The beginning of a buffer to find the maximum offset from
491 |  * @internal
492 |  */
493 | #ifndef U_MAX_PTR
494 | #  if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
495 |     /* We have 31-bit pointers. */
496 | #    define U_MAX_PTR(base) ((void *)0x7fffffff)
497 | #  elif U_PLATFORM == U_PF_OS400
498 | #    define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
499 | #  elif 0
500 |     /*
501 |      * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
502 |      * but that do not define uintptr_t.
503 |      *
504 |      * However, this does not work on modern compilers:
505 |      * The C++ standard does not define pointer overflow, and allows compilers to
506 |      * assume that p+u>p for any pointer p and any integer u>0.
507 |      * Thus, modern compilers optimize away the ">" comparison.
508 |      * (See ICU tickets #7187 and #8096.)
509 |      */
510 | #    define U_MAX_PTR(base) \
511 |     ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
512 |         ? ((char *)(base)+0x7fffffffu) \
513 |         : (char *)-1))
514 | #  else
515 |     /* Default version. C++ standard compliant for scalar pointers. */
516 | #    define U_MAX_PTR(base) \
517 |     ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
518 |         ? ((uintptr_t)(base)+0x7fffffffu) \
519 |         : (uintptr_t)-1))
520 | #  endif
521 | #endif
522 | 
523 | 
524 | #ifdef __cplusplus
525 | /**
526 |  * Pin a buffer capacity such that doing pointer arithmetic
527 |  * on the destination pointer and capacity cannot overflow.
528 |  *
529 |  * The pinned capacity must fulfill the following conditions (for positive capacities):
530 |  *   - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
531 |  *   - (dest + capacity) >= dest
532 |  *   - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
533 |  *
534 |  * @param dest the destination buffer pointer.
535 |  * @param capacity the requested buffer capacity, in units of type T.
536 |  * @return the pinned capacity.
537 |  * @internal
538 |  */
539 | template <typename T>
540 | inline int32_t pinCapacity(T *dest, int32_t capacity) {
541 |     if (capacity <= 0) { return capacity; }  // Non-positive capacities are returned unchanged.
542 | 
543 |     uintptr_t destInt = (uintptr_t)dest;  // Work on the address as an unsigned integer.
544 |     uintptr_t maxInt;  // Highest address the buffer is allowed to reach.
545 | 
546 | #  if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
547 |     // We have 31-bit pointers.
548 |     maxInt = 0x7fffffff;
549 | #  elif U_PLATFORM == U_PF_OS400
550 |     maxInt = (uintptr_t)uprv_maximumPtr((void *)dest);  // Limit comes from the platform helper.
551 | #  else
552 |     maxInt = destInt + 0x7fffffffu;
553 |     if (maxInt < destInt) {  // Unsigned wrap-around: the sum went past the top of the address space.
554 |         // Less than 2GB to the end of the address space.
555 |         // Pin to that to prevent address overflow.
556 |         maxInt = (uintptr_t)-1;
557 |     }
558 | #  endif
559 | 
560 |     uintptr_t maxBytes = maxInt - destInt;  // max. 2GB
561 |     int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T));  // Byte budget expressed in whole elements of T.
562 |     return capacity <= maxCapacity ? capacity : maxCapacity;  // Never return more than was requested.
563 | }
564 | #endif   // __cplusplus
565 | 
566 | /*  Dynamic Library Functions */
567 | 
568 | typedef void (UVoidFunction)(void);
569 | 
570 | #if U_ENABLE_DYLOAD
571 | /**
572 |  * Load a library
573 |  * @internal (ICU 4.4)
574 |  */
575 | U_CAPI void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
576 | 
577 | /**
578 |  * Close a library
579 |  * @internal (ICU 4.4)
580 |  */
581 | U_CAPI void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
582 | 
583 | /**
584 |  * Extract a symbol from a library (function)
585 |  * @internal (ICU 4.8)
586 |  */
587 | U_CAPI UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
588 | 
589 | /**
590 |  * Extract a symbol from a library (data)
591 |  * Not implemented, no clients.
592 |  * @internal
593 |  */
594 | /* U_CAPI void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
595 | 
596 | #endif
597 | 
598 | /**
599 |  * Define malloc and related functions
600 |  * @internal
601 |  */
602 | #if U_PLATFORM == U_PF_OS400
603 | # define uprv_default_malloc(x) _C_TS_malloc(x)
604 | # define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
605 | # define uprv_default_free(x) _C_TS_free(x)
606 | /* also _C_TS_calloc(x) */
607 | #else
608 | /* C defaults */
609 | # define uprv_default_malloc(x) malloc(x)
610 | # define uprv_default_realloc(x,y) realloc(x,y)
611 | # define uprv_default_free(x) free(x)
612 | #endif
613 | 
614 | 
615 | #endif


--------------------------------------------------------------------------------
/unicode/ucln.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | ******************************************************************************
 5 | *
 6 | * Copyright (C) 2001-2013, International Business Machines
 7 | *                Corporation and others. All Rights Reserved.
 8 | *
 9 | ******************************************************************************
10 | *   file name:  ucln.h
11 | *   encoding:   UTF-8
12 | *   tab size:   8 (not used)
13 | *   indentation:4
14 | *
15 | *   created on: 2001July05
16 | *   created by: George Rhoten
17 | */
18 | 
19 | #ifndef __UCLN_H__
20 | #define __UCLN_H__
21 | 
22 | #include <unicode/utypes.h>
23 | 
24 | /** These are the functions used to register a library's memory cleanup
25 |  * functions.  Each library should define a single library register function
26 |  * to call this API.  In the i18n library, it is ucln_i18n_registerCleanup().
27 |  *
28 |  * None of the cleanup functions should use a mutex to clean up an API's
29 |  * allocated memory because a cleanup function is not meant to be thread safe,
30 |  * and plenty of data cannot be reference counted in order to make sure that
31 |  * no one else needs the allocated data.
32 |  *
33 |  * In order to make a cleanup function get called when u_cleanup is called,
34 |  * You should add your function to the library specific cleanup function.
35 |  * If the cleanup function is not in the common library, the code that
36 |  * allocates the memory should call the library specific cleanup function.
37 |  * For instance, in the i18n library, any memory allocated statically must
38 |  * call ucln_i18n_registerCleanup() from the ucln_in.h header.  These library
39 |  * cleanup functions are needed in order to prevent a circular dependency
40 |  * between the common library and any other library.
41 |  *
42 |  * The order of the cleanup is very important.  In general, an API that
43 |  * depends on a second API should be cleaned up before the second API.
44 |  * For instance, the default converter in ustring depends upon the converter
45 |  * API.  So the default converter should be closed before the converter API
46 |  * has its cache flushed.  This will prevent any memory leaks due to
47 |  * reference counting.
48 |  *
49 |  * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples.
50 |  */
51 | 
52 | /**
53 |  * Data Type for cleanup function selector. These roughly correspond to libraries.
54 |  */
55 | typedef enum ECleanupLibraryType {
56 |     UCLN_START = -1,
57 |     UCLN_UPLUG,     /* ICU plugins */
58 |     UCLN_CUSTOM,    /* Custom is for anyone else. */
59 |     UCLN_CTESTFW,
60 |     UCLN_TOOLUTIL,
61 |     UCLN_LAYOUTEX,
62 |     UCLN_LAYOUT,
63 |     UCLN_IO,
64 |     UCLN_I18N,
65 |     UCLN_COMMON /* This must be the last one to cleanup. */
66 | } ECleanupLibraryType;
67 | 
68 | /**
69 |  * Data type for cleanup function pointer
70 |  */
71 | U_CDECL_BEGIN
72 | typedef UBool U_CALLCONV cleanupFunc(void);
73 | typedef void U_CALLCONV initFunc(UErrorCode *);
74 | U_CDECL_END
75 | 
76 | /**
77 |  * Register a cleanup function
78 |  * @param type which library to register for.
79 |  * @param func the function pointer
80 |  */
81 | U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
82 |                                            cleanupFunc *func);
83 | 
84 | /**
85 |  * Request cleanup for one specific library.
86 |  * Not thread safe.
87 |  * @param type which library to cleanup
88 |  */
89 | U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
90 | 
91 | #endif


--------------------------------------------------------------------------------
/unicode/ucln_cmn.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | ******************************************************************************
 5 | * Copyright (C) 2001-2016, International Business Machines
 6 | *                Corporation and others. All Rights Reserved.
 7 | ******************************************************************************
 8 | *   file name:  ucln_cmn.h
 9 | *   encoding:   UTF-8
10 | *   tab size:   8 (not used)
11 | *   indentation:4
12 | *
13 | *   created on: 2001July05
14 | *   created by: George Rhoten
15 | */
16 | 
17 | #ifndef __UCLN_CMN_H__
18 | #define __UCLN_CMN_H__
19 | 
20 | #include <unicode/utypes.h>
21 | #include "ucln.h"
22 | 
23 | /* These are the cleanup functions for various APIs. */
 24 | /* @return true if cleanup completes successfully. */
25 | U_CFUNC UBool utrace_cleanup(void);
26 | 
27 | U_CFUNC UBool ucln_lib_cleanup(void);
28 | 
29 | /*
 30 | Please keep the enums declared in the same order
 31 | as the cleanup functions are supposed to be called. */
32 | typedef enum ECleanupCommonType {
33 |     UCLN_COMMON_START = -1,
34 |     UCLN_COMMON_NUMPARSE_UNISETS,
35 |     UCLN_COMMON_USPREP,
36 |     UCLN_COMMON_BREAKITERATOR,
37 |     UCLN_COMMON_RBBI,
38 |     UCLN_COMMON_SERVICE,
39 |     UCLN_COMMON_LOCALE_KEY_TYPE,
40 |     UCLN_COMMON_LOCALE,
41 |     UCLN_COMMON_LOCALE_ALIAS,
42 |     UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
43 |     UCLN_COMMON_LOCALE_AVAILABLE,
44 |     UCLN_COMMON_LIKELY_SUBTAGS,
45 |     UCLN_COMMON_LOCALE_DISTANCE,
46 |     UCLN_COMMON_ULOC,
47 |     UCLN_COMMON_CURRENCY,
48 |     UCLN_COMMON_LOADED_NORMALIZER2,
49 |     UCLN_COMMON_NORMALIZER2,
50 |     UCLN_COMMON_CHARACTERPROPERTIES,
51 |     UCLN_COMMON_USET,
52 |     UCLN_COMMON_UNAMES,
53 |     UCLN_COMMON_UPROPS,
54 |     UCLN_COMMON_EMOJIPROPS,
55 |     UCLN_COMMON_UCNV,
56 |     UCLN_COMMON_UCNV_IO,
57 |     UCLN_COMMON_UDATA,
58 |     UCLN_COMMON_PUTIL,
59 |     UCLN_COMMON_UINIT,
60 | 
61 |     /*
 62 |        The unified cache caches collation data. Collation data structures
63 |        contain resource bundles which means that unified cache cleanup
64 |        must happen before resource bundle clean up.
65 |     */
66 |     UCLN_COMMON_UNIFIED_CACHE,
67 |     UCLN_COMMON_URES,
68 |     UCLN_COMMON_MUTEX,    // Mutexes should be the last to be cleaned up.
69 |     UCLN_COMMON_COUNT /* This must be last */
70 | } ECleanupCommonType;
71 | 
72 | /* Main library cleanup registration function. */
73 | /* See common/ucln.h for details on adding a cleanup function. */
74 | /* Note: the global mutex must not be held when calling this function. */
75 | U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type,
76 |                                                    cleanupFunc *func);
77 | 
78 | #endif


--------------------------------------------------------------------------------
/unicode/ucmndata.h:
--------------------------------------------------------------------------------
  1 | // © 2016 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | /*
  4 | ******************************************************************************
  5 | *
  6 | *   Copyright (C) 1999-2011, International Business Machines
  7 | *   Corporation and others.  All Rights Reserved.
  8 | *
  9 | ******************************************************************************/
 10 | 
 11 | 
 12 | /*----------------------------------------------------------------------------------
 13 |  *
 14 |  *   UCommonData   An abstract interface for dealing with ICU Common Data Files.
 15 |  *                 ICU Common Data Files are a grouping of a number of individual
 16 |  *                 data items (resources, converters, tables, anything) into a
 17 |  *                 single file or dll.  The combined format includes a table of
 18 |  *                 contents for locating the individual items by name.
 19 |  *
 20 |  *                 Two formats for the table of contents are supported, which is
 21 |  *                 why there is an abstract interface involved.
 22 |  *
 23 |  *                 These functions are part of the ICU internal implementation, and
 24 |  *                 are not intended to be used directly by applications.
 25 |  */
 26 | 
 27 | #ifndef __UCMNDATA_H__
 28 | #define __UCMNDATA_H__
 29 | 
 30 | #include "udata.h"
 31 | #include "umapfile.h"
 32 | 
 33 | 
 34 | #define COMMON_DATA_NAME U_ICUDATA_NAME
 35 | 
 36 | typedef struct  {
 37 |     uint16_t    headerSize;
 38 |     uint8_t     magic1;
 39 |     uint8_t     magic2;
 40 | } MappedData;
 41 | 
 42 | 
 43 | typedef struct  {
 44 |     MappedData  dataHeader;
 45 |     UDataInfo   info;
 46 | } DataHeader;
 47 | 
 48 | typedef struct {
 49 |     uint32_t nameOffset;
 50 |     uint32_t dataOffset;
 51 | } UDataOffsetTOCEntry;
 52 | 
 53 | typedef struct {
 54 |     uint32_t count;
 55 |     /**
 56 |      * Variable-length array declared with length 1 to disable bounds checkers.
 57 |      * The actual array length is in the count field.
 58 |      */
 59 |     UDataOffsetTOCEntry entry[1];
 60 | } UDataOffsetTOC;
 61 | 
 62 | /**
 63 |  * Get the header size from a const DataHeader *udh.
 64 |  * Handles opposite-endian data.
 65 |  *
 66 |  * @internal
 67 |  */
 68 | U_CFUNC uint16_t
 69 | udata_getHeaderSize(const DataHeader *udh);
 70 | 
 71 | /**
 72 |  * Get the UDataInfo.size from a const UDataInfo *info.
 73 |  * Handles opposite-endian data.
 74 |  *
 75 |  * @internal
 76 |  */
 77 | U_CFUNC uint16_t
 78 | udata_getInfoSize(const UDataInfo *info);
 79 | 
 80 | U_CDECL_BEGIN
 81 | /*
 82 |  *  "Virtual" functions for data lookup.
 83 |  *  To call one, given a UDataMemory *p, the code looks like this:
 84 |  *     p->vFuncs.Lookup(p, tocEntryName, pErrorCode);
 85 |  *          (I sure do wish this was written in C++, not C)
 86 |  */
 87 | 
 88 | typedef const DataHeader *
 89 | (U_CALLCONV * LookupFn)(const UDataMemory *pData,
 90 |                         const char *tocEntryName,
 91 |                         int32_t *pLength,
 92 |                         UErrorCode *pErrorCode);
 93 | 
 94 | typedef uint32_t
 95 | (U_CALLCONV * NumEntriesFn)(const UDataMemory *pData);
 96 | 
 97 | U_CDECL_END
 98 | 
 99 | typedef struct {
100 |     LookupFn      Lookup;
101 |     NumEntriesFn  NumEntries; 
102 | } commonDataFuncs;
103 | 
104 | 
105 | /*
106 |  *  Functions to check whether a UDataMemory refers to memory containing 
107 |  *     a recognizable header and table of contents in a Common Data Format
108 |  *
109 |  *     If a valid header and TOC are found,
110 |  *         set the CommonDataFuncs function dispatch vector in the UDataMemory
111 |  *             to point to the right functions for the TOC type.
112 |  *     otherwise
113 |  *         set an errorcode.
114 |  */
115 | U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode);
116 | 
117 | #endif
118 | 


--------------------------------------------------------------------------------
/unicode/ucptrie.h:
--------------------------------------------------------------------------------
  1 | // © 2017 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | 
  4 | // ucptrie.h (modified from utrie2.h)
  5 | // created: 2017dec29 Markus W. Scherer
  6 | 
  7 | #ifndef __UCPTRIE_H__
  8 | #define __UCPTRIE_H__
  9 | 
 10 | #include <unicode/utypes.h>
 11 | #include "unicode/ucpmap.h"
 12 | #include "unicode/utf8.h"
 13 | 
 14 | #if U_SHOW_CPLUSPLUS_API
 15 | #include "unicode/localpointer.h"
 16 | #endif   // U_SHOW_CPLUSPLUS_API
 17 | 
 18 | U_CDECL_BEGIN
 19 | 
 20 | /**
 21 |  * \file
 22 |  *
 23 |  * This file defines an immutable Unicode code point trie.
 24 |  *
 25 |  * @see UCPTrie
 26 |  * @see UMutableCPTrie
 27 |  */
 28 | 
 29 | #ifndef U_IN_DOXYGEN
 30 | /** @internal */
 31 | typedef union UCPTrieData {
 32 |     /** @internal */
 33 |     const void *ptr0;
 34 |     /** @internal */
 35 |     const uint16_t *ptr16;
 36 |     /** @internal */
 37 |     const uint32_t *ptr32;
 38 |     /** @internal */
 39 |     const uint8_t *ptr8;
 40 | } UCPTrieData;
 41 | #endif
 42 | 
 43 | /**
 44 |  * Immutable Unicode code point trie structure.
 45 |  * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
 46 |  * For details see https://icu.unicode.org/design/struct/utrie
 47 |  *
 48 |  * Do not access UCPTrie fields directly; use public functions and macros.
 49 |  * Functions are easy to use: They support all trie types and value widths.
 50 |  *
 51 |  * When performance is really important, macros provide faster access.
 52 |  * Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
 53 |  * There are "fast" macros for special optimized use cases.
 54 |  *
 55 |  * The macros will return bogus values, or may crash, if used on the wrong type or value width.
 56 |  *
 57 |  * @see UMutableCPTrie
 58 |  * @stable ICU 63
 59 |  */
 60 | struct UCPTrie {
 61 | #ifndef U_IN_DOXYGEN
 62 |     /** @internal */
 63 |     const uint16_t *index;
 64 |     /** @internal */
 65 |     UCPTrieData data;
 66 | 
 67 |     /** @internal */
 68 |     int32_t indexLength;
 69 |     /** @internal */
 70 |     int32_t dataLength;
 71 |     /** Start of the last range which ends at U+10FFFF. @internal */
 72 |     UChar32 highStart;
 73 |     /** highStart>>12 @internal */
 74 |     uint16_t shifted12HighStart;
 75 | 
 76 |     /** @internal */
 77 |     int8_t type;  // UCPTrieType
 78 |     /** @internal */
 79 |     int8_t valueWidth;  // UCPTrieValueWidth
 80 | 
 81 |     /** padding/reserved @internal */
 82 |     uint32_t reserved32;
 83 |     /** padding/reserved @internal */
 84 |     uint16_t reserved16;
 85 | 
 86 |     /**
 87 |      * Internal index-3 null block offset.
 88 |      * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
 89 |      * @internal
 90 |      */
 91 |     uint16_t index3NullOffset;
 92 |     /**
 93 |      * Internal data null block offset, not shifted.
 94 |      * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
 95 |      * @internal
 96 |      */
 97 |     int32_t dataNullOffset;
 98 |     /** @internal */
 99 |     uint32_t nullValue;
100 | 
101 | #ifdef UCPTRIE_DEBUG
102 |     /** @internal */
103 |     const char *name;
104 | #endif
105 | #endif
106 | };
107 | #ifndef U_IN_DOXYGEN
108 | typedef struct UCPTrie UCPTrie;
109 | #endif
110 | 
111 | /**
112 |  * Selectors for the type of a UCPTrie.
113 |  * Different trade-offs for size vs. speed.
114 |  *
115 |  * @see umutablecptrie_buildImmutable
116 |  * @see ucptrie_openFromBinary
117 |  * @see ucptrie_getType
118 |  * @stable ICU 63
119 |  */
120 | enum UCPTrieType {
121 |     /**
122 |      * For ucptrie_openFromBinary() to accept any type.
123 |      * ucptrie_getType() will return the actual type.
124 |      * @stable ICU 63
125 |      */
126 |     UCPTRIE_TYPE_ANY = -1,
127 |     /**
128 |      * Fast/simple/larger BMP data structure. Use functions and "fast" macros.
129 |      * @stable ICU 63
130 |      */
131 |     UCPTRIE_TYPE_FAST,
132 |     /**
133 |      * Small/slower BMP data structure. Use functions and "small" macros.
134 |      * @stable ICU 63
135 |      */
136 |     UCPTRIE_TYPE_SMALL
137 | };
138 | #ifndef U_IN_DOXYGEN
139 | typedef enum UCPTrieType UCPTrieType;
140 | #endif
141 | 
142 | /**
143 |  * Selectors for the number of bits in a UCPTrie data value.
144 |  *
145 |  * @see umutablecptrie_buildImmutable
146 |  * @see ucptrie_openFromBinary
147 |  * @see ucptrie_getValueWidth
148 |  * @stable ICU 63
149 |  */
150 | enum UCPTrieValueWidth {
151 |     /**
152 |      * For ucptrie_openFromBinary() to accept any data value width.
153 |      * ucptrie_getValueWidth() will return the actual data value width.
154 |      * @stable ICU 63
155 |      */
156 |     UCPTRIE_VALUE_BITS_ANY = -1,
157 |     /**
158 |      * The trie stores 16 bits per data value.
159 |      * It returns them as unsigned values 0..0xffff=65535.
160 |      * @stable ICU 63
161 |      */
162 |     UCPTRIE_VALUE_BITS_16,
163 |     /**
164 |      * The trie stores 32 bits per data value.
165 |      * @stable ICU 63
166 |      */
167 |     UCPTRIE_VALUE_BITS_32,
168 |     /**
169 |      * The trie stores 8 bits per data value.
170 |      * It returns them as unsigned values 0..0xff=255.
171 |      * @stable ICU 63
172 |      */
173 |     UCPTRIE_VALUE_BITS_8
174 | };
175 | #ifndef U_IN_DOXYGEN
176 | typedef enum UCPTrieValueWidth UCPTrieValueWidth;
177 | #endif
178 | 
179 | /**
180 |  * Returns the trie type.
181 |  *
182 |  * @param trie the trie
183 |  * @return the trie type
184 |  * @see ucptrie_openFromBinary
185 |  * @see UCPTRIE_TYPE_ANY
186 |  * @stable ICU 63
187 |  */
188 | U_CAPI UCPTrieType U_EXPORT2
189 | ucptrie_getType(const UCPTrie *trie);
190 | 
191 | /**
192 |  * Returns the number of bits in a trie data value.
193 |  *
194 |  * @param trie the trie
195 |  * @return the number of bits in a trie data value
196 |  * @see ucptrie_openFromBinary
197 |  * @see UCPTRIE_VALUE_BITS_ANY
198 |  * @stable ICU 63
199 |  */
200 | U_CAPI UCPTrieValueWidth U_EXPORT2
201 | ucptrie_getValueWidth(const UCPTrie *trie);
202 | 
203 | /**
204 |  * Returns the value for a code point as stored in the trie, with range checking.
205 |  * Returns the trie error value if c is not in the range 0..U+10FFFF.
206 |  *
207 |  * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
208 |  * Easier to use because, unlike the macros, this function works on all UCPTrie
209 |  * objects, for all types and value widths.
210 |  *
211 |  * @param trie the trie
212 |  * @param c the code point
213 |  * @return the trie value,
214 |  *         or the trie error value if the code point is not in the range 0..U+10FFFF
215 |  * @stable ICU 63
216 |  */
217 | U_CAPI uint32_t U_EXPORT2
218 | ucptrie_get(const UCPTrie *trie, UChar32 c);
219 | 
220 | /**
221 |  * Returns the last code point such that all those from start to there have the same value.
222 |  * Can be used to efficiently iterate over all same-value ranges in a trie.
223 |  * (This is normally faster than iterating over code points and get()ting each value,
224 |  * but much slower than a data structure that stores ranges directly.)
225 |  *
226 |  * If the UCPMapValueFilter function pointer is not NULL, then
227 |  * the value to be delivered is passed through that function, and the return value is the end
228 |  * of the range where all values are modified to the same actual value.
229 |  * The value is unchanged if that function pointer is NULL.
230 |  *
231 |  * Example:
232 |  * \code
233 |  * UChar32 start = 0, end;
234 |  * uint32_t value;
235 |  * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
236 |  *                                NULL, NULL, &value)) >= 0) {
237 |  *     // Work with the range start..end and its value.
238 |  *     start = end + 1;
239 |  * }
240 |  * \endcode
241 |  *
242 |  * @param trie the trie
243 |  * @param start range start
244 |  * @param option defines whether surrogates are treated normally,
245 |  *               or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
246 |  * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
247 |  * @param filter a pointer to a function that may modify the trie data value,
248 |  *     or NULL if the values from the trie are to be used unmodified
249 |  * @param context an opaque pointer that is passed on to the filter function
250 |  * @param pValue if not NULL, receives the value that every code point start..end has;
251 |  *     may have been modified by filter(context, trie value)
252 |  *     if that function pointer is not NULL
253 |  * @return the range end code point, or -1 if start is not a valid code point
254 |  * @stable ICU 63
255 |  */
256 | U_CAPI UChar32 U_EXPORT2
257 | ucptrie_getRange(const UCPTrie *trie, UChar32 start,
258 |                  UCPMapRangeOption option, uint32_t surrogateValue,
259 |                  UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
260 | 
261 | /**
262 |  * Writes a memory-mappable form of the trie into 32-bit aligned memory.
263 |  * Inverse of ucptrie_openFromBinary().
264 |  *
265 |  * @param trie the trie
266 |  * @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
267 |  *             can be NULL if capacity==0
268 |  * @param capacity the number of bytes available at data, or 0 for pure preflighting
269 |  * @param pErrorCode an in/out ICU UErrorCode;
270 |  *                   U_BUFFER_OVERFLOW_ERROR if the capacity is too small
271 |  * @return the number of bytes written or (if buffer overflow) needed for the trie
272 |  *
273 |  * @see ucptrie_openFromBinary()
274 |  * @stable ICU 63
275 |  */
276 | U_CAPI int32_t U_EXPORT2
277 | ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
278 | 
279 | /**
280 |  * Macro parameter value for a trie with 16-bit data values.
281 |  * Use the name of this macro as a "dataAccess" parameter in other macros.
282 |  * Do not use this macro in any other way.
283 |  *
284 |  * @see UCPTRIE_VALUE_BITS_16
285 |  * @stable ICU 63
286 |  */
287 | #define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
288 | 
289 | /**
290 |  * Macro parameter value for a trie with 32-bit data values.
291 |  * Use the name of this macro as a "dataAccess" parameter in other macros.
292 |  * Do not use this macro in any other way.
293 |  *
294 |  * @see UCPTRIE_VALUE_BITS_32
295 |  * @stable ICU 63
296 |  */
297 | #define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
298 | 
299 | /**
300 |  * Macro parameter value for a trie with 8-bit data values.
301 |  * Use the name of this macro as a "dataAccess" parameter in other macros.
302 |  * Do not use this macro in any other way.
303 |  *
304 |  * @see UCPTRIE_VALUE_BITS_8
305 |  * @stable ICU 63
306 |  */
307 | #define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
308 | 
309 | /**
310 |  * Returns a trie value for a code point, with range checking.
311 |  * Returns the trie error value if c is not in the range 0..U+10FFFF.
312 |  *
313 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
314 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
315 |  * @param c (UChar32, in) the input code point
316 |  * @return The code point's trie value.
317 |  * @stable ICU 63
318 |  */
319 | #define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
320 | 
321 | /**
322 |  * Returns a trie value for a code point, with range checking.
323 |  * Returns the trie error value if c is not in the range U+0000..U+10FFFF.
324 |  *
325 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
326 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
327 |  * @param c (UChar32, in) the input code point
328 |  * @return The code point's trie value.
329 |  * @stable ICU 63
330 |  */
331 | #define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
332 |     dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
333 | 
334 | /**
335 |  * UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
336 |  * and gets a value from the trie.
337 |  * Sets the trie error value if c is an unpaired surrogate.
338 |  *
339 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
340 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
341 |  * @param src (const UChar *, in/out) the source text pointer
342 |  * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
343 |  * @param c (UChar32, out) variable for the code point; an unpaired surrogate code unit is stored unchanged
344 |  * @param result (out) variable for the trie lookup result
345 |  * @stable ICU 63
346 |  */
347 | #define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
348 |     (c) = *(src)++; \
349 |     int32_t __index; \
350 |     if (!U16_IS_SURROGATE(c)) { \
351 |         __index = _UCPTRIE_FAST_INDEX(trie, c); \
352 |     } else { \
353 |         uint16_t __c2; \
354 |         if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
355 |             ++(src); \
356 |             (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
357 |             __index = _UCPTRIE_SMALL_INDEX(trie, c); \
358 |         } else { \
359 |             __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
360 |         } \
361 |     } \
362 |     (result) = dataAccess(trie, __index); \
363 | } UPRV_BLOCK_MACRO_END
364 | 
365 | /**
366 |  * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
367 |  * and gets a value from the trie.
368 |  * Sets the trie error value if c is an unpaired surrogate.
369 |  *
370 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
371 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
372 |  * @param start (const UChar *, in) the start pointer for the text
373 |  * @param src (const UChar *, in/out) the source text pointer
374 |  * @param c (UChar32, out) variable for the code point; an unpaired surrogate code unit is stored unchanged
375 |  * @param result (out) variable for the trie lookup result
376 |  * @stable ICU 63
377 |  */
378 | #define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
379 |     (c) = *--(src); \
380 |     int32_t __index; \
381 |     if (!U16_IS_SURROGATE(c)) { \
382 |         __index = _UCPTRIE_FAST_INDEX(trie, c); \
383 |     } else { \
384 |         uint16_t __c2; \
385 |         if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
386 |             --(src); \
387 |             (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
388 |             __index = _UCPTRIE_SMALL_INDEX(trie, c); \
389 |         } else { \
390 |             __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
391 |         } \
392 |     } \
393 |     (result) = dataAccess(trie, __index); \
394 | } UPRV_BLOCK_MACRO_END
395 | 
396 | /**
397 |  * UTF-8: Post-increments src and gets a value from the trie.
398 |  * Sets the trie error value for an ill-formed byte sequence.
399 |  *
400 |  * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
401 |  * because it would be more work to do so and is often not needed.
402 |  * If the trie value differs from the error value, then the byte sequence is well-formed,
403 |  * and the code point can be assembled without revalidation.
404 |  *
405 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
406 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
407 |  * @param src (const char *, in/out) the source text pointer
408 |  * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
409 |  * @param result (out) variable for the trie lookup result
410 |  * @stable ICU 63
411 |  */
412 | #define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
413 |     int32_t __lead = (uint8_t)*(src)++; \
414 |     if (!U8_IS_SINGLE(__lead)) { \
415 |         uint8_t __t1, __t2, __t3; \
416 |         if ((src) != (limit) && \
417 |             (__lead >= 0xe0 ? \
418 |                 __lead < 0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
419 |                     U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
420 |                     ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
421 |                     (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
422 |                 :  /* U+10000..U+10FFFF */ \
423 |                     (__lead -= 0xf0) <= 4 && \
424 |                     U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
425 |                     (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
426 |                     (__t2 = *(src) - 0x80) <= 0x3f && \
427 |                     ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
428 |                     (__lead = __lead >= (trie)->shifted12HighStart ? \
429 |                         (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
430 |                         ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
431 |             :  /* U+0080..U+07FF */ \
432 |                 __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
433 |                 (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
434 |             ++(src); \
435 |         } else { \
436 |             __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET;  /* ill-formed*/ \
437 |         } \
438 |     } \
439 |     (result) = dataAccess(trie, __lead); \
440 | } UPRV_BLOCK_MACRO_END
441 | 
442 | /**
443 |  * UTF-8: Pre-decrements src and gets a value from the trie.
444 |  * Sets the trie error value for an ill-formed byte sequence.
445 |  *
446 |  * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
447 |  * because it would be more work to do so and is often not needed.
448 |  * If the trie value differs from the error value, then the byte sequence is well-formed,
449 |  * and the code point can be assembled without revalidation.
450 |  *
451 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
452 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
453 |  * @param start (const char *, in) the start pointer for the text
454 |  * @param src (const char *, in/out) the source text pointer
455 |  * @param result (out) variable for the trie lookup result
456 |  * @stable ICU 63
457 |  */
458 | #define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
459 |     int32_t __index = (uint8_t)*--(src); \
460 |     if (!U8_IS_SINGLE(__index)) { \
461 |         __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
462 |                                               (const uint8_t *)(src)); \
463 |         (src) -= __index & 7; \
464 |         __index >>= 3; \
465 |     } \
466 |     (result) = dataAccess(trie, __index); \
467 | } UPRV_BLOCK_MACRO_END
468 | 
469 | /**
470 |  * Returns a trie value for an ASCII code point, without range checking.
471 |  *
472 |  * @param trie (const UCPTrie *, in) the trie (of either fast or small type)
473 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
474 |  * @param c (UChar32, in) the input code point; must be U+0000..U+007F
475 |  * @return The ASCII code point's trie value.
476 |  * @stable ICU 63
477 |  */
478 | #define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
479 | 
480 | /**
481 |  * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
482 |  * Can be used to look up a value for a UTF-16 code unit if other parts of
483 |  * the string processing check for surrogates.
484 |  *
485 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
486 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
487 |  * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
488 |  * @return The BMP code point's trie value.
489 |  * @stable ICU 63
490 |  */
491 | #define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
492 | 
493 | /**
494 |  * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
495 |  * without range checking.
496 |  *
497 |  * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
498 |  * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
499 |  * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
500 |  * @return The supplementary code point's trie value.
501 |  * @stable ICU 63
502 |  */
503 | #define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
504 | 
505 | /* Internal definitions ----------------------------------------------------- */
506 | 
507 | #ifndef U_IN_DOXYGEN
508 | 
509 | /**
510 |  * Internal implementation constants.
511 |  * These are needed for the API macros, but users should not use these directly.
512 |  * @internal
513 |  */
514 | enum {
515 |     /** Right-shift applied to a code point to get its offset in the fast index (c >> 6). @internal */
516 |     UCPTRIE_FAST_SHIFT = 6,
517 | 
518 |     /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
519 |     UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,
520 | 
521 |     /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
522 |     UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,
523 | 
524 |     /** Highest code point (U+0FFF) that UCPTRIE_SMALL_GET() looks up via the fast index. @internal */
525 |     UCPTRIE_SMALL_MAX = 0xfff,
526 | 
527 |     /**
528 |      * Offset from dataLength (to be subtracted) for fetching the
529 |      * value returned for out-of-range code points and ill-formed UTF-8/16.
530 |      * @internal
531 |      */
532 |     UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
533 |     /**
534 |      * Offset from dataLength (to be subtracted) for fetching the
535 |      * value returned for code points highStart..U+10FFFF.
536 |      * @internal
537 |      */
538 |     UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
539 | };
540 | 
541 | /* Internal functions and macros -------------------------------------------- */
542 | // Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
543 | 
544 | /** Internal function for part of the UCPTRIE_FAST_U8_NEXT() macro implementation. Do not call directly. @internal */
545 | U_CAPI int32_t U_EXPORT2
546 | ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
547 | 
548 | /**
549 |  * Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation. Do not call directly.
550 |  * The macro subtracts the low 3 bits of the result from src and uses (result >> 3) as the data index.
551 |  * @internal
552 |  */
553 | U_CAPI int32_t U_EXPORT2
554 | ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
555 |                             const uint8_t *start, const uint8_t *src);
556 | 
557 | /** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */
558 | #define _UCPTRIE_FAST_INDEX(trie, c) \
559 |     ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
560 | 
561 | /** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */
562 | #define _UCPTRIE_SMALL_INDEX(trie, c) \
563 |     ((c) >= (trie)->highStart ? \
564 |         (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
565 |         ucptrie_internalSmallIndex(trie, c))
566 | 
567 | /**
568 |  * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
569 |  * Returns the data index.
570 |  * @internal
571 |  */
572 | #define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
573 |     ((uint32_t)(c) <= (uint32_t)(fastMax) ? \
574 |         _UCPTRIE_FAST_INDEX(trie, c) : \
575 |         (uint32_t)(c) <= 0x10ffff ? \
576 |             _UCPTRIE_SMALL_INDEX(trie, c) : \
577 |             (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
578 | 
579 | U_CDECL_END
580 | 
581 | #endif  // U_IN_DOXYGEN
582 | 
583 | #if U_SHOW_CPLUSPLUS_API
584 | 
585 | U_NAMESPACE_BEGIN
586 | 
587 | /**
588 |  * \class LocalUCPTriePointer
589 |  * "Smart pointer" class, closes a UCPTrie via ucptrie_close().
590 |  * For most methods see the LocalPointerBase base class.
591 |  *
592 |  * @see LocalPointerBase
593 |  * @see LocalPointer
594 |  * @stable ICU 63
595 |  */
596 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
597 | 
598 | U_NAMESPACE_END
599 | 
600 | #endif  // U_SHOW_CPLUSPLUS_API
601 | 
602 | #endif


--------------------------------------------------------------------------------
/unicode/ucptrie_impl.h:
--------------------------------------------------------------------------------
  1 | // © 2017 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | 
  4 | // ucptrie_impl.h (modified from utrie2_impl.h)
  5 | // created: 2017dec29 Markus W. Scherer
  6 | 
  7 | #ifndef __UCPTRIE_IMPL_H__
  8 | #define __UCPTRIE_IMPL_H__
  9 | 
 10 | #include "ucptrie.h"
 11 | 
 12 | // UCPTrie signature values, in platform endianness and opposite endianness.
 13 | // The UCPTrie signature ASCII byte values spell "Tri3".
 14 | #define UCPTRIE_SIG     0x54726933
 15 | #define UCPTRIE_OE_SIG  0x33697254
 16 | 
 17 | /**
 18 |  * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
 19 |  * @internal
 20 |  */
 21 | struct UCPTrieHeader {
 22 |     /** "Tri3" in big-endian US-ASCII (0x54726933) */
 23 |     uint32_t signature;
 24 | 
 25 |     /**
 26 |      * Options bit field:
 27 |      * Bits 15..12: Data length bits 19..16.
 28 |      * Bits 11..8: Data null block offset bits 19..16.
 29 |      * Bits 7..6: UCPTrieType
 30 |      * Bits 5..3: Reserved (0).
 31 |      * Bits 2..0: UCPTrieValueWidth
 32 |      */
 33 |     uint16_t options;
 34 | 
 35 |     /** Total length of the index tables. */
 36 |     uint16_t indexLength;
 37 | 
 38 |     /** Data length bits 15..0. */
 39 |     uint16_t dataLength;
 40 | 
 41 |     /** Index-3 null block offset, 0x7fff or 0xffff if none. */
 42 |     uint16_t index3NullOffset;
 43 | 
 44 |     /** Data null block offset bits 15..0 (bits 19..16 are in options); the combined value is 0xfffff if none. */
 45 |     uint16_t dataNullOffset;
 46 | 
 47 |     /**
 48 |      * First code point of the single-value range ending with U+10ffff,
 49 |      * rounded up and then shifted right by UCPTRIE_SHIFT_2.
 50 |      */
 51 |     uint16_t shiftedHighStart;
 52 | };
 53 | 
 54 | /**
 55 |  * Constants for use with UCPTrieHeader.options.
 56 |  * @internal
 57 |  */
 58 | enum {
 59 |     UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,
 60 |     UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00,
 61 |     UCPTRIE_OPTIONS_RESERVED_MASK = 0x38,
 62 |     UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7,
 63 |     /**
 64 |      * Value for index3NullOffset which indicates that there is no index-3 null block.
 65 |      * Bit 15 is unused for this value because this bit is used if the index-3 contains
 66 |      * 18-bit indexes.
 67 |      */
 68 |     UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
 69 |     UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
 70 | };
 71 | 
 72 | // Internal constants.
 73 | enum {
 74 |     /** The length of the BMP index table. 1024=0x400 */
 75 |     UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
 76 | 
 77 |     UCPTRIE_SMALL_LIMIT = 0x1000,
 78 |     UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT,
 79 | 
 80 |     /** Shift size for getting the index-3 table offset. */
 81 |     UCPTRIE_SHIFT_3 = 4,
 82 | 
 83 |     /** Shift size for getting the index-2 table offset. */
 84 |     UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
 85 | 
 86 |     /** Shift size for getting the index-1 table offset. */
 87 |     UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
 88 | 
 89 |     /**
 90 |      * Difference between two shift sizes,
 91 |      * for getting an index-2 offset from an index-3 offset. 5=9-4
 92 |      */
 93 |     UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
 94 | 
 95 |     /**
 96 |      * Difference between two shift sizes,
 97 |      * for getting an index-1 offset from an index-2 offset. 5=14-9
 98 |      */
 99 |     UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
100 | 
101 |     /**
102 |      * Number of index-1 entries for the BMP. (4)
103 |      * This part of the index-1 table is omitted from the serialized form.
104 |      */
105 |     UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
106 | 
107 |     /** Number of entries in an index-2 block. 32=0x20 */
108 |     UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
109 | 
110 |     /** Mask for getting the lower bits for the in-index-2-block offset. */
111 |     UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
112 | 
113 |     /** Number of code points per index-2 table entry. 512=0x200 */
114 |     UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
115 | 
116 |     /** Number of entries in an index-3 block. 32=0x20 */
117 |     UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
118 | 
119 |     /** Mask for getting the lower bits for the in-index-3-block offset. */
120 |     UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
121 | 
122 |     /** Number of entries in a small data block. 16=0x10 */
123 |     UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
124 | 
125 |     /** Mask for getting the lower bits for the in-small-data-block offset. */
126 |     UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
127 | };
128 | 
129 | typedef UChar32
130 | UCPTrieGetRange(const void *trie, UChar32 start,
131 |                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
132 | 
133 | U_CFUNC UChar32
134 | ucptrie_internalGetRange(UCPTrieGetRange *getRange,
135 |                          const void *trie, UChar32 start,
136 |                          UCPMapRangeOption option, uint32_t surrogateValue,
137 |                          UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
138 | 
139 | #ifdef UCPTRIE_DEBUG
140 | U_CFUNC void
141 | ucptrie_printLengths(const UCPTrie *trie, const char *which);
142 | 
143 | U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
144 | #endif
145 | 
146 | /*
147 |  * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
148 |  * For overview information see https://icu.unicode.org/design/struct/utrie
149 |  *
150 |  * The binary trie data should be 32-bit-aligned.
151 |  * The overall layout is:
152 |  *
153 |  * UCPTrieHeader header; -- 16 bytes, see struct definition above
154 |  * uint16_t index[header.indexLength];
155 |  * uintXY_t data[header.dataLength];
156 |  *
157 |  * The trie data array is an array of uint16_t, uint32_t, or uint8_t,
158 |  * specified via the UCPTrieValueWidth when building the trie.
159 |  * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
160 |  * The overall length of the trie data is a multiple of 4 bytes.
161 |  * (Padding is added at the end of the index array and/or near the end of the data array as needed.)
162 |  *
163 |  * The length of the data array (dataLength) is stored as an integer split across two fields
164 |  * of the header struct (high bits in header.options).
165 |  *
166 |  * The trie type can be "fast" or "small" which determines the index structure,
167 |  * specified via the UCPTrieType when building the trie.
168 |  *
169 |  * The type and valueWidth are stored in the header.options.
170 |  * There are reserved type and valueWidth values, and reserved header.options bits.
171 |  * They could be used in future format extensions.
172 |  * Code reading the trie structure must fail with an error when unknown values or options are set.
173 |  *
174 |  * Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
175 |  *
176 |  * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
177 |  * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
178 |  * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
179 |  *
180 |  * All code points in the range highStart..U+10FFFF map to a single highValue
181 |  * which is stored at the second-to-last position of the data array.
182 |  * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
183 |  * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
184 |  * (UCPTRIE_SHIFT_2=9)
185 |  *
186 |  * Values for code points fast_limit..highStart-1 are found via four-stage lookup.
187 |  * The data block size is smaller for this range than for the fast range.
188 |  * This together with more index stages with small blocks makes this range
189 |  * more easily compactable.
190 |  *
191 |  * There is also a trie error value stored at the last position of the data array.
192 |  * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
193 |  * It is intended to be returned for inputs that are not Unicode code points
194 |  * (outside U+0000..U+10FFFF), or in string processing for ill-formed input
195 |  * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
196 |  *
197 |  * For a "fast" trie:
198 |  *
199 |  * The index array starts with the BMP index table for BMP code point lookup.
200 |  * Its length is 1024=0x400.
201 |  *
202 |  * The supplementary index-1 table follows the BMP index table.
203 |  * Variable length, for code points up to highStart-1.
204 |  * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
205 |  * (For 0x100000 supplementary code points U+10000..U+10ffff.)
206 |  *
207 |  * After this index-1 table follow the variable-length index-3 and index-2 tables.
208 |  *
209 |  * The supplementary index tables are omitted completely
210 |  * if there is only BMP data (highStart<=U+10000).
211 |  *
212 |  * For a "small" trie:
213 |  *
214 |  * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
215 |  *
216 |  * The "supplementary" index tables are always stored.
217 |  * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
218 |  *
219 |  * For both trie types:
220 |  *
221 |  * The last index-2 block may be a partial block, storing indexes only for code points
222 |  * below highStart.
223 |  *
224 |  * Lookup for ASCII code point c:
225 |  *
226 |  * Linear access from the start of the data array.
227 |  *
228 |  * value = data[c];
229 |  *
230 |  * Lookup for fast-range code point c:
231 |  *
232 |  * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
233 |  * fetch the index array value at that offset,
234 |  * add the lower code point bits, index into the data array.
235 |  *
236 |  * value = data[index[c>>6] + (c&0x3f)];
237 |  *
238 |  * (This works for ASCII as well.)
239 |  *
240 |  * Lookup for small-range code point c below highStart:
241 |  *
242 |  * Split the code point into four bit fields using several sets of shifts & masks
243 |  * to read consecutive values from the index-1, index-2, index-3 and data tables.
244 |  *
245 |  * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
246 |  * then the data block offsets are stored directly as uint16_t.
247 |  *
248 |  * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
249 |  * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
250 |  * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
251 |  * in the additional word.
252 |  *
253 |  * See ucptrie_internalSmallIndex() for details.
254 |  *
255 |  * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
256 |  *
257 |  * Compaction:
258 |  *
259 |  * Multiple code point ranges ("blocks") that are aligned on certain boundaries
260 |  * (determined by the shifting/bit fields of code points) and
261 |  * map to the same data values normally share a single subsequence of the data array.
262 |  * Data blocks can also overlap partially.
263 |  * (Depending on the builder code finding duplicate and overlapping blocks.)
264 |  *
265 |  * Iteration over same-value ranges:
266 |  *
267 |  * Range iteration (ucptrie_getRange()) walks the structure from a start code point
268 |  * until some code point is found that maps to a different value;
269 |  * the end of the returned range is just before that.
270 |  *
271 |  * The header.dataNullOffset (split across two header fields, high bits in header.options)
272 |  * is the offset of a widely shared data block filled with one single value.
273 |  * It helps quickly skip over large ranges of data with that value.
274 |  * The builder must ensure that if the start of any data block (fast or small)
275 |  * matches the dataNullOffset, then the whole block must be filled with the null value.
276 |  * Special care must be taken if there is no fast null data block
277 |  * but a small one, which is shorter, and it matches the *start* of some fast data block.
278 |  *
279 |  * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
280 |  * where all index entries point to the dataNullOffset.
281 |  * If there is no such data or index-3 block, then these offsets are set to
282 |  * values that cannot be reached (data offset out of range/reserved index offset),
283 |  * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
284 |  */
285 | 
286 | #endif
287 | 


--------------------------------------------------------------------------------
/unicode/udata.h:
--------------------------------------------------------------------------------
  1 | // © 2016 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | /*
  4 | ******************************************************************************
  5 | *
  6 | *   Copyright (C) 1999-2014, International Business Machines
  7 | *   Corporation and others.  All Rights Reserved.
  8 | *
  9 | ******************************************************************************
 10 | *   file name:  udata.h
 11 | *   encoding:   UTF-8
 12 | *   tab size:   8 (not used)
 13 | *   indentation:4
 14 | *
 15 | *   created on: 1999oct25
 16 | *   created by: Markus W. Scherer
 17 | */
 18 | 
 19 | #ifndef __UDATA_H__
 20 | #define __UDATA_H__
 21 | 
 22 | #include <unicode/utypes.h>
 23 | 
 24 | #if U_SHOW_CPLUSPLUS_API
 25 | #include "unicode/localpointer.h"
 26 | #endif   // U_SHOW_CPLUSPLUS_API
 27 | 
 28 | U_CDECL_BEGIN
 29 | 
 30 | /**
 31 |  * \file
 32 |  * \brief C API: Data loading interface
 33 |  *
 34 |  * <h2>Information about data loading interface</h2>
 35 |  *
 36 |  * This API is used to find and efficiently load data for ICU and applications
 37 |  * using ICU. It provides an abstract interface that specifies a data type and
 38 |  * name to find and load the data. Normally this API is used by other ICU APIs
 39 |  * to load required data out of the ICU data library, but it can be used to
 40 |  * load data out of other places.
 41 |  *
 42 |  * See the User Guide Data Management chapter.
 43 |  */
 44 |  
 45 | #ifndef U_HIDE_INTERNAL_API
 46 | /**
 47 |  * Character used to separate package names from tree names 
 48 |  * @internal ICU 3.0
 49 |  */
 50 | #define U_TREE_SEPARATOR '-'
 51 | 
 52 | /**
 53 |  * String used to separate package names from tree names 
 54 |  * @internal ICU 3.0
 55 |  */
 56 | #define U_TREE_SEPARATOR_STRING "-"
 57 | 
 58 | /**
 59 |  * Character used to separate parts of entry names
 60 |  * @internal ICU 3.0
 61 |  */
 62 | #define U_TREE_ENTRY_SEP_CHAR '/'
 63 | 
 64 | /**
 65 |  * String used to separate parts of entry names
 66 |  * @internal ICU 3.0
 67 |  */
 68 | #define U_TREE_ENTRY_SEP_STRING "/"
 69 | 
 70 | /**
 71 |  * Alias for standard ICU data 
 72 |  * @internal ICU 3.0
 73 |  */
 74 | #define U_ICUDATA_ALIAS "ICUDATA"
 75 | 
 76 | #endif /* U_HIDE_INTERNAL_API */
 77 | 
 78 | /**
 79 |  * UDataInfo contains the properties about the requested data.
 80 |  * This is meta data.
 81 |  *
 82 |  * <p>This structure may grow in the future, indicated by the
 83 |  * <code>size</code> field.</p>
 84 |  *
 85 |  * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
 86 |  * The UDataInfo struct begins 4 bytes after the start of the data item,
 87 |  * so it is 4-aligned.</p>
 88 |  *
 89 |  * <p>The platform data property fields help determine if a data
 90 |  * file can be efficiently used on a given machine.
 91 |  * The particular fields are of importance only if the data
 92 |  * is affected by the properties - if there is integer data
 93 |  * with word sizes > 1 byte, char* text, or UChar* text.</p>
 94 |  *
 95 |  * <p>The implementation for the <code>udata_open[Choice]()</code>
 96 |  * functions may reject data based on the value in <code>isBigEndian</code>.
 97 |  * No other field is used by the <code>udata</code> API implementation.</p>
 98 |  *
 99 |  * <p>The <code>dataFormat</code> may be used to identify
100 |  * the kind of data, e.g. a converter table.</p>
101 |  *
102 |  * <p>The <code>formatVersion</code> field should be used to
103 |  * make sure that the format can be interpreted.
104 |  * It may be a good idea to check only for the one or two highest
105 |  * of the version elements to allow the data memory to
106 |  * get more or somewhat rearranged contents, for as long
107 |  * as the using code can still interpret the older contents.</p>
108 |  *
109 |  * <p>The <code>dataVersion</code> field is intended to be a
110 |  * common place to store the source version of the data;
111 |  * for data from the Unicode character database, this could
112 |  * reflect the Unicode version.</p>
113 |  *
114 |  * @stable ICU 2.0
115 |  */
116 | typedef struct {
117 |     /** sizeof(UDataInfo)
118 |      *  @stable ICU 2.0 */
119 |     uint16_t size;
120 | 
121 |     /** unused, set to 0 
122 |      *  @stable ICU 2.0*/
123 |     uint16_t reservedWord;
124 | 
125 |     /* platform data properties */
126 |     /** 0 for little-endian machine, 1 for big-endian
127 |      *  @stable ICU 2.0 */
128 |     uint8_t isBigEndian;
129 | 
130 |     /** see U_CHARSET_FAMILY values in utypes.h 
131 |      *  @stable ICU 2.0*/
132 |     uint8_t charsetFamily;
133 | 
134 |     /** sizeof(UChar), one of { 1, 2, 4 } 
135 |      *  @stable ICU 2.0*/
136 |     uint8_t sizeofUChar;
137 | 
138 |     /** unused, set to 0 
139 |      *  @stable ICU 2.0*/
140 |     uint8_t reservedByte;
141 | 
142 |     /** data format identifier 
143 |      *  @stable ICU 2.0*/
144 |     uint8_t dataFormat[4];
145 | 
146 |     /** versions: [0] major [1] minor [2] milli [3] micro 
147 |      *  @stable ICU 2.0*/
148 |     uint8_t formatVersion[4];
149 | 
150 |     /** versions: [0] major [1] minor [2] milli [3] micro 
151 |      *  @stable ICU 2.0*/
152 |     uint8_t dataVersion[4];
153 | } UDataInfo;
154 | 
155 | /* API for reading data -----------------------------------------------------*/
156 | 
157 | /**
158 |  * Forward declaration of the data memory type.
159 |  * @stable ICU 2.0
160 |  */
161 | typedef struct UDataMemory UDataMemory;
162 | 
163 | /**
164 |  * Callback function for udata_openChoice().
165 |  * @param context parameter passed into <code>udata_openChoice()</code>.
166 |  * @param type The type of the data as passed into <code>udata_openChoice()</code>.
167 |  *             It may be <code>NULL</code>.
168 |  * @param name The name of the data as passed into <code>udata_openChoice()</code>.
169 |  * @param pInfo A pointer to the <code>UDataInfo</code> structure
170 |  *              of data that has been loaded and will be returned
171 |  *              by <code>udata_openChoice()</code> if this function
172 |  *              returns <code>true</code>.
173 |  * @return true if the current data memory is acceptable
174 |  * @stable ICU 2.0
175 |  */
176 | typedef UBool U_CALLCONV
177 | UDataMemoryIsAcceptable(void *context,
178 |                         const char *type, const char *name,
179 |                         const UDataInfo *pInfo);
180 | 
181 | 
182 | /**
183 |  * Convenience function.
184 |  * This function works the same as <code>udata_openChoice</code>
185 |  * except that any data that matches the type and name
186 |  * is assumed to be acceptable.
187 |  * @param path Specifies an absolute path and/or a basename for the
188 |  *             finding of the data in the file system.
189 |  *             <code>NULL</code> for ICU data.
190 |  * @param type A string that specifies the type of data to be loaded.
191 |  *             For example, resource bundles are loaded with type "res",
192 |  *             conversion tables with type "cnv".
193 |  *             This may be <code>NULL</code> or empty.
194 |  * @param name A string that specifies the name of the data.
195 |  * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
196 |  * @return A pointer (handle) to a data memory object, or <code>NULL</code>
197 |  *         if an error occurs. Call <code>udata_getMemory()</code>
198 |  *         to get a pointer to the actual data.
199 |  *
200 |  * @see udata_openChoice
201 |  * @stable ICU 2.0
202 |  */
203 | U_CAPI UDataMemory * U_EXPORT2
204 | udata_open(const char *path, const char *type, const char *name,
205 |            UErrorCode *pErrorCode);
206 | 
207 | /**
208 |  * Data loading function.
209 |  * This function is used to find and load efficiently data for
210 |  * ICU and applications using ICU.
211 |  * It provides an abstract interface that allows specifying a data
212 |  * type and name to find and load the data.
213 |  *
214 |  * <p>The implementation depends on platform properties and user preferences
215 |  * and may involve loading shared libraries (DLLs), mapping
216 |  * files into memory, or fopen()/fread() files.
217 |  * It may also involve using static memory or database queries etc.
218 |  * Several or all data items may be combined into one entity
219 |  * (DLL, memory-mappable file).</p>
220 |  *
221 |  * <p>The data is always preceded by a header that includes
222 |  * a <code>UDataInfo</code> structure.
223 |  * The caller's <code>isAcceptable()</code> function is called to make
224 |  * sure that the data is useful. It may be called several times if it
225 |  * rejects the data and there is more than one location with data
226 |  * matching the type and name.</p>
227 |  *
228 |  * <p>If <code>path==NULL</code>, then ICU data is loaded.
229 |  * Otherwise, it is separated into a basename and a basename-less directory string.
230 |  * The basename is used as the data package name, and the directory is
231 |  * logically prepended to the ICU data directory string.</p>
232 |  *
233 |  * <p>For details about ICU data loading see the User Guide
234 |  * Data Management chapter. (https://unicode-org.github.io/icu/userguide/icu_data/)</p>
235 |  *
236 |  * @param path Specifies an absolute path and/or a basename for the
237 |  *             finding of the data in the file system.
238 |  *             <code>NULL</code> for ICU data.
239 |  * @param type A string that specifies the type of data to be loaded.
240 |  *             For example, resource bundles are loaded with type "res",
241 |  *             conversion tables with type "cnv".
242 |  *             This may be <code>NULL</code> or empty.
243 |  * @param name A string that specifies the name of the data.
244 |  * @param isAcceptable This function is called to verify that loaded data
245 |  *                     is useful for the client code. If it returns false
246 |  *                     for all data items, then <code>udata_openChoice()</code>
247 |  *                     will return with an error.
248 |  * @param context Arbitrary parameter to be passed into isAcceptable.
249 |  * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
250 |  * @return A pointer (handle) to a data memory object, or <code>NULL</code>
251 |  *         if an error occurs. Call <code>udata_getMemory()</code>
252 |  *         to get a pointer to the actual data.
253 |  * @stable ICU 2.0
254 |  */
255 | U_CAPI UDataMemory * U_EXPORT2
256 | udata_openChoice(const char *path, const char *type, const char *name,
257 |                  UDataMemoryIsAcceptable *isAcceptable, void *context,
258 |                  UErrorCode *pErrorCode);
259 | 
260 | /**
261 |  * Close the data memory.
262 |  * This function must be called to allow the system to
263 |  * release resources associated with this data memory.
264 |  * @param pData The pointer to data memory object
265 |  * @stable ICU 2.0
266 |  */
267 | U_CAPI void U_EXPORT2
268 | udata_close(UDataMemory *pData);
269 | 
270 | /**
271 |  * Get the pointer to the actual data inside the data memory.
272 |  * The data is read-only.
273 |  *
274 |  * ICU data must be at least 8-aligned, and should be 16-aligned.
275 |  *
276 |  * @param pData The pointer to data memory object
277 |  * @stable ICU 2.0
278 |  */
279 | U_CAPI const void * U_EXPORT2
280 | udata_getMemory(UDataMemory *pData);
281 | 
282 | /**
283 |  * Get the information from the data memory header.
284 |  * This provides access to the header containing
285 |  * platform data properties etc. which is not part of
286 |  * the data itself and can therefore not be accessed
287 |  * via the pointer that <code>udata_getMemory()</code> returns.
288 |  *
289 |  * @param pData pointer to the data memory object
290 |  * @param pInfo pointer to a UDataInfo object;
291 |  *              its <code>size</code> field must be set correctly,
292 |  *              typically to <code>sizeof(UDataInfo)</code>.
293 |  *
294 |  * <code>*pInfo</code> will be filled with the UDataInfo structure
295 |  * in the data memory object. If this structure is smaller than
296 |  * <code>pInfo->size</code>, then the <code>size</code> will be
297 |  * adjusted and only part of the structure will be filled.
298 |  * @stable ICU 2.0
299 |  */
300 | U_CAPI void U_EXPORT2
301 | udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
302 | 
303 | /**
304 |  * This function bypasses the normal ICU data loading process and
305 |  * allows you to force ICU's system data to come out of a user-specified
306 |  * area in memory.
307 |  *
308 |  * ICU data must be at least 8-aligned, and should be 16-aligned.
309 |  * See https://unicode-org.github.io/icu/userguide/icu_data/
310 |  *
311 |  * The format of this data is that of the icu common data file, as is
312 |  * generated by the pkgdata tool with mode=common or mode=dll.
313 |  * You can read in a whole common mode file and pass the address to the start of the
314 |  * data, or (with the appropriate link options) pass in the pointer to
315 |  * the data that has been loaded from a dll by the operating system,
316 |  * as shown in this code:
317 |  *
318 |  *       extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
319 |  *        // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
320 |  *       UErrorCode  status = U_ZERO_ERROR;
321 |  *
322 |  *       udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
323 |  *
324 |  * It is important that the declaration be as above. The entry point
325 |  * must not be declared as an extern void*.
326 |  *
327 |  * Starting with ICU 4.4, it is possible to set several data packages,
328 |  * one per call to this function.
329 |  * udata_open() will look for data in the multiple data packages in the order
330 |  * in which they were set.
331 |  * The position of the linked-in or default-name ICU .data package in the
332 |  * search list depends on when the first data item is loaded that is not contained
333 |  * in the already explicitly set packages.
334 |  * If data was loaded implicitly before the first call to this function
335 |  * (for example, via opening a converter, constructing a UnicodeString
336 |  * from default-codepage data, using formatting or collation APIs, etc.),
337 |  * then the default data will be first in the list.
338 |  *
339 |  * This function has no effect on application (non ICU) data.  See udata_setAppData()
340 |  * for similar functionality for application data.
341 |  *
342 |  * @param data pointer to ICU common data
343 |  * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
344 |  * @stable ICU 2.0
345 |  */
346 | U_CAPI void U_EXPORT2
347 | udata_setCommonData(const void *data, UErrorCode *err);
348 | 
349 | 
350 | /**
351 |  * This function bypasses the normal ICU data loading process for application-specific
352 |  * data and allows you to force it to come out of a user-specified
353 |  * pointer.
354 |  *
355 |  * ICU data must be at least 8-aligned, and should be 16-aligned.
356 |  * See https://unicode-org.github.io/icu/userguide/icu_data/
357 |  *
358 |  * The format of this data is that of the icu common data file, like 'icudt26l.dat'
359 |  * or the corresponding shared library (DLL) file.
360 |  * The application must read in or otherwise construct an image of the data and then
361 |  * pass the address of it to this function.
362 |  *
363 |  *
364 |  * Warning:  setAppData will set a U_USING_DEFAULT_WARNING code if
365 |  *           data with the specified path has already been opened, or
366 |  *           if setAppData with the same path has already been called.
367 |  *           Any such calls to setAppData will have no effect.
368 |  *
369 |  *
370 |  * @param packageName the package name by which the application will refer
371 |  *             to (open) this data
372 |  * @param data pointer to the data
373 |  * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
374 |  * @see udata_setCommonData
375 |  * @stable ICU 2.0
376 |  */
377 | U_CAPI void U_EXPORT2
378 | udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
379 | 
380 | /**
381 |  * Possible settings for udata_setFileAccess()
382 |  * @see udata_setFileAccess
383 |  * @stable ICU 3.4
384 |  */
385 | typedef enum UDataFileAccess {
386 |     /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
387 |     UDATA_FILES_FIRST,
388 |     /** An alias for the default access mode. @stable ICU 3.4 */
389 |     UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
390 |     /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
391 |     UDATA_ONLY_PACKAGES,
392 |     /** ICU loads data from packages first, and only from single files
393 |         if the data cannot be found in a package. @stable ICU 3.4 */
394 |     UDATA_PACKAGES_FIRST,
395 |     /** ICU does not access the file system for data loading. @stable ICU 3.4 */
396 |     UDATA_NO_FILES,
397 | #ifndef U_HIDE_DEPRECATED_API
398 |     /**
399 |      * Number of real UDataFileAccess values.
400 |      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
401 |      */
402 |     UDATA_FILE_ACCESS_COUNT
403 | #endif  // U_HIDE_DEPRECATED_API
404 | } UDataFileAccess;
405 | 
406 | /**
407 |  * This function may be called to control how ICU loads data. It must be called
408 |  * before any ICU data is loaded, including application data loaded with 
409 |  * ures/ResourceBundle or udata APIs. This function is not multithread safe.  
410 |  * The results of calling it while other threads are loading data are undefined.
411 |  * @param access The type of file access to be used
412 |  * @param status Error code.
413 |  * @see UDataFileAccess
414 |  * @stable ICU 3.4 
415 |  */
416 | U_CAPI void U_EXPORT2
417 | udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
418 | 
419 | U_CDECL_END
420 | 
421 | #if U_SHOW_CPLUSPLUS_API
422 | 
423 | U_NAMESPACE_BEGIN
424 | 
425 | /**
426 |  * \class LocalUDataMemoryPointer
427 |  * "Smart pointer" class, closes a UDataMemory via udata_close().
428 |  * For most methods see the LocalPointerBase base class.
429 |  *
430 |  * @see LocalPointerBase
431 |  * @see LocalPointer
432 |  * @stable ICU 4.4
433 |  */
434 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
435 | 
436 | U_NAMESPACE_END
437 | 
438 | #endif  // U_SHOW_CPLUSPLUS_API
439 | 
440 | #endif


--------------------------------------------------------------------------------
/unicode/udatamem.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | ******************************************************************************
 5 | *
 6 | *   Copyright (C) 1999-2010, International Business Machines
 7 | *   Corporation and others.  All Rights Reserved.
 8 | *
 9 | ******************************************************************************/
10 | 
11 | 
12 | /*----------------------------------------------------------------------------------
13 |  *
14 |  *  UDataMemory     A class-like struct that serves as a handle to a piece of memory
15 |  *                  that contains some ICU data (resource, converters, whatever.)
16 |  *
17 |  *                  When an application opens ICU data (with udata_open, for example),
18 |  *                  a UDataMemory * is returned.
19 |  *
20 |  *----------------------------------------------------------------------------------*/
21 | #ifndef __UDATAMEM_H__
22 | #define __UDATAMEM_H__
23 | 
24 | #include "udata.h"
25 | #include "ucmndata.h"
26 | 
27 | struct UDataMemory {
28 |     const commonDataFuncs  *vFuncs;      /* Function Pointers for accessing TOC             */
29 | 
30 |     const DataHeader *pHeader;     /* Header of the memory being described by this    */
31 |                                    /*   UDataMemory object.                           */
32 |     const void       *toc;         /* For common memory, table of contents for        */
33 |                                    /*   the pieces within.                            */
34 |     UBool             heapAllocated;  /* True if this UDataMemory Object is on the    */
35 |                                    /*  heap and thus needs to be deleted when closed. */
36 | 
37 |     void             *mapAddr;     /* For mapped or allocated memory, the start addr. */
38 |                                    /* Only non-null if a close operation should unmap */
39 |                                    /*  the associated data.                           */
40 |     void             *map;         /* Handle, or other data, OS dependent.            */
41 |                                    /* Only non-null if a close operation should unmap */
42 |                                    /*  the associated data, and additional info       */
43 |                                    /*   beyond the mapAddr is needed to do that.      */
44 |     int32_t           length;      /* Length of the data in bytes; -1 if unknown.     */
45 | };
46 | 
47 | U_CFUNC void         UDatamemory_assign  (UDataMemory *dest, UDataMemory *source);
48 | U_CFUNC UBool        UDataMemory_isLoaded(const UDataMemory *This);
49 | U_CFUNC void         UDataMemory_setData (UDataMemory *This, const void *dataAddr);
50 | 
51 | U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p);
52 | 
53 | U_CAPI int32_t U_EXPORT2
54 | udata_getLength(const UDataMemory *pData);
55 | 
56 | U_CAPI const void * U_EXPORT2
57 | udata_getRawMemory(const UDataMemory *pData);
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/unicode/umapfile.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | ******************************************************************************
 5 | *
 6 | *   Copyright (C) 1999-2011, International Business Machines
 7 | *   Corporation and others.  All Rights Reserved.
 8 | *
 9 | ******************************************************************************/
10 | 
11 | /*----------------------------------------------------------------------------------
12 |  *
13 |  *       Memory mapped file wrappers for use by the ICU Data Implementation
14 |  *
15 |  *           Porting note:  The implementation of these functions is very platform specific.
16 |  *             Not all platforms can do real memory mapping.  Those that can't
17 |  *             still must implement these functions, getting the data into memory using
18 |  *             whatever means are available.
19 |  *
20 |  *            These functions are part of the ICU internal implementation, and
21 |  *            are not intended to be used directly by applications.
22 |  *
23 |  *----------------------------------------------------------------------------------*/
24 | 
25 | #ifndef __UMAPFILE_H__
26 | #define __UMAPFILE_H__
27 | 
28 | #include <unicode/putil.h>
29 | #include "udata.h"
30 | #include "putilimp.h"
31 | 
32 | U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status);
33 | U_CFUNC void  uprv_unmapFile(UDataMemory *pData);
34 | 
35 | /* MAP_NONE: no memory mapping, no file access at all */
36 | #define MAP_NONE        0
37 | #define MAP_WIN32       1
38 | #define MAP_POSIX       2
39 | #define MAP_STDIO       3
40 | #define MAP_390DLL      4
41 | 
42 | #if UCONFIG_NO_FILE_IO
43 | #   define MAP_IMPLEMENTATION MAP_NONE
44 | #elif U_PLATFORM_USES_ONLY_WIN32_API
45 | #   define MAP_IMPLEMENTATION MAP_WIN32
46 | #elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390
47 | #   if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA)
48 |         /*   No memory mapping for 390 batch mode.  Fake it using dll loading.  */
49 | #       define MAP_IMPLEMENTATION MAP_390DLL
50 | #   else
51 | #       define MAP_IMPLEMENTATION MAP_POSIX
52 | #   endif
53 | #else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */
54 | #   define MAP_IMPLEMENTATION MAP_STDIO
55 | #endif
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/unicode/umutex.h:
--------------------------------------------------------------------------------
  1 | // © 2016 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | /*
  4 | **********************************************************************
  5 | *   Copyright (C) 1997-2015, International Business Machines
  6 | *   Corporation and others.  All Rights Reserved.
  7 | **********************************************************************
  8 | *
  9 | * File UMUTEX.H
 10 | *
 11 | * Modification History:
 12 | *
 13 | *   Date        Name        Description
 14 | *   04/02/97  aliu        Creation.
 15 | *   04/07/99  srl         rewrite - C interface, multiple mutices
 16 | *   05/13/99  stephen     Changed to umutex (from cmutex)
 17 | ******************************************************************************
 18 | */
 19 | 
 20 | #ifndef UMUTEX_H
 21 | #define UMUTEX_H
 22 | 
 23 | #include <atomic>
 24 | #include <condition_variable>
 25 | #include <mutex>
 26 | #include <type_traits>
 27 | 
 28 | #include <unicode/utypes.h>
 29 | #include <unicode/uclean.h>
 30 | #include <unicode/uobject.h>
 31 | 
 32 | #include "putilimp.h"
 33 | 
 34 | #if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H)
 35 | // Support for including an alternate implementation of atomic & mutex operations has been withdrawn.
 36 | // See issue ICU-20185.
 37 | #error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
 38 | #endif
 39 | 
 40 | // Export an explicit template instantiation of std::atomic<int32_t>. 
 41 | // When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
 42 | // See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
 43 | //
 44 | // Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
 45 | #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
 46 | #if defined(__clang__) || defined(_MSC_VER)
 47 |   #if defined(__clang__)
 48 |     // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
 49 |     #pragma clang diagnostic push
 50 |     #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
 51 |   #endif
 52 | template struct U_COMMON_API std::atomic<int32_t>;
 53 | template struct U_COMMON_API std::atomic<std::mutex *>;
 54 |   #if defined(__clang__)
 55 |     #pragma clang diagnostic pop
 56 |   #endif
 57 | #elif defined(__GNUC__)
 58 | // For GCC this class is already exported/visible, so no need for U_COMMON_API.
 59 | template struct std::atomic<int32_t>;
 60 | template struct std::atomic<std::mutex *>;
 61 | #endif
 62 | #endif
 63 | 
 64 | 
 65 | U_NAMESPACE_BEGIN
 66 | 
 67 | /****************************************************************************
 68 |  *
 69 |  *   Low Level Atomic Operations, ICU wrappers for.
 70 |  *
 71 |  ****************************************************************************/
 72 | 
 73 | typedef std::atomic<int32_t> u_atomic_int32_t;
 74 | #define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
 75 | // Atomically reads var with acquire memory ordering and returns the value read.
 76 | inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) {
 77 |     return var.load(std::memory_order_acquire);
 78 | }
 79 | // Atomically writes val into var with release memory ordering.
 80 | inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) {
 81 |     var.store(val, std::memory_order_release);
 82 | }
 83 | // Atomically adds 1 to *var and returns the new (already incremented) value.
 84 | inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) {
 85 |     return var->fetch_add(1) + 1;  // fetch_add yields the old value, hence the + 1
 86 | }
 87 | // Atomically subtracts 1 from *var and returns the new (already decremented) value.
 88 | inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
 89 |     return var->fetch_sub(1) - 1;  // fetch_sub yields the old value, hence the - 1
 90 | }
 91 | 
 92 | 
 93 | /*************************************************************************************************
 94 |  *
 95 |  *  UInitOnce Definitions.
 96 |  *
 97 |  *************************************************************************************************/
 98 | 
 99 | struct UInitOnce {
100 |     u_atomic_int32_t   fState;    // 0 = reset; 2 = umtx_initOnce() returns without running the initializer
101 |     UErrorCode       fErrCode;    // error code stored by the UErrorCode variants of umtx_initOnce()
102 |     void reset() {fState = 0;}    // plain atomic store of 0; does not touch fErrCode
103 |     UBool isReset() {return umtx_loadAcquire(fState) == 0;}
104 | // Note: isReset() is used by service registration code.
105 | //                 Thread safety of this usage needs review.
106 | };
107 | 
108 | #define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR}
109 | 
110 | 
111 | U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &);
112 | U_COMMON_API void  U_EXPORT2 umtx_initImplPostInit(UInitOnce &);
113 | // umtx_initOnce variant for non-static member functions: invokes (obj->*fp)(). No error code.
114 | template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) {
115 |     if (umtx_loadAcquire(uio.fState) == 2) {  // fast path: state 2 means there is nothing left to do
116 |         return;
117 |     }
118 |     if (umtx_initImplPreInit(uio)) {  // presumably true only for the caller that must run the initializer
119 |         (obj->*fp)();
120 |         umtx_initImplPostInit(uio);
121 |     }
122 | }
123 | 
124 | 
125 | // umtx_initOnce variant for plain functions, or static class functions.
126 | //               No context parameter. Invokes (*fp)() with no arguments.
127 | inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) {
128 |     if (umtx_loadAcquire(uio.fState) == 2) {  // fast path: state 2 means there is nothing left to do
129 |         return;
130 |     }
131 |     if (umtx_initImplPreInit(uio)) {  // presumably true only for the caller that must run the initializer
132 |         (*fp)();
133 |         umtx_initImplPostInit(uio);
134 |     }
135 | }
136 | 
137 | // umtx_initOnce variant for plain functions, or static class functions.
138 | //               With ErrorCode, No context parameter. A stored failure is reported to later callers.
139 | inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) {
140 |     if (U_FAILURE(errCode)) {  // incoming failure: return without touching uio or calling fp
141 |         return;
142 |     }
143 |     if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
144 |         // We run the initialization.
145 |         (*fp)(errCode);
146 |         uio.fErrCode = errCode;  // stored so that later calls can report a failed initialization
147 |         umtx_initImplPostInit(uio);
148 |     } else {
149 |         // Someone else already ran the initialization.
150 |         if (U_FAILURE(uio.fErrCode)) {  // copy a stored failure; a stored success leaves errCode as is
151 |             errCode = uio.fErrCode;
152 |         }
153 |     }
154 | }
155 | 
156 | // umtx_initOnce variant for plain functions, or static class functions,
157 | //               with a context parameter. Invokes (*fp)(context).
158 | template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) {
159 |     if (umtx_loadAcquire(uio.fState) == 2) {  // fast path: state 2 means there is nothing left to do
160 |         return;
161 |     }
162 |     if (umtx_initImplPreInit(uio)) {  // presumably true only for the caller that must run the initializer
163 |         (*fp)(context);
164 |         umtx_initImplPostInit(uio);
165 |     }
166 | }
167 | 
168 | // umtx_initOnce variant for plain functions, or static class functions,
169 | //               with a context parameter and an error code. A stored failure is reported to later callers.
170 | template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) {
171 |     if (U_FAILURE(errCode)) {  // incoming failure: return without touching uio or calling fp
172 |         return;
173 |     }
174 |     if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) {
175 |         // We run the initialization.
176 |         (*fp)(context, errCode);
177 |         uio.fErrCode = errCode;  // stored so that later calls can report a failed initialization
178 |         umtx_initImplPostInit(uio);
179 |     } else {
180 |         // Someone else already ran the initialization.
181 |         if (U_FAILURE(uio.fErrCode)) {  // copy a stored failure; a stored success leaves errCode as is
182 |             errCode = uio.fErrCode;
183 |         }
184 |     }
185 | }
186 | 
187 | // UMutex should be constexpr-constructible, so that no initialization code
188 | // is run during startup.
189 | // This works on all C++ libraries except MS VS before VS2019.
190 | #if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \
191 |     (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142)
192 |     // (VS std lib older than VS2017) || (VS std lib version < VS2019)
193 | #   define UMUTEX_CONSTEXPR
194 | #else
195 | #   define UMUTEX_CONSTEXPR constexpr
196 | #endif
197 | 
198 | /**
199 |  * UMutex - ICU Mutex class.
200 |  *
201 |  * This is the preferred Mutex class for use within ICU implementation code.
202 |  * It is a thin wrapper over C++ std::mutex, with these additions:
203 |  *    - Static instances are safe, not triggering static construction or destruction,
204 |  *      and the associated order of construction or destruction issues.
205 |  *    - Plumbed into u_cleanup() for destructing the underlying std::mutex,
206 |  *      which frees any OS level resources they may be holding.
207 |  *
208 |  * Limitations:
209 |  *    - Static or global instances only. Cannot be heap allocated. Cannot appear as a
210 |  *      member of another class.
211 |  *    - No condition variables or other advanced features. If needed, you will need to use
212 |  *      std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp
213 |  *
214 |  * Typical Usage:
215 |  *    static UMutex myMutex;
216 |  *
217 |  *    {
218 |  *       Mutex lock(myMutex);
219 |  *       ...    // Do stuff that is protected by myMutex;
220 |  *    }         // myMutex is released when lock goes out of scope.
221 |  */
222 | 
223 | class U_COMMON_API UMutex {
224 | public:
225 |     UMUTEX_CONSTEXPR UMutex() {}  // constexpr where the standard library allows it (see UMUTEX_CONSTEXPR)
226 |     ~UMutex() = default;
227 | 
228 |     UMutex(const UMutex &other) = delete;
229 |     UMutex &operator =(const UMutex &other) = delete;
230 |     void *operator new(size_t) = delete;  // enforces "cannot be heap allocated"
231 | 
232 |     // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
233 |     void lock() {
234 |         std::mutex *m = fMutex.load(std::memory_order_acquire);
235 |         if (m == nullptr) { m = getMutex(); }  // first use: lazy, out-of-line initialization
236 |         m->lock();
237 |     }
238 |     void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }  // no null check: fMutex must already be set
239 | 
240 |     static void cleanup();  // defined elsewhere; presumably the u_cleanup() hook described above - confirm
241 | 
242 | private:
243 |     alignas(std::mutex) char fStorage[sizeof(std::mutex)] {};  // raw bytes sized and aligned for one std::mutex
244 |     std::atomic<std::mutex *> fMutex { nullptr };  // nullptr until lock() obtains a mutex from getMutex()
245 | 
246 |     /** All initialized UMutexes are kept in a linked list, so that they can be found,
247 |      * and the underlying std::mutex destructed, by u_cleanup().
248 |      */
249 |     UMutex *fListLink { nullptr };
250 |     static UMutex *gListHead;
251 | 
252 |     /** Out-of-line function to lazily initialize a UMutex on first use.
253 |      * Initial fast check is inline, in lock().  The returned value is never
254 |      * nullptr.
255 |      */
256 |     std::mutex *getMutex();
257 | };
258 | 
259 | 
260 | /* Lock a mutex.
261 |  * @param mutex The given mutex to be locked.  Pass NULL to specify
262 |  *              the global ICU mutex.  Recursive locks are an error
263 |  *              and may cause a deadlock on some platforms.
264 |  */
265 | U_CAPI void U_EXPORT2 umtx_lock(UMutex* mutex);
266 | 
267 | /* Unlock a mutex.
268 |  * @param mutex The given mutex to be unlocked.  Pass NULL to specify
269 |  *              the global ICU mutex.
270 |  */
271 | U_CAPI void U_EXPORT2 umtx_unlock (UMutex* mutex);
272 | 
273 | 
274 | U_NAMESPACE_END
275 | 
276 | #endif /* UMUTEX_H */
277 | /*eof*/


--------------------------------------------------------------------------------
/unicode/uset_imp.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | *******************************************************************************
 5 | *
 6 | *   Copyright (C) 2004-2007, International Business Machines
 7 | *   Corporation and others.  All Rights Reserved.
 8 | *
 9 | *******************************************************************************
10 | *   file name:  uset_imp.h
11 | *   encoding:   UTF-8
12 | *   tab size:   8 (not used)
13 | *   indentation:4
14 | *
15 | *   created on: 2004sep07
16 | *   created by: Markus W. Scherer
17 | *
18 | *   Internal USet definitions.
19 | */
20 | 
21 | #ifndef __USET_IMP_H__
22 | #define __USET_IMP_H__
23 | 
24 | #include <unicode/utypes.h>
25 | #include <unicode/uset.h>
26 | 
27 | U_CDECL_BEGIN
28 | 
29 | typedef void U_CALLCONV
30 | USetAdd(USet *set, UChar32 c);  /* function type of the USetAdder `add` member */
31 | 
32 | typedef void U_CALLCONV
33 | USetAddRange(USet *set, UChar32 start, UChar32 end);  /* function type of the USetAdder `addRange` member */
34 | 
35 | typedef void U_CALLCONV
36 | USetAddString(USet *set, const UChar *str, int32_t length);  /* function type of the USetAdder `addString` member */
37 | 
38 | typedef void U_CALLCONV
39 | USetRemove(USet *set, UChar32 c);  /* function type of the USetAdder `remove` member */
40 | 
41 | typedef void U_CALLCONV
42 | USetRemoveRange(USet *set, UChar32 start, UChar32 end);  /* function type of the USetAdder `removeRange` member */
43 | 
44 | /**
45 |  * Interface for adding items to a USet, to keep low-level code from
46 |  * statically depending on the USet implementation.
47 |  * Calls will look like sa->add(sa->set, c); remove/removeRange callbacks are carried as well.
48 |  */
49 | struct USetAdder {
50 |     USet *set;                     /* passed as the first argument of a callback, e.g. sa->add(sa->set, c) */
51 |     USetAdd *add;                  /* callback taking (set, c) */
52 |     USetAddRange *addRange;        /* callback taking (set, start, end) */
53 |     USetAddString *addString;      /* callback taking (set, str, length) */
54 |     USetRemove *remove;            /* callback taking (set, c) */
55 |     USetRemoveRange *removeRange;  /* callback taking (set, start, end) */
56 | };
57 | typedef struct USetAdder USetAdder;
58 | 
59 | U_CDECL_END
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/unicode/ustringtrie.h:
--------------------------------------------------------------------------------
 1 | // © 2016 and later: Unicode, Inc. and others.
 2 | // License & terms of use: http://www.unicode.org/copyright.html
 3 | /*
 4 | *******************************************************************************
 5 | *   Copyright (C) 2010-2012, International Business Machines
 6 | *   Corporation and others.  All Rights Reserved.
 7 | *******************************************************************************
 8 | *   file name:  ustringtrie.h
 9 | *   encoding:   UTF-8
10 | *   tab size:   8 (not used)
11 | *   indentation:4
12 | *
13 | *   created on: 2010dec17
14 | *   created by: Markus W. Scherer
15 | */
16 | 
17 | #ifndef __USTRINGTRIE_H__
18 | #define __USTRINGTRIE_H__
19 | 
20 | /**
21 |  * \file
22 |  * \brief C API: Helper definitions for dictionary trie APIs.
23 |  */
24 | 
25 | #include <unicode/utypes.h>
26 | 
27 | 
28 | /**
29 |  * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
30 |  * @see USTRINGTRIE_MATCHES
31 |  * @see USTRINGTRIE_HAS_VALUE
32 |  * @see USTRINGTRIE_HAS_NEXT
33 |  * @stable ICU 4.8
34 |  */
35 | enum UStringTrieResult {
36 |     /**
37 |      * The input unit(s) did not continue a matching string.
38 |      * Once current()/next() return USTRINGTRIE_NO_MATCH,
39 |      * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
40 |      * until the trie is reset to its original state or to a saved state.
41 |      * @stable ICU 4.8
42 |      */
43 |     USTRINGTRIE_NO_MATCH,  /* = 0; the implicit values 0..3 are relied on by the USTRINGTRIE_* macros below */
44 |     /**
45 |      * The input unit(s) continued a matching string
46 |      * but there is no value for the string so far.
47 |      * (It is a prefix of a longer string.)
48 |      * @stable ICU 4.8
49 |      */
50 |     USTRINGTRIE_NO_VALUE,  /* = 1 */
51 |     /**
52 |      * The input unit(s) continued a matching string
53 |      * and there is a value for the string so far.
54 |      * This value will be returned by getValue().
55 |      * No further input byte/unit can continue a matching string.
56 |      * @stable ICU 4.8
57 |      */
58 |     USTRINGTRIE_FINAL_VALUE,  /* = 2 */
59 |     /**
60 |      * The input unit(s) continued a matching string
61 |      * and there is a value for the string so far.
62 |      * This value will be returned by getValue().
63 |      * Another input byte/unit can continue a matching string.
64 |      * @stable ICU 4.8
65 |      */
66 |     USTRINGTRIE_INTERMEDIATE_VALUE  /* = 3 */
67 | };
68 | 
69 | /**
70 |  * Same as (result!=USTRINGTRIE_NO_MATCH); result is evaluated exactly once.
71 |  * @param result A UStringTrieResult, e.g. from BytesTrie::first(), UCharsTrie::next() etc.
72 |  * @return true if the input bytes/units so far are part of a matching string/byte sequence.
73 |  * @stable ICU 4.8
74 |  */
75 | #define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
76 | 
77 | /**
78 |  * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
79 |  * this macro evaluates result exactly once; the >= test relies on USTRINGTRIE_FINAL_VALUE and USTRINGTRIE_INTERMEDIATE_VALUE being the last two enumerators of UStringTrieResult.
80 |  * @param result A UStringTrieResult, e.g. from BytesTrie::first(), UCharsTrie::next() etc.
81 |  * @return true if there is a value for the input bytes/units so far.
82 |  * @see BytesTrie::getValue
83 |  * @see UCharsTrie::getValue
84 |  * @stable ICU 4.8
85 |  */
86 | #define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
87 | 
88 | /**
89 |  * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
90 |  * this macro evaluates result exactly once; the &1 test relies on USTRINGTRIE_NO_VALUE (1) and USTRINGTRIE_INTERMEDIATE_VALUE (3) being the only odd-valued enumerators of UStringTrieResult.
91 |  * @param result A UStringTrieResult, e.g. from BytesTrie::first(), UCharsTrie::next() etc.
92 |  * @return nonzero (true) if another input byte/unit can continue a matching string.
93 |  * @stable ICU 4.8
94 |  */
95 | #define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
96 | 
97 | #endif  /* __USTRINGTRIE_H__ */


--------------------------------------------------------------------------------
/uset.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MY_USET_H_
 2 | #define _MY_USET_H_
 3 | 
 4 | #include <unicode/utypes.h>
 5 | 
 6 | /*
 7 |  * Local declarations for a small subset of ICU's USet C API.
 8 |  * USet stays an incomplete type here and is only handled through pointers.
 9 |  */
10 | typedef struct USet USet;
11 | 
12 | /* The functions have C linkage; the __cplusplus guard keeps this header
13 |  * usable from C and Objective-C as well as from (Objective-)C++. */
14 | #ifdef __cplusplus
15 | extern "C" {
16 | #endif
17 |     /* Creates a new, empty set; dispose of it with uset_close(). */
18 |     USet *uset_openEmpty(void);
19 |     /* Disposes of a set obtained from uset_openEmpty(). */
20 |     void uset_close(USet *);
21 |     /* Freezes the set, making it immutable. */
22 |     void uset_freeze(USet *);
23 |     /* Adds the given code point to the set. */
24 |     void uset_add(USet *, UChar32);
25 |     /* Returns whether the set contains the given code point.  The set is
26 |      * taken as const, matching ICU's own prototype, so that read-only
27 |      * sets can be queried without a cast. */
28 |     UBool uset_contains(const USet *, UChar32);
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 | 
33 | #endif


--------------------------------------------------------------------------------