├── .gitignore ├── GTMDefines.h ├── GTMNSString+HTML.h ├── GTMNSString+HTML.m ├── GTMNSString-HTML.podspec ├── LICENSE ├── NSString+HTML.h ├── NSString+HTML.m └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | .DS_Store 3 | build/ 4 | *.pbxuser 5 | !default.pbxuser 6 | *.mode1v3 7 | !default.mode1v3 8 | *.mode2v3 9 | !default.mode2v3 10 | *.perspectivev3 11 | !default.perspectivev3 12 | *.xcworkspace 13 | !default.xcworkspace 14 | xcuserdata 15 | profile 16 | *.moved-aside 17 | DerivedData 18 | .idea/ 19 | -------------------------------------------------------------------------------- /GTMDefines.h: -------------------------------------------------------------------------------- 1 | // 2 | // GTMDefines.h 3 | // 4 | // Copyright 2008 Google Inc. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not 7 | // use this file except in compliance with the License. You may obtain a copy 8 | // of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | // License for the specific language governing permissions and limitations under 16 | // the License. 
17 | // 18 | 19 | // ============================================================================ 20 | 21 | #include 22 | #include 23 | 24 | #if TARGET_OS_IPHONE 25 | #include 26 | #endif // TARGET_OS_IPHONE 27 | 28 | // Not all MAC_OS_X_VERSION_10_X macros defined in past SDKs 29 | #ifndef MAC_OS_X_VERSION_10_5 30 | #define MAC_OS_X_VERSION_10_5 1050 31 | #endif 32 | #ifndef MAC_OS_X_VERSION_10_6 33 | #define MAC_OS_X_VERSION_10_6 1060 34 | #endif 35 | 36 | // Not all __IPHONE_X macros defined in past SDKs 37 | #ifndef __IPHONE_2_1 38 | #define __IPHONE_2_1 20100 39 | #endif 40 | #ifndef __IPHONE_2_2 41 | #define __IPHONE_2_2 20200 42 | #endif 43 | #ifndef __IPHONE_3_0 44 | #define __IPHONE_3_0 30000 45 | #endif 46 | #ifndef __IPHONE_3_1 47 | #define __IPHONE_3_1 30100 48 | #endif 49 | #ifndef __IPHONE_3_2 50 | #define __IPHONE_3_2 30200 51 | #endif 52 | #ifndef __IPHONE_4_0 53 | #define __IPHONE_4_0 40000 54 | #endif 55 | 56 | // ---------------------------------------------------------------------------- 57 | // CPP symbols that can be overridden in a prefix to control how the toolbox 58 | // is compiled. 59 | // ---------------------------------------------------------------------------- 60 | 61 | 62 | // By setting the GTM_CONTAINERS_VALIDATION_FAILED_LOG and 63 | // GTM_CONTAINERS_VALIDATION_FAILED_ASSERT macros you can control what happens 64 | // when a validation fails. If you implement your own validators, you may want 65 | // to control their internals using the same macros for consistency. 66 | #ifndef GTM_CONTAINERS_VALIDATION_FAILED_ASSERT 67 | #define GTM_CONTAINERS_VALIDATION_FAILED_ASSERT 0 68 | #endif 69 | 70 | // Give ourselves a consistent way to do inlines. Apple's macros even use 71 | // a few different actual definitions, so we're based off of the foundation 72 | // one. 
73 | #if !defined(GTM_INLINE) 74 | #if defined (__GNUC__) && (__GNUC__ == 4) 75 | #define GTM_INLINE static __inline__ __attribute__((always_inline)) 76 | #else 77 | #define GTM_INLINE static __inline__ 78 | #endif 79 | #endif 80 | 81 | // Give ourselves a consistent way of doing externs that links up nicely 82 | // when mixing objc and objc++ 83 | #if !defined (GTM_EXTERN) 84 | #if defined __cplusplus 85 | #define GTM_EXTERN extern "C" 86 | #else 87 | #define GTM_EXTERN extern 88 | #endif 89 | #endif 90 | 91 | // Give ourselves a consistent way of exporting things if we have visibility 92 | // set to hidden. 93 | #if !defined (GTM_EXPORT) 94 | #define GTM_EXPORT __attribute__((visibility("default"))) 95 | #endif 96 | 97 | // _GTMDevLog & _GTMDevAssert 98 | // 99 | // _GTMDevLog & _GTMDevAssert are meant to be a very lightweight shell for 100 | // developer level errors. This implementation simply macros to NSLog/NSAssert. 101 | // It is not intended to be a general logging/reporting system. 102 | // 103 | // Please see http://code.google.com/p/google-toolbox-for-mac/wiki/DevLogNAssert 104 | // for a little more background on the usage of these macros. 105 | // 106 | // _GTMDevLog log some error/problem in debug builds 107 | // _GTMDevAssert assert if condition isn't met w/in a method/function 108 | // in any build that does not define NS_BLOCK_ASSERTIONS. 109 | // 110 | // To replace this system, just provide different macro definitions in your 111 | // prefix header. Remember, any implementation you provide *must* be thread 112 | // safe since this could be called by anything in whatever situation it has 113 | // been placed in. 114 | // 115 | 116 | // We only define the simple macros if nothing else has defined this. 117 | #ifndef _GTMDevLog 118 | 119 | #ifdef DEBUG 120 | #define _GTMDevLog(...) NSLog(__VA_ARGS__) 121 | #else 122 | #define _GTMDevLog(...)
do { } while (0) 123 | #endif 124 | 125 | #endif // _GTMDevLog 126 | 127 | // Declared here so that it can easily be used for logging tracking if 128 | // necessary. See GTMUnitTestDevLog.h for details. 129 | @class NSString; 130 | GTM_EXTERN void _GTMUnitTestDevLog(NSString *format, ...); 131 | 132 | #ifndef _GTMDevAssert 133 | // we directly invoke the NSAssert handler so we can pass on the varargs 134 | // (NSAssert doesn't have a macro we can use that takes varargs) 135 | #if !defined(NS_BLOCK_ASSERTIONS) 136 | #define _GTMDevAssert(condition, ...) \ 137 | do { \ 138 | if (!(condition)) { \ 139 | [[NSAssertionHandler currentHandler] \ 140 | handleFailureInFunction:[NSString stringWithUTF8String:__PRETTY_FUNCTION__] \ 141 | file:[NSString stringWithUTF8String:__FILE__] \ 142 | lineNumber:__LINE__ \ 143 | description:__VA_ARGS__]; \ 144 | } \ 145 | } while(0) 146 | #else // !defined(NS_BLOCK_ASSERTIONS) 147 | #define _GTMDevAssert(condition, ...) do { } while (0) 148 | #endif // !defined(NS_BLOCK_ASSERTIONS) 149 | 150 | #endif // _GTMDevAssert 151 | 152 | // _GTMCompileAssert 153 | // _GTMCompileAssert is an assert that is meant to fire at compile time if you 154 | // want to check things at compile instead of runtime. For example if you 155 | // want to check that a wchar is 4 bytes instead of 2 you would use 156 | // _GTMCompileAssert(sizeof(wchar_t) == 4, wchar_t_is_4_bytes_on_OS_X) 157 | // Note that the second "arg" is not in quotes, and must be a valid processor 158 | // symbol in its own right (no spaces, punctuation etc). 159 | 160 | // Wrapping this in an #ifndef allows external groups to define their own 161 | // compile time assert scheme.
162 | #ifndef _GTMCompileAssert 163 | // We got this technique from here: 164 | // http://unixjunkie.blogspot.com/2007/10/better-compile-time-asserts_29.html 165 | 166 | #define _GTMCompileAssertSymbolInner(line, msg) _GTMCOMPILEASSERT ## line ## __ ## msg 167 | #define _GTMCompileAssertSymbol(line, msg) _GTMCompileAssertSymbolInner(line, msg) 168 | #define _GTMCompileAssert(test, msg) \ 169 | typedef char _GTMCompileAssertSymbol(__LINE__, msg) [ ((test) ? 1 : -1) ] 170 | #endif // _GTMCompileAssert 171 | 172 | // Macro to allow you to create NSStrings out of other macros. 173 | // #define FOO foo 174 | // NSString *fooString = GTM_NSSTRINGIFY(FOO); 175 | #if !defined (GTM_NSSTRINGIFY) 176 | #define GTM_NSSTRINGIFY_INNER(x) @#x 177 | #define GTM_NSSTRINGIFY(x) GTM_NSSTRINGIFY_INNER(x) 178 | #endif 179 | 180 | // Macro to allow fast enumeration when building for 10.5 or later, and 181 | // reliance on NSEnumerator for 10.4. Remember, NSDictionary w/ FastEnumeration 182 | // does keys, so pick the right thing, nothing is done on the FastEnumeration 183 | // side to be sure you're getting what you wanted. 
184 | #ifndef GTM_FOREACH_OBJECT 185 | #if TARGET_OS_IPHONE || !(MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5) 186 | #define GTM_FOREACH_ENUMEREE(element, enumeration) \ 187 | for (element in enumeration) 188 | #define GTM_FOREACH_OBJECT(element, collection) \ 189 | for (element in collection) 190 | #define GTM_FOREACH_KEY(element, collection) \ 191 | for (element in collection) 192 | #else 193 | #define GTM_FOREACH_ENUMEREE(element, enumeration) \ 194 | for (NSEnumerator *_ ## element ## _enum = enumeration; \ 195 | (element = [_ ## element ## _enum nextObject]) != nil; ) 196 | #define GTM_FOREACH_OBJECT(element, collection) \ 197 | GTM_FOREACH_ENUMEREE(element, [collection objectEnumerator]) 198 | #define GTM_FOREACH_KEY(element, collection) \ 199 | GTM_FOREACH_ENUMEREE(element, [collection keyEnumerator]) 200 | #endif 201 | #endif 202 | 203 | // ============================================================================ 204 | 205 | // ---------------------------------------------------------------------------- 206 | // CPP symbols defined based on the project settings so the GTM code has 207 | // simple things to test against w/o scattering the knowledge of project 208 | // setting through all the code. 209 | // ---------------------------------------------------------------------------- 210 | 211 | // Provide a single constant CPP symbol that all of GTM uses for ifdefing 212 | // iPhone code. 
213 | #if TARGET_OS_IPHONE // iPhone SDK 214 | // For iPhone specific stuff 215 | #define GTM_IPHONE_SDK 1 216 | #if TARGET_IPHONE_SIMULATOR 217 | #define GTM_IPHONE_SIMULATOR 1 218 | #else 219 | #define GTM_IPHONE_DEVICE 1 220 | #endif // TARGET_IPHONE_SIMULATOR 221 | #else 222 | // For MacOS specific stuff 223 | #define GTM_MACOS_SDK 1 224 | #endif 225 | 226 | // Some of our own availability macros 227 | #if GTM_MACOS_SDK 228 | #define GTM_AVAILABLE_ONLY_ON_IPHONE UNAVAILABLE_ATTRIBUTE 229 | #define GTM_AVAILABLE_ONLY_ON_MACOS 230 | #else 231 | #define GTM_AVAILABLE_ONLY_ON_IPHONE 232 | #define GTM_AVAILABLE_ONLY_ON_MACOS UNAVAILABLE_ATTRIBUTE 233 | #endif 234 | 235 | // Provide a symbol to include/exclude extra code for GC support. (This mainly 236 | // just controls the inclusion of finalize methods). 237 | #ifndef GTM_SUPPORT_GC 238 | #if GTM_IPHONE_SDK 239 | // iPhone never needs GC 240 | #define GTM_SUPPORT_GC 0 241 | #else 242 | // We can't find a symbol to tell if GC is supported/required, so best we 243 | // do on Mac targets is include it if we're on 10.5 or later. 
244 | #if MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5 245 | #define GTM_SUPPORT_GC 0 246 | #else 247 | #define GTM_SUPPORT_GC 1 248 | #endif 249 | #endif 250 | #endif 251 | 252 | // To simplify support for 64bit (and Leopard in general), we provide the type 253 | // defines for non Leopard SDKs 254 | #if !(MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5) 255 | // NSInteger/NSUInteger and Max/Mins 256 | #ifndef NSINTEGER_DEFINED 257 | #if __LP64__ || NS_BUILD_32_LIKE_64 258 | typedef long NSInteger; 259 | typedef unsigned long NSUInteger; 260 | #else 261 | typedef int NSInteger; 262 | typedef unsigned int NSUInteger; 263 | #endif 264 | #define NSIntegerMax LONG_MAX 265 | #define NSIntegerMin LONG_MIN 266 | #define NSUIntegerMax ULONG_MAX 267 | #define NSINTEGER_DEFINED 1 268 | #endif // NSINTEGER_DEFINED 269 | // CGFloat 270 | #ifndef CGFLOAT_DEFINED 271 | #if defined(__LP64__) && __LP64__ 272 | // This really is an untested path (64bit on Tiger?) 273 | typedef double CGFloat; 274 | #define CGFLOAT_MIN DBL_MIN 275 | #define CGFLOAT_MAX DBL_MAX 276 | #define CGFLOAT_IS_DOUBLE 1 277 | #else /* !defined(__LP64__) || !__LP64__ */ 278 | typedef float CGFloat; 279 | #define CGFLOAT_MIN FLT_MIN 280 | #define CGFLOAT_MAX FLT_MAX 281 | #define CGFLOAT_IS_DOUBLE 0 282 | #endif /* !defined(__LP64__) || !__LP64__ */ 283 | #define CGFLOAT_DEFINED 1 284 | #endif // CGFLOAT_DEFINED 285 | #endif // MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5 286 | 287 | // Some support for advanced clang static analysis functionality 288 | // See http://clang-analyzer.llvm.org/annotations.html 289 | #ifndef __has_feature // Optional. 290 | #define __has_feature(x) 0 // Compatibility with non-clang compilers. 
291 | #endif 292 | 293 | #ifndef NS_RETURNS_RETAINED 294 | #if __has_feature(attribute_ns_returns_retained) 295 | #define NS_RETURNS_RETAINED __attribute__((ns_returns_retained)) 296 | #else 297 | #define NS_RETURNS_RETAINED 298 | #endif 299 | #endif 300 | 301 | #ifndef NS_RETURNS_NOT_RETAINED 302 | #if __has_feature(attribute_ns_returns_not_retained) 303 | #define NS_RETURNS_NOT_RETAINED __attribute__((ns_returns_not_retained)) 304 | #else 305 | #define NS_RETURNS_NOT_RETAINED 306 | #endif 307 | #endif 308 | 309 | #ifndef CF_RETURNS_RETAINED 310 | #if __has_feature(attribute_cf_returns_retained) 311 | #define CF_RETURNS_RETAINED __attribute__((cf_returns_retained)) 312 | #else 313 | #define CF_RETURNS_RETAINED 314 | #endif 315 | #endif 316 | 317 | #ifndef CF_RETURNS_NOT_RETAINED 318 | #if __has_feature(attribute_cf_returns_not_retained) 319 | #define CF_RETURNS_NOT_RETAINED __attribute__((cf_returns_not_retained)) 320 | #else 321 | #define CF_RETURNS_NOT_RETAINED 322 | #endif 323 | #endif 324 | 325 | // Defined on 10.6 and above. 326 | #ifndef NS_FORMAT_ARGUMENT 327 | #define NS_FORMAT_ARGUMENT(A) 328 | #endif 329 | 330 | // Defined on 10.6 and above. 331 | #ifndef NS_FORMAT_FUNCTION 332 | #define NS_FORMAT_FUNCTION(F,A) 333 | #endif 334 | 335 | #ifndef GTM_NONNULL 336 | #define GTM_NONNULL(x) __attribute__((nonnull(x))) 337 | #endif 338 | 339 | // To simplify support for both Leopard and Snow Leopard we declare 340 | // the Snow Leopard protocols that we need here. 
341 | #if !defined(GTM_10_6_PROTOCOLS_DEFINED) && !(MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) 342 | #define GTM_10_6_PROTOCOLS_DEFINED 1 343 | @protocol NSConnectionDelegate 344 | @end 345 | @protocol NSAnimationDelegate 346 | @end 347 | @protocol NSImageDelegate 348 | @end 349 | @protocol NSTabViewDelegate 350 | @end 351 | #endif // !defined(GTM_10_6_PROTOCOLS_DEFINED) && !(MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) 352 | -------------------------------------------------------------------------------- /GTMNSString+HTML.h: -------------------------------------------------------------------------------- 1 | // 2 | // GTMNSString+HTML.h 3 | // Dealing with NSStrings that contain HTML 4 | // 5 | // Copyright 2006-2008 Google Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | // use this file except in compliance with the License. You may obtain a copy 9 | // of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 16 | // License for the specific language governing permissions and limitations under 17 | // the License. 18 | // 19 | 20 | #import 21 | 22 | /// Utilities for NSStrings containing HTML 23 | @interface NSString (GTMNSStringHTMLAdditions) 24 | 25 | /// Get a string where internal characters that need escaping for HTML are escaped 26 | // 27 | /// For example, '&' becomes '&amp;'. This will only cover characters from table 28 | /// A.2.2 of http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters 29 | /// which is what you want for a unicode encoded webpage. If you have an ascii 30 | /// or non-encoded webpage, please use gtm_stringByEscapingForAsciiHTML which will 31 | /// encode all characters.
32 | /// 33 | /// For obvious reasons this call is only safe once. 34 | // 35 | // Returns: 36 | // Autoreleased NSString 37 | // (the receiver itself when it is empty; nil if a working buffer cannot be allocated) 38 | - (NSString *)gtm_stringByEscapingForHTML; 39 | 40 | /// Get a string where internal characters that need escaping for HTML are escaped 41 | // 42 | /// For example, '&' becomes '&amp;' 43 | /// All non-mapped characters (unicode that don't have a &keyword; mapping) 44 | /// will be converted to the appropriate &#xxx; value. If your webpage is 45 | /// unicode encoded (UTF16 or UTF8) use gtm_stringByEscapingForHTML instead as it is 46 | /// faster, and produces less bloated and more readable HTML (as long as you 47 | /// are using a unicode compliant HTML reader). 48 | /// 49 | /// For obvious reasons this call is only safe once. 50 | // 51 | // Returns: 52 | // Autoreleased NSString 53 | // (the receiver itself when it is empty; nil if a working buffer cannot be allocated) 54 | - (NSString *)gtm_stringByEscapingForAsciiHTML; 55 | 56 | /// Get a string where internal characters that are escaped for HTML are unescaped 57 | // 58 | /// For example, '&amp;' becomes '&' 59 | /// Handles &#32; and &#x32; cases as well 60 | /// 61 | // Returns: 62 | // Autoreleased NSString 63 | // (the receiver itself when it contains no '&') 64 | - (NSString *)gtm_stringByUnescapingFromHTML; 65 | 66 | @end -------------------------------------------------------------------------------- /GTMNSString+HTML.m: -------------------------------------------------------------------------------- 1 | // 2 | // GTMNSString+HTML.m 3 | // Dealing with NSStrings that contain HTML 4 | // 5 | // Copyright 2006-2008 Google Inc. 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | // use this file except in compliance with the License. You may obtain a copy 9 | // of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the 16 | // License for the specific language governing permissions and limitations under 17 | // the License. 18 | // 19 | 20 | //#import "GTMDefines.h" 21 | #import "GTMNSString+HTML.h" 22 | 23 | typedef struct { 24 | NSString *escapeSequence; 25 | unichar uchar; 26 | } HTMLEscapeMap; 27 | 28 | // Taken from http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters 29 | // Ordered by uchar lowest to highest for bsearching 30 | static HTMLEscapeMap gAsciiHTMLEscapeMap[] = { 31 | // A.2.2. Special characters 32 | { @""", 34 }, 33 | { @"&", 38 }, 34 | { @"'", 39 }, 35 | { @"<", 60 }, 36 | { @">", 62 }, 37 | 38 | // A.2.1. Latin-1 characters 39 | { @" ", 160 }, 40 | { @"¡", 161 }, 41 | { @"¢", 162 }, 42 | { @"£", 163 }, 43 | { @"¤", 164 }, 44 | { @"¥", 165 }, 45 | { @"¦", 166 }, 46 | { @"§", 167 }, 47 | { @"¨", 168 }, 48 | { @"©", 169 }, 49 | { @"ª", 170 }, 50 | { @"«", 171 }, 51 | { @"¬", 172 }, 52 | { @"­", 173 }, 53 | { @"®", 174 }, 54 | { @"¯", 175 }, 55 | { @"°", 176 }, 56 | { @"±", 177 }, 57 | { @"²", 178 }, 58 | { @"³", 179 }, 59 | { @"´", 180 }, 60 | { @"µ", 181 }, 61 | { @"¶", 182 }, 62 | { @"·", 183 }, 63 | { @"¸", 184 }, 64 | { @"¹", 185 }, 65 | { @"º", 186 }, 66 | { @"»", 187 }, 67 | { @"¼", 188 }, 68 | { @"½", 189 }, 69 | { @"¾", 190 }, 70 | { @"¿", 191 }, 71 | { @"À", 192 }, 72 | { @"Á", 193 }, 73 | { @"Â", 194 }, 74 | { @"Ã", 195 }, 75 | { @"Ä", 196 }, 76 | { @"Å", 197 }, 77 | { @"Æ", 198 }, 78 | { @"Ç", 199 }, 79 | { @"È", 200 }, 80 | { @"É", 201 }, 81 | { @"Ê", 202 }, 82 | { @"Ë", 203 }, 83 | { @"Ì", 204 }, 84 | { @"Í", 205 }, 85 | { @"Î", 206 }, 86 | { @"Ï", 207 }, 87 | { @"Ð", 208 }, 88 | { @"Ñ", 209 }, 89 | { @"Ò", 210 }, 90 | { @"Ó", 211 }, 91 | { @"Ô", 212 }, 92 | { @"Õ", 213 }, 93 | { @"Ö", 214 }, 94 | { @"×", 215 }, 95 | { @"Ø", 216 }, 96 | { @"Ù", 217 }, 97 | { @"Ú", 218 }, 98 | { @"Û", 219 }, 99 | { @"Ü", 220 }, 100 | { @"Ý", 221 }, 101 | { @"Þ", 222 }, 102 | { @"ß", 223 }, 103 | { @"à", 224 }, 104 | { @"á", 225 }, 105 | { @"â", 226 
}, 106 | { @"ã", 227 }, 107 | { @"ä", 228 }, 108 | { @"å", 229 }, 109 | { @"æ", 230 }, 110 | { @"ç", 231 }, 111 | { @"è", 232 }, 112 | { @"é", 233 }, 113 | { @"ê", 234 }, 114 | { @"ë", 235 }, 115 | { @"ì", 236 }, 116 | { @"í", 237 }, 117 | { @"î", 238 }, 118 | { @"ï", 239 }, 119 | { @"ð", 240 }, 120 | { @"ñ", 241 }, 121 | { @"ò", 242 }, 122 | { @"ó", 243 }, 123 | { @"ô", 244 }, 124 | { @"õ", 245 }, 125 | { @"ö", 246 }, 126 | { @"÷", 247 }, 127 | { @"ø", 248 }, 128 | { @"ù", 249 }, 129 | { @"ú", 250 }, 130 | { @"û", 251 }, 131 | { @"ü", 252 }, 132 | { @"ý", 253 }, 133 | { @"þ", 254 }, 134 | { @"ÿ", 255 }, 135 | 136 | // A.2.2. Special characters cont'd 137 | { @"Œ", 338 }, 138 | { @"œ", 339 }, 139 | { @"Š", 352 }, 140 | { @"š", 353 }, 141 | { @"Ÿ", 376 }, 142 | 143 | // A.2.3. Symbols 144 | { @"ƒ", 402 }, 145 | 146 | // A.2.2. Special characters cont'd 147 | { @"ˆ", 710 }, 148 | { @"˜", 732 }, 149 | 150 | // A.2.3. Symbols cont'd 151 | { @"Α", 913 }, 152 | { @"Β", 914 }, 153 | { @"Γ", 915 }, 154 | { @"Δ", 916 }, 155 | { @"Ε", 917 }, 156 | { @"Ζ", 918 }, 157 | { @"Η", 919 }, 158 | { @"Θ", 920 }, 159 | { @"Ι", 921 }, 160 | { @"Κ", 922 }, 161 | { @"Λ", 923 }, 162 | { @"Μ", 924 }, 163 | { @"Ν", 925 }, 164 | { @"Ξ", 926 }, 165 | { @"Ο", 927 }, 166 | { @"Π", 928 }, 167 | { @"Ρ", 929 }, 168 | { @"Σ", 931 }, 169 | { @"Τ", 932 }, 170 | { @"Υ", 933 }, 171 | { @"Φ", 934 }, 172 | { @"Χ", 935 }, 173 | { @"Ψ", 936 }, 174 | { @"Ω", 937 }, 175 | { @"α", 945 }, 176 | { @"β", 946 }, 177 | { @"γ", 947 }, 178 | { @"δ", 948 }, 179 | { @"ε", 949 }, 180 | { @"ζ", 950 }, 181 | { @"η", 951 }, 182 | { @"θ", 952 }, 183 | { @"ι", 953 }, 184 | { @"κ", 954 }, 185 | { @"λ", 955 }, 186 | { @"μ", 956 }, 187 | { @"ν", 957 }, 188 | { @"ξ", 958 }, 189 | { @"ο", 959 }, 190 | { @"π", 960 }, 191 | { @"ρ", 961 }, 192 | { @"ς", 962 }, 193 | { @"σ", 963 }, 194 | { @"τ", 964 }, 195 | { @"υ", 965 }, 196 | { @"φ", 966 }, 197 | { @"χ", 967 }, 198 | { @"ψ", 968 }, 199 | { @"ω", 969 }, 200 | { @"ϑ", 977 }, 201 | 
{ @"ϒ", 978 }, 202 | { @"ϖ", 982 }, 203 | 204 | // A.2.2. Special characters cont'd 205 | { @" ", 8194 }, 206 | { @" ", 8195 }, 207 | { @" ", 8201 }, 208 | { @"‌", 8204 }, 209 | { @"‍", 8205 }, 210 | { @"‎", 8206 }, 211 | { @"‏", 8207 }, 212 | { @"–", 8211 }, 213 | { @"—", 8212 }, 214 | { @"‘", 8216 }, 215 | { @"’", 8217 }, 216 | { @"‚", 8218 }, 217 | { @"“", 8220 }, 218 | { @"”", 8221 }, 219 | { @"„", 8222 }, 220 | { @"†", 8224 }, 221 | { @"‡", 8225 }, 222 | // A.2.3. Symbols cont'd 223 | { @"•", 8226 }, 224 | { @"…", 8230 }, 225 | 226 | // A.2.2. Special characters cont'd 227 | { @"‰", 8240 }, 228 | 229 | // A.2.3. Symbols cont'd 230 | { @"′", 8242 }, 231 | { @"″", 8243 }, 232 | 233 | // A.2.2. Special characters cont'd 234 | { @"‹", 8249 }, 235 | { @"›", 8250 }, 236 | 237 | // A.2.3. Symbols cont'd 238 | { @"‾", 8254 }, 239 | { @"⁄", 8260 }, 240 | 241 | // A.2.2. Special characters cont'd 242 | { @"€", 8364 }, 243 | 244 | // A.2.3. Symbols cont'd 245 | { @"ℑ", 8465 }, 246 | { @"℘", 8472 }, 247 | { @"ℜ", 8476 }, 248 | { @"™", 8482 }, 249 | { @"ℵ", 8501 }, 250 | { @"←", 8592 }, 251 | { @"↑", 8593 }, 252 | { @"→", 8594 }, 253 | { @"↓", 8595 }, 254 | { @"↔", 8596 }, 255 | { @"↵", 8629 }, 256 | { @"⇐", 8656 }, 257 | { @"⇑", 8657 }, 258 | { @"⇒", 8658 }, 259 | { @"⇓", 8659 }, 260 | { @"⇔", 8660 }, 261 | { @"∀", 8704 }, 262 | { @"∂", 8706 }, 263 | { @"∃", 8707 }, 264 | { @"∅", 8709 }, 265 | { @"∇", 8711 }, 266 | { @"∈", 8712 }, 267 | { @"∉", 8713 }, 268 | { @"∋", 8715 }, 269 | { @"∏", 8719 }, 270 | { @"∑", 8721 }, 271 | { @"−", 8722 }, 272 | { @"∗", 8727 }, 273 | { @"√", 8730 }, 274 | { @"∝", 8733 }, 275 | { @"∞", 8734 }, 276 | { @"∠", 8736 }, 277 | { @"∧", 8743 }, 278 | { @"∨", 8744 }, 279 | { @"∩", 8745 }, 280 | { @"∪", 8746 }, 281 | { @"∫", 8747 }, 282 | { @"∴", 8756 }, 283 | { @"∼", 8764 }, 284 | { @"≅", 8773 }, 285 | { @"≈", 8776 }, 286 | { @"≠", 8800 }, 287 | { @"≡", 8801 }, 288 | { @"≤", 8804 }, 289 | { @"≥", 8805 }, 290 | { @"⊂", 8834 }, 291 | { @"⊃", 8835 }, 
292 | { @"⊄", 8836 }, 293 | { @"⊆", 8838 }, 294 | { @"⊇", 8839 }, 295 | { @"⊕", 8853 }, 296 | { @"⊗", 8855 }, 297 | { @"⊥", 8869 }, 298 | { @"⋅", 8901 }, 299 | { @"⌈", 8968 }, 300 | { @"⌉", 8969 }, 301 | { @"⌊", 8970 }, 302 | { @"⌋", 8971 }, 303 | { @"⟨", 9001 }, 304 | { @"⟩", 9002 }, 305 | { @"◊", 9674 }, 306 | { @"♠", 9824 }, 307 | { @"♣", 9827 }, 308 | { @"♥", 9829 }, 309 | { @"♦", 9830 } 310 | }; 311 | 312 | // Taken from http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters 313 | // This is table A.2.2 Special Characters 314 | static HTMLEscapeMap gUnicodeHTMLEscapeMap[] = { 315 | // C0 Controls and Basic Latin 316 | { @""", 34 }, 317 | { @"&", 38 }, 318 | { @"'", 39 }, 319 | { @"<", 60 }, 320 | { @">", 62 }, 321 | 322 | // Latin Extended-A 323 | { @"Œ", 338 }, 324 | { @"œ", 339 }, 325 | { @"Š", 352 }, 326 | { @"š", 353 }, 327 | { @"Ÿ", 376 }, 328 | 329 | // Spacing Modifier Letters 330 | { @"ˆ", 710 }, 331 | { @"˜", 732 }, 332 | 333 | // General Punctuation 334 | { @" ", 8194 }, 335 | { @" ", 8195 }, 336 | { @" ", 8201 }, 337 | { @"‌", 8204 }, 338 | { @"‍", 8205 }, 339 | { @"‎", 8206 }, 340 | { @"‏", 8207 }, 341 | { @"–", 8211 }, 342 | { @"—", 8212 }, 343 | { @"‘", 8216 }, 344 | { @"’", 8217 }, 345 | { @"‚", 8218 }, 346 | { @"“", 8220 }, 347 | { @"”", 8221 }, 348 | { @"„", 8222 }, 349 | { @"†", 8224 }, 350 | { @"‡", 8225 }, 351 | { @"‰", 8240 }, 352 | { @"‹", 8249 }, 353 | { @"›", 8250 }, 354 | { @"€", 8364 }, 355 | }; 356 | 357 | 358 | // Utility function for Bsearching table above 359 | static int EscapeMapCompare(const void *ucharVoid, const void *mapVoid) { 360 | const unichar *uchar = (const unichar*)ucharVoid; 361 | const HTMLEscapeMap *map = (const HTMLEscapeMap*)mapVoid; 362 | int val; 363 | if (*uchar > map->uchar) { 364 | val = 1; 365 | } else if (*uchar < map->uchar) { 366 | val = -1; 367 | } else { 368 | val = 0; 369 | } 370 | return val; 371 | } 372 | 373 | @implementation NSString (GTMNSStringHTMLAdditions) 374 | 375 | - (NSString 
*)gtm_stringByEscapingHTMLUsingTable:(HTMLEscapeMap*)table 376 | ofSize:(NSUInteger)size 377 | escapingUnicode:(BOOL)escapeUnicode { 378 | NSUInteger length = [self length]; 379 | if (!length) { 380 | return self; 381 | } 382 | // Escapes the receiver using `table` (must be sorted by uchar for bsearch; `size` is the table size in bytes). When escapeUnicode is YES, unmapped characters above 127 are written as &#NNN;. Returns the receiver for an empty string and nil if a buffer cannot be allocated. 383 | NSMutableString *finalString = [NSMutableString string]; 384 | NSMutableData *data2 = [NSMutableData dataWithLength:sizeof(unichar) * length]; 385 | // data2 is created with dataWithLength: (not dataWithCapacity:): mutableBytes is only guaranteed to be a valid, writable pointer when the data's length is non-zero, and buffer2 below is written for up to `length` unichars. 386 | // this block is common between GTMNSString+HTML and GTMNSString+XML but 387 | // it's so short that it isn't really worth trying to share. 388 | const unichar *buffer = CFStringGetCharactersPtr((CFStringRef)self); 389 | if (!buffer) { 390 | // We want this buffer to be autoreleased. 391 | NSMutableData *data = [NSMutableData dataWithLength:length * sizeof(UniChar)]; 392 | if (!data) { 393 | // COV_NF_START - Memory fail case 394 | // _GTMDevLog(@"couldn't alloc buffer"); 395 | return nil; 396 | // COV_NF_END 397 | } 398 | [self getCharacters:[data mutableBytes] range:NSMakeRange(0, length)]; 399 | buffer = [data bytes]; 400 | } 401 | 402 | if (!buffer || !data2) { 403 | // COV_NF_START 404 | // _GTMDevLog(@"Unable to allocate buffer or data2"); 405 | return nil; 406 | // COV_NF_END 407 | } 408 | 409 | unichar *buffer2 = (unichar *)[data2 mutableBytes]; 410 | // buffer2 batches runs of characters that need no escaping; each run is flushed to finalString before an escape sequence is appended. 411 | NSUInteger buffer2Length = 0; 412 | 413 | for (NSUInteger i = 0; i < length; ++i) { 414 | HTMLEscapeMap *val = bsearch(&buffer[i], table, 415 | size / sizeof(HTMLEscapeMap), 416 | sizeof(HTMLEscapeMap), EscapeMapCompare); 417 | if (val || (escapeUnicode && buffer[i] > 127)) { 418 | if (buffer2Length) { 419 | CFStringAppendCharacters((CFMutableStringRef)finalString, 420 | buffer2, 421 | buffer2Length); 422 | buffer2Length = 0; 423 | } 424 | if (val) { 425 | [finalString appendString:val->escapeSequence]; 426 | } 427 | else { 428 | // _GTMDevAssert(escapeUnicode && buffer[i] > 127, @"Illegal Character"); 429 | [finalString appendFormat:@"&#%d;", buffer[i]]; 430 | } 431 | } else { 432 | buffer2[buffer2Length] = buffer[i]; 433 | buffer2Length += 1; 434
| } 435 | } 436 | if (buffer2Length) { 437 | CFStringAppendCharacters((CFMutableStringRef)finalString, 438 | buffer2, 439 | buffer2Length); 440 | } 441 | return finalString; 442 | } 443 | // Escapes only the characters listed in gUnicodeHTMLEscapeMap; everything else is passed through unchanged. 444 | - (NSString *)gtm_stringByEscapingForHTML { 445 | return [self gtm_stringByEscapingHTMLUsingTable:gUnicodeHTMLEscapeMap 446 | ofSize:sizeof(gUnicodeHTMLEscapeMap) 447 | escapingUnicode:NO]; 448 | } // gtm_stringByEscapingForHTML 449 | // Escapes the characters listed in gAsciiHTMLEscapeMap and writes any other character above 127 as &#NNN;. 450 | - (NSString *)gtm_stringByEscapingForAsciiHTML { 451 | return [self gtm_stringByEscapingHTMLUsingTable:gAsciiHTMLEscapeMap 452 | ofSize:sizeof(gAsciiHTMLEscapeMap) 453 | escapingUnicode:YES]; 454 | } // gtm_stringByEscapingForAsciiHTML 455 | // Replaces named, decimal and hex escape sequences with their characters. The receiver is scanned backwards for '&' and ranges are computed against the receiver, so a replacement made in finalString never shifts the ranges of sequences that are processed later (they all lie earlier in the string). 456 | - (NSString *)gtm_stringByUnescapingFromHTML { 457 | NSRange range = NSMakeRange(0, [self length]); 458 | NSRange subrange = [self rangeOfString:@"&" options:NSBackwardsSearch range:range]; 459 | 460 | // if no ampersands, we've got a quick way out 461 | if (subrange.length == 0) return self; 462 | NSMutableString *finalString = [NSMutableString stringWithString:self]; 463 | do { 464 | NSRange semiColonRange = NSMakeRange(subrange.location, NSMaxRange(range) - subrange.location); 465 | semiColonRange = [self rangeOfString:@";" options:0 range:semiColonRange]; 466 | range = NSMakeRange(0, subrange.location); 467 | // if we don't find a semicolon in the range, we don't have a sequence 468 | if (semiColonRange.location == NSNotFound) { 469 | continue; 470 | } 471 | NSRange escapeRange = NSMakeRange(subrange.location, semiColonRange.location - subrange.location + 1); 472 | NSString *escapeString = [self substringWithRange:escapeRange]; 473 | NSUInteger length = [escapeString length]; 474 | // a sequence must be longer than 3 (&lt;) and less than 11 (&thetasym;) 475 | if (length > 3 && length < 11) { 476 | if ([escapeString characterAtIndex:1] == '#') { 477 | unichar char2 = [escapeString characterAtIndex:2]; 478 | if (char2 == 'x' || char2 == 'X') { 479 | // Hex escape sequences &#xa3; 480 | NSString *hexSequence =
[escapeString substringWithRange:NSMakeRange(3, length - 4)]; 481 | NSScanner *scanner = [NSScanner scannerWithString:hexSequence]; 482 | unsigned value; 483 | if ([scanner scanHexInt:&value] && 484 | value < USHRT_MAX && 485 | value > 0 486 | && [scanner scanLocation] == length - 4) { 487 | unichar uchar = value; 488 | NSString *charString = [NSString stringWithCharacters:&uchar length:1]; 489 | [finalString replaceCharactersInRange:escapeRange withString:charString]; 490 | } 491 | // a value of 0, a value >= USHRT_MAX, or trailing non-hex text leaves the sequence untouched 492 | } else { 493 | // Decimal Sequences &#123; 494 | NSString *numberSequence = [escapeString substringWithRange:NSMakeRange(2, length - 3)]; 495 | NSScanner *scanner = [NSScanner scannerWithString:numberSequence]; 496 | int value; 497 | if ([scanner scanInt:&value] && 498 | value < USHRT_MAX && 499 | value > 0 500 | && [scanner scanLocation] == length - 3) { 501 | unichar uchar = value; 502 | NSString *charString = [NSString stringWithCharacters:&uchar length:1]; 503 | [finalString replaceCharactersInRange:escapeRange withString:charString]; 504 | } 505 | } 506 | } else { 507 | // "standard" sequences 508 | for (unsigned i = 0; i < sizeof(gAsciiHTMLEscapeMap) / sizeof(HTMLEscapeMap); ++i) { 509 | if ([escapeString isEqualToString:gAsciiHTMLEscapeMap[i].escapeSequence]) { 510 | [finalString replaceCharactersInRange:escapeRange withString:[NSString stringWithCharacters:&gAsciiHTMLEscapeMap[i].uchar length:1]]; 511 | break; 512 | } 513 | } 514 | } 515 | } 516 | } while ((subrange = [self rangeOfString:@"&" options:NSBackwardsSearch range:range]).length != 0); 517 | return finalString; 518 | } // gtm_stringByUnescapingFromHTML 519 | 520 | 521 | 522 | @end -------------------------------------------------------------------------------- /GTMNSString-HTML.podspec: -------------------------------------------------------------------------------- 1 | Pod::Spec.new do |s| 2 | s.name = 'GTMNSString-HTML' 3 | s.version = '0.0.1' 4 | s.summary = 'Dealing with NSStrings that contain HTML.'
5 | s.homepage = 'https://github.com/siriusdely/GTMNSString-HTML' 6 | s.author = { 7 | 'Sirius Dely' => 'mail@siriusdely.com' 8 | } 9 | s.license = { 10 | :type => 'Apache License 2.0', 11 | :file => 'LICENSE' 12 | } 13 | s.source = { 14 | :git => 'https://github.com/siriusdely/GTMNSString-HTML.git', 15 | :commit => '57cc38f106911d9d4ee73db67fcaa6df681b51bc' 16 | } 17 | s.platform = :ios 18 | s.source_files = '*.{h,m}' 19 | s.compiler_flags = '-w' # Disable all warnings 20 | end 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2006-2008 Google Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | use this file except in compliance with the License. You may obtain a copy 5 | of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | License for the specific language governing permissions and limitations under 13 | the License. 
14 | 15 | -------------------------------------------------------------------------------- /NSString+HTML.h: -------------------------------------------------------------------------------- 1 | // 2 | // NSString+HTML.h 3 | // MWFeedParser 4 | // 5 | // Copyright (c) 2010 Michael Waterfall 6 | // 7 | // Permission is hereby granted, free of charge, to any person obtaining a copy 8 | // of this software and associated documentation files (the "Software"), to deal 9 | // in the Software without restriction, including without limitation the rights 10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | // copies of the Software, and to permit persons to whom the Software is 12 | // furnished to do so, subject to the following conditions: 13 | // 14 | // 1. The above copyright notice and this permission notice shall be included 15 | // in all copies or substantial portions of the Software. 16 | // 17 | // 2. This Software cannot be used to archive or collect data such as (but not 18 | // limited to) that of events, news, experiences and activities, for the 19 | // purpose of any concept relating to diary/journal keeping. 20 | // 21 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 | // THE SOFTWARE. 28 | // 29 | 30 | #import 31 | 32 | // Dependant upon GTMNSString+HTML 33 | 34 | @interface NSString (HTML) 35 | 36 | // Strips HTML tags & comments, removes extra whitespace and decodes HTML character entities. 
37 | - (NSString *)stringByConvertingHTMLToPlainText; 38 | 39 | // Decode all HTML entities using GTM. 40 | - (NSString *)stringByDecodingHTMLEntities; 41 | 42 | // Encode all HTML entities using GTM. 43 | - (NSString *)stringByEncodingHTMLEntities; 44 | 45 | // Minimal unicode encoding will only cover characters from table 46 | // A.2.2 of http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters 47 | // which is what you want for a unicode encoded webpage. 48 | - (NSString *)stringByEncodingHTMLEntities:(BOOL)isUnicode; 49 | 50 | // Replace newlines with
tags. 51 | - (NSString *)stringWithNewLinesAsBRs; 52 | 53 | // Remove newlines and white space from string. 54 | - (NSString *)stringByRemovingNewLinesAndWhitespace; 55 | 56 | // Wrap plain URLs in ... 57 | // - Ignores URLs inside tags (any URL beginning with =") 58 | // - HTTP & HTTPS schemes only 59 | // - Only works in iOS 4+ as we use NSRegularExpression (returns self if not supported so be careful with NSMutableStrings) 60 | // - Expression: (?" intoString:NULL]; 70 | [scanner scanString:@"-->" intoString:NULL]; 71 | 72 | } else { 73 | 74 | // Tag - remove and replace with space unless it's 75 | // a closing inline tag then dont replace with a space 76 | if ([scanner scanString:@"/" intoString:NULL]) { 77 | 78 | // Closing tag - replace with space unless it's inline 79 | tagName = nil; dontReplaceTagWithSpace = NO; 80 | if ([scanner scanCharactersFromSet:tagNameCharacters intoString:&tagName]) { 81 | tagName = [tagName lowercaseString]; 82 | dontReplaceTagWithSpace = ([tagName isEqualToString:@"a"] || 83 | [tagName isEqualToString:@"b"] || 84 | [tagName isEqualToString:@"i"] || 85 | [tagName isEqualToString:@"q"] || 86 | [tagName isEqualToString:@"span"] || 87 | [tagName isEqualToString:@"em"] || 88 | [tagName isEqualToString:@"strong"] || 89 | [tagName isEqualToString:@"cite"] || 90 | [tagName isEqualToString:@"abbr"] || 91 | [tagName isEqualToString:@"acronym"] || 92 | [tagName isEqualToString:@"label"]); 93 | } 94 | 95 | // Replace tag with string unless it was an inline 96 | if (!dontReplaceTagWithSpace && result.length > 0 && ![scanner isAtEnd]) [result appendString:@" "]; 97 | 98 | } 99 | 100 | // Scan past tag 101 | [scanner scanUpToString:@">" intoString:NULL]; 102 | [scanner scanString:@">" intoString:NULL]; 103 | 104 | } 105 | 106 | } else { 107 | 108 | // Stopped at whitespace - replace all whitespace and newlines with a space 109 | if ([scanner scanCharactersFromSet:newLineAndWhitespaceCharacters intoString:NULL]) { 110 | if (result.length > 0 && 
![scanner isAtEnd]) [result appendString:@" "]; // Dont append space to beginning or end of result 111 | } 112 | 113 | } 114 | 115 | } while (![scanner isAtEnd]); 116 | 117 | // Cleanup 118 | [scanner release]; 119 | 120 | // Decode HTML entities and return 121 | NSString *retString = [[result stringByDecodingHTMLEntities] retain]; 122 | [result release]; 123 | 124 | // Drain 125 | [pool drain]; 126 | 127 | // Return 128 | return [retString autorelease]; 129 | 130 | } 131 | 132 | - (NSString *)stringByDecodingHTMLEntities { 133 | // Can return self so create new string if we're a mutable string 134 | return [NSString stringWithString:[self gtm_stringByUnescapingFromHTML]]; 135 | } 136 | 137 | 138 | - (NSString *)stringByEncodingHTMLEntities { 139 | // Can return self so create new string if we're a mutable string 140 | return [NSString stringWithString:[self gtm_stringByEscapingForAsciiHTML]]; 141 | } 142 | 143 | - (NSString *)stringByEncodingHTMLEntities:(BOOL)isUnicode { 144 | // Can return self so create new string if we're a mutable string 145 | return [NSString stringWithString:(isUnicode ? 
[self gtm_stringByEscapingForHTML] : [self gtm_stringByEscapingForAsciiHTML])]; 146 | } 147 |

// Returns a new autoreleased string in which every line break of the receiver
// is replaced by a "<br />" tag. A "\r\n" pair produces a single tag; every
// other line-break character produces one tag each.
- (NSString *)stringWithNewLinesAsBRs {

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Strange New lines:
    //  Next Line, U+0085
    //  Form Feed, U+000C
    //  Line Separator, U+2028
    //  Paragraph Separator, U+2029
    // They are inserted with %C so that the set holds the code points
    // themselves. Formatting them with %d would insert the digits of their
    // decimal values instead, turning '1', '2', '3' and '8' into "line breaks".
    NSCharacterSet *newLineCharacters = [NSCharacterSet characterSetWithCharactersInString:
                                         [NSString stringWithFormat:@"\n\r%C%C%C%C",
                                          (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]];

    // Scanner (must not skip anything, otherwise whitespace would be lost)
    NSScanner *scanner = [[NSScanner alloc] initWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    NSMutableString *result = [[NSMutableString alloc] init];
    NSString *temp;

    // Scan
    do {

        // Get non new line characters
        temp = nil;
        [scanner scanUpToCharactersFromSet:newLineCharacters intoString:&temp];
        if (temp) [result appendString:temp];
        temp = nil;

        // Add <br />s
        if ([scanner scanString:@"\r\n" intoString:nil]) {

            // Combine \r\n into just 1 <br />
            [result appendString:@"<br />"];

        } else if ([scanner scanCharactersFromSet:newLineCharacters intoString:&temp]) {

            // Scan other new line characters and add <br />s, one per character
            if (temp) {
                for (NSUInteger i = 0; i < temp.length; i++) {
                    [result appendString:@"<br />"];
                }
            }

        }

    } while (![scanner isAtEnd]);

    // Cleanup; the return value is retained so that it survives the drain below
    [scanner release];
    NSString *retString = [[NSString stringWithString:result] retain];
    [result release];

    // Drain
    [pool drain];

    // Return
    return [retString autorelease];

}

// Returns a new autoreleased string in which every run of whitespace and
// line-break characters is collapsed to a single space. Runs at the very
// beginning or end of the receiver are dropped entirely.
- (NSString *)stringByRemovingNewLinesAndWhitespace {

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Strange New lines:
    //  Next Line, U+0085
    //  Form Feed, U+000C
    //  Line Separator, U+2028
    //  Paragraph Separator, U+2029
    // Inserted with %C (not %d) so the set contains the code points rather
    // than the digits of their decimal values.
    NSCharacterSet *newLineAndWhitespaceCharacters = [NSCharacterSet characterSetWithCharactersInString:
                                                      [NSString stringWithFormat:@" \t\n\r%C%C%C%C",
                                                       (unichar)0x0085, (unichar)0x000C, (unichar)0x2028, (unichar)0x2029]];

    // Scanner
    NSScanner *scanner = [[NSScanner alloc] initWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    NSMutableString *result = [[NSMutableString alloc] init];
    NSString *temp;

    // Scan
    while (![scanner isAtEnd]) {

        // Get non new line or whitespace characters
        temp = nil;
        [scanner scanUpToCharactersFromSet:newLineAndWhitespaceCharacters intoString:&temp];
        if (temp) [result appendString:temp];

        // Replace with a space
        if ([scanner scanCharactersFromSet:newLineAndWhitespaceCharacters intoString:NULL]) {
            if (result.length > 0 && ![scanner isAtEnd]) // Dont append space to beginning or end of result
                [result appendString:@" "];
        }

    }

    // Cleanup
    [scanner release];

    // Retain the return value so that it survives the drain below
    NSString *retString = [[NSString stringWithString:result] retain];
    [result release];

    // Drain
    [pool drain];

    // Return
    return [retString autorelease];

}

256 | - (NSString *)stringByLinkifyingURLs { 257 | if 
(!NSClassFromString(@"NSRegularExpression")) return self; 258 | NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; 259 | NSString *pattern = @"(?$1"] retain]; 263 | [pool drain]; 264 | return [modifiedString autorelease]; 265 | } 266 |

// Returns a new autoreleased string with the receiver's tags removed.
// Every distinct "<...>" span is collected, each occurrence is replaced by a
// single space (or by nothing for the inline tags listed below), and the
// result is passed through -stringByRemovingNewLinesAndWhitespace.
- (NSString *)stringByStrippingTags {

    // Find first < and short-cut if we can. This is done before the pool is
    // created so that the early return cannot leave an undrained pool behind.
    NSUInteger tagIndex = [self rangeOfString:@"<" options:NSLiteralSearch].location;
    if (tagIndex == NSNotFound) {
        return [NSString stringWithString:self]; // return copy of string as no tags found
    }

    // Pool
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Scan and find all tags
    NSScanner *scanner = [NSScanner scannerWithString:self];
    [scanner setCharactersToBeSkipped:nil];
    NSMutableSet *tags = [[NSMutableSet alloc] init];
    NSString *tag;
    do {

        // Scan up to <, then capture everything up to (not including) >
        tag = nil;
        [scanner scanUpToString:@"<" intoString:NULL];
        [scanner scanUpToString:@">" intoString:&tag];

        // Add to set, re-attaching the closing > that the scan stopped at
        if (tag) {
            NSString *t = [[NSString alloc] initWithFormat:@"%@>", tag];
            [tags addObject:t];
            [t release];
        }

    } while (![scanner isAtEnd]);

    // Strings
    NSMutableString *result = [[NSMutableString alloc] initWithString:self];
    NSString *finalString;

    // Replace tags
    NSString *replacement;
    for (NSString *t in tags) {

        // Replace tag with space unless it's an inline element.
        // NOTE(review): the eight literals below are empty in this copy (the
        // tag names appear to have been lost), and every collected tag ends
        // in ">", so this branch can never match — restore the inline tag list.
        replacement = @" ";
        if ([t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""] ||
            [t isEqualToString:@""]) {
            replacement = @"";
        }

        // Replace
        [result replaceOccurrencesOfString:t
                                withString:replacement
                                   options:NSLiteralSearch
                                     range:NSMakeRange(0, result.length)];
    }

    // Remove multi-spaces and line breaks; retained so it survives the drain
    finalString = [[result stringByRemovingNewLinesAndWhitespace] retain];

    // Cleanup
    [result release];
    [tags release];

    // Drain
    [pool drain];

    // Return
    return [finalString autorelease];

}

static NSDictionary *htmlEscapes = nil;
static NSDictionary *htmlUnescapes = nil;

// Lazily builds and returns the escape table: each key is a reserved HTML
// character and each object is the entity that replaces it.
// NOTE(review): NSDictionary enumeration order is undefined, so code that
// applies these pairs one after another must make sure "&" is handled first,
// otherwise the "&" of an already inserted entity gets escaped again.
+ (NSDictionary *)htmlEscapes {
    if (!htmlEscapes) {
        htmlEscapes = [[NSDictionary alloc] initWithObjectsAndKeys:
                       @"&amp;", @"&",
                       @"&lt;", @"<",
                       @"&gt;", @">",
                       nil
                       ];
    }
    return htmlEscapes;
}

357 | + (NSDictionary *)htmlUnescapes { 358 | if (!htmlUnescapes) { 359 | htmlUnescapes = [[NSDictionary alloc] initWithObjectsAndKeys: 360 | @"\n", @"
", 361 | @"\n", @"

", 362 | @"", @"

", 363 | @"", @"
    ", 364 | @"", @"
", 365 | @"", @"
  • ", 366 | @"\n", @"
  • ", 367 | @"", @"", 368 | @"", @"", 369 | @"", @"
    ", 370 | @"\n", @"
    ", 371 | @"", @"", 372 | @"", @"", 373 | @"", @"", 374 | @"", @"", 375 | @"", @"", 376 | @"", @"", 377 | @"", @"", 378 | @"", @"", 379 | @"&", @"&", 380 | @"<", @"<", 381 | @">", @">", 382 | //@"\n", @" ", 383 | nil 384 | ]; 385 | } 386 | return htmlUnescapes; 387 | } 388 | 389 | static NSString *replaceAll(NSString *s, NSDictionary *replacements) { 390 | for (NSString *key in replacements) { 391 | NSString *replacement = [replacements objectForKey:key]; 392 | s = [s stringByReplacingOccurrencesOfString:key withString:replacement]; 393 | } 394 | return s; 395 | } 396 | 397 | - (NSString *)htmlEscapedString { 398 | return replaceAll(self, [[self class] htmlEscapes]); 399 | } 400 | 401 | - (NSString *)htmlUnescapedString { 402 | return replaceAll(self, [[self class] htmlUnescapes]); 403 | } 404 | 405 | @end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GTMNSString-HTML 2 | ================ 3 | Dealing with NSStrings that contain HTML 4 | 5 | Copyright 2006-2008 Google Inc. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | use this file except in compliance with the License. You may obtain a copy 9 | of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 16 | License for the specific language governing permissions and limitations under 17 | the License. 
18 | 19 | 20 | NSString+HTML.h 21 | =============== 22 | MWFeedParser 23 | 24 | Copyright (c) 2010 Michael Waterfall 25 | 26 | Permission is hereby granted, free of charge, to any person obtaining a copy 27 | of this software and associated documentation files (the "Software"), to deal 28 | in the Software without restriction, including without limitation the rights 29 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | copies of the Software, and to permit persons to whom the Software is 31 | furnished to do so, subject to the following conditions: 32 | 33 | 1. The above copyright notice and this permission notice shall be included 34 | in all copies or substantial portions of the Software. 35 | 36 | 2. This Software cannot be used to archive or collect data such as (but not 37 | limited to) that of events, news, experiences and activities, for the 38 | purpose of any concept relating to diary/journal keeping. 39 | 40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 41 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 42 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 43 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 44 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 45 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 46 | THE SOFTWARE. 47 | 48 | --------------------------------------------------------------------------------