├── .gitignore ├── .swift-version ├── .swiftlint.yml ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── HTMLSpecialCharacters │ └── HTMLSpecialCharacters.swift └── Tests ├── HTMLSpecialCharactersTests ├── HTMLSpecialCharactersTests.swift └── XCTestManifests.swift └── LinuxMain.swift /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | ## Build generated 6 | build/ 7 | DerivedData/ 8 | 9 | ## Various settings 10 | *.pbxuser 11 | !default.pbxuser 12 | *.mode1v3 13 | !default.mode1v3 14 | *.mode2v3 15 | !default.mode2v3 16 | *.perspectivev3 17 | !default.perspectivev3 18 | xcuserdata/ 19 | 20 | ## Other 21 | *.moved-aside 22 | *.xcuserstate 23 | 24 | ## Obj-C/Swift specific 25 | *.hmap 26 | *.ipa 27 | *.dSYM.zip 28 | *.dSYM 29 | 30 | ## Playgrounds 31 | timeline.xctimeline 32 | playground.xcworkspace 33 | 34 | # Swift Package Manager 35 | # 36 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 37 | # Packages/ 38 | .build/ 39 | .swiftpm 40 | 41 | # CocoaPods 42 | # 43 | # We recommend against adding the Pods directory to your .gitignore. However 44 | # you should judge for yourself, the pros and cons are mentioned at: 45 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 46 | # 47 | # Pods/ 48 | 49 | # Carthage 50 | # 51 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 52 | # Carthage/Checkouts 53 | 54 | Carthage/Build 55 | 56 | # fastlane 57 | # 58 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 59 | # screenshots whenever they are needed. 
60 | # For more information about the recommended setup visit: 61 | # https://github.com/fastlane/fastlane/blob/master/fastlane/docs/Gitignore.md 62 | 63 | fastlane/report.xml 64 | fastlane/Preview.html 65 | fastlane/screenshots 66 | fastlane/test_output 67 | 68 | .DS_Store 69 | *~ 70 | \#* 71 | -------------------------------------------------------------------------------- /.swift-version: -------------------------------------------------------------------------------- 1 | 4.0 2 | -------------------------------------------------------------------------------- /.swiftlint.yml: -------------------------------------------------------------------------------- 1 | disabled_rules: 2 | - trailing_whitespace 3 | - valid_docs 4 | - type_name 5 | - variable_name 6 | 7 | excluded: 8 | - reddift/vendor 9 | - playground 10 | - reddiftTests 11 | - reddiftSample 12 | - reddiftSampleTV 13 | 14 | file_length: 15 | warning: 2000 16 | error: 4000 17 | 18 | line_length: 19 | warning: 12000 20 | error: 20000 21 | 22 | type_body_length: 23 | warning: 1000 24 | error: 2000 25 | 26 | variable_name: 27 | max_length: 28 | warning: 50 29 | error: 40 30 | min_length: 31 | warning: 1 32 | error: 0 33 | 34 | function_parameter_count: 35 | warning: 10 36 | error: 20 37 | 38 | function_body_length: 39 | warning: 200 40 | error: 400 41 | 42 | cyclomatic_complexity: 43 | warning: 100 44 | error: 150 45 | 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 sonson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons 
to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.1 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "HTMLSpecialCharacters", 8 | products: [ 9 | // Products define the executables and libraries produced by a package, and make them visible to other packages. 10 | .library( 11 | name: "HTMLSpecialCharacters", 12 | targets: ["HTMLSpecialCharacters"]), 13 | ], 14 | dependencies: [ 15 | // Dependencies declare other packages that this package depends on. 16 | // .package(url: /* package url */, from: "1.0.0"), 17 | ], 18 | targets: [ 19 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 20 | // Targets can depend on other targets in this package, and on products in packages which this package depends on. 
21 | .target( 22 | name: "HTMLSpecialCharacters", 23 | dependencies: []), 24 | .testTarget( 25 | name: "HTMLSpecialCharactersTests", 26 | dependencies: ["HTMLSpecialCharacters"]), 27 | ] 28 | ) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HTMLSpecialCharacters 2 | 3 | Library to escape/unescape HTML special characters in Swift. 4 | [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac) is known as a great library which supports escaping/unescaping HTML special characters. 5 | But it's written in Objective-C. 6 | 7 | # Test code 8 | 9 | HTMLSpecialCharacters passes almost the same test code as [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac/blob/master/Foundation/GTMNSString%2BHTMLTest.m). 10 | Please check it. 11 | 12 | # Performance 13 | 14 | HTMLSpecialCharacters can escape/unescape HTML special characters much faster than [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac). 15 | The figure below shows the performance of escaping/unescaping the test code. These scores were measured on an iMac (27-inch Late 2012). 16 | Test code for Google Toolbox for Mac is [here](https://github.com/sonsongithub/GTMHTMLSpecialCharacters). 17 | 18 | 19 | 20 | 21 | 22 | # Acknowledgement 23 | 24 | [@norio_nomura](https://github.com) gave me a lot of code and comments. This project is based on [his code](https://gist.github.com/norio-nomura/2a79822004e7c89228300cf19595ca99). 25 | 26 | # License 27 | 28 | MIT License. This library includes source code from [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac).
-------------------------------------------------------------------------------- /Sources/HTMLSpecialCharacters/HTMLSpecialCharacters.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLSpecialCharacters.swift 3 | // HTMLSpecialCharacters 4 | // 5 | // Created by sonson on 2017/02/08. 6 | // Copyright © 2017年 sonson. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | // MARK: - Table for unescaping 12 | 13 | // One row of the unescape look-up table (LUT): unescapingCodes is an entity name as UTF-16 code units (without the surrounding "&" and ";"), code is the UTF-16 code unit that name decodes to. 14 | private struct HtmlUnescapeMap { 15 | let unescapingCodes: [unichar] 16 | let code: unichar 17 | init(_ u: [unichar], _ c: unichar) { 18 | unescapingCodes = u 19 | code = c 20 | } 21 | } 22 | 23 | /** 24 | Returns the unescape table whose entity names are `length` UTF-16 code units long. 25 | - parameter length: Length of the entity name to look up; tables exist for lengths 2 through 8. 26 | - returns: Array of HtmlUnescapeMap for that length, or nil for any other length. 27 | */ 28 | private func getUnescapeTable(length: Int) -> [HtmlUnescapeMap]? { 29 | switch length { 30 | case 2: 31 | return unicodeHtmlUnescapeMapNameLength_2 32 | case 3: 33 | return unicodeHtmlUnescapeMapNameLength_3 34 | case 4: 35 | return unicodeHtmlUnescapeMapNameLength_4 36 | case 5: 37 | return unicodeHtmlUnescapeMapNameLength_5 38 | case 6: 39 | return unicodeHtmlUnescapeMapNameLength_6 40 | case 7: 41 | return unicodeHtmlUnescapeMapNameLength_7 42 | case 8: 43 | return unicodeHtmlUnescapeMapNameLength_8 44 | default: 45 | return nil 46 | } 47 | } 48 | 49 | private let unicodeHtmlUnescapeMapNameLength_2: [HtmlUnescapeMap] = [ 50 | HtmlUnescapeMap([108, 116], 60), // "lt" => "<" 51 | HtmlUnescapeMap([103, 116], 62), // "gt" => ">" 52 | HtmlUnescapeMap([77, 117], 924), // "Mu" => "Μ" 53 | HtmlUnescapeMap([78, 117], 925), // "Nu" => "Ν" 54 | HtmlUnescapeMap([88, 105], 926), // "Xi" => "Ξ" 55 | HtmlUnescapeMap([80, 105], 928), // "Pi" => "Π" 56 | HtmlUnescapeMap([109, 117], 956), // "mu" => "μ" 57 | HtmlUnescapeMap([110, 117], 957), // "nu" => "ν" 58 | HtmlUnescapeMap([120, 
105], 958), // "xi" => "ξ" 59 | HtmlUnescapeMap([112, 105], 960), // "pi" => "π" 60 | HtmlUnescapeMap([110, 105], 8715), // "ni" => "∋" 61 | HtmlUnescapeMap([111, 114], 8744), // "or" => "∨" 62 | HtmlUnescapeMap([110, 101], 8800), // "ne" => "≠" 63 | HtmlUnescapeMap([108, 101], 8804), // "le" => "≤" 64 | HtmlUnescapeMap([103, 101], 8805) // "ge" => "≥" 65 | ] 66 | 67 | private let unicodeHtmlUnescapeMapNameLength_3: [HtmlUnescapeMap] = [ 68 | HtmlUnescapeMap([97, 109, 112], 38), // "amp" => "&" 69 | HtmlUnescapeMap([121, 101, 110], 165), // "yen" => "¥" 70 | HtmlUnescapeMap([117, 109, 108], 168), // "uml" => "¨" 71 | HtmlUnescapeMap([110, 111, 116], 172), // "not" => "¬" 72 | HtmlUnescapeMap([115, 104, 121], 173), // "shy" => "­" 73 | HtmlUnescapeMap([114, 101, 103], 174), // "reg" => "®" 74 | HtmlUnescapeMap([100, 101, 103], 176), // "deg" => "°" 75 | HtmlUnescapeMap([69, 84, 72], 208), // "ETH" => "Ð" 76 | HtmlUnescapeMap([101, 116, 104], 240), // "eth" => "ð" 77 | HtmlUnescapeMap([69, 116, 97], 919), // "Eta" => "Η" 78 | HtmlUnescapeMap([82, 104, 111], 929), // "Rho" => "Ρ" 79 | HtmlUnescapeMap([84, 97, 117], 932), // "Tau" => "Τ" 80 | HtmlUnescapeMap([80, 104, 105], 934), // "Phi" => "Φ" 81 | HtmlUnescapeMap([67, 104, 105], 935), // "Chi" => "Χ" 82 | HtmlUnescapeMap([80, 115, 105], 936), // "Psi" => "Ψ" 83 | HtmlUnescapeMap([101, 116, 97], 951), // "eta" => "η" 84 | HtmlUnescapeMap([114, 104, 111], 961), // "rho" => "ρ" 85 | HtmlUnescapeMap([116, 97, 117], 964), // "tau" => "τ" 86 | HtmlUnescapeMap([112, 104, 105], 966), // "phi" => "φ" 87 | HtmlUnescapeMap([99, 104, 105], 967), // "chi" => "χ" 88 | HtmlUnescapeMap([112, 115, 105], 968), // "psi" => "ψ" 89 | HtmlUnescapeMap([112, 105, 118], 982), // "piv" => "ϖ" 90 | HtmlUnescapeMap([122, 119, 106], 8205), // "zwj" => "‍" 91 | HtmlUnescapeMap([108, 114, 109], 8206), // "lrm" => "‎" 92 | HtmlUnescapeMap([114, 108, 109], 8207), // "rlm" => "‏" 93 | HtmlUnescapeMap([115, 117, 109], 8721), // "sum" => "∑" 94 | 
HtmlUnescapeMap([97, 110, 103], 8736), // "ang" => "∠" 95 | HtmlUnescapeMap([97, 110, 100], 8743), // "and" => "∧" 96 | HtmlUnescapeMap([99, 97, 112], 8745), // "cap" => "∩" 97 | HtmlUnescapeMap([99, 117, 112], 8746), // "cup" => "∪" 98 | HtmlUnescapeMap([105, 110, 116], 8747), // "int" => "∫" 99 | HtmlUnescapeMap([115, 105, 109], 8764), // "sim" => "∼" 100 | HtmlUnescapeMap([115, 117, 98], 8834), // "sub" => "⊂" 101 | HtmlUnescapeMap([115, 117, 112], 8835), // "sup" => "⊃" 102 | HtmlUnescapeMap([108, 111, 122], 9674) // "loz" => "◊" 103 | ] 104 | 105 | private let unicodeHtmlUnescapeMapNameLength_4: [HtmlUnescapeMap] = [ 106 | HtmlUnescapeMap([113, 117, 111, 116], 34), // "quot" => """ 107 | HtmlUnescapeMap([97, 112, 111, 115], 39), // "apos" => "'" 108 | HtmlUnescapeMap([110, 98, 115, 112], 160), // "nbsp" => " " 109 | HtmlUnescapeMap([99, 101, 110, 116], 162), // "cent" => "¢" 110 | HtmlUnescapeMap([115, 101, 99, 116], 167), // "sect" => "§" 111 | HtmlUnescapeMap([99, 111, 112, 121], 169), // "copy" => "©" 112 | HtmlUnescapeMap([111, 114, 100, 102], 170), // "ordf" => "ª" 113 | HtmlUnescapeMap([109, 97, 99, 114], 175), // "macr" => "¯" 114 | HtmlUnescapeMap([115, 117, 112, 50], 178), // "sup2" => "²" 115 | HtmlUnescapeMap([115, 117, 112, 51], 179), // "sup3" => "³" 116 | HtmlUnescapeMap([112, 97, 114, 97], 182), // "para" => "¶" 117 | HtmlUnescapeMap([115, 117, 112, 49], 185), // "sup1" => "¹" 118 | HtmlUnescapeMap([111, 114, 100, 109], 186), // "ordm" => "º" 119 | HtmlUnescapeMap([65, 117, 109, 108], 196), // "Auml" => "Ä" 120 | HtmlUnescapeMap([69, 117, 109, 108], 203), // "Euml" => "Ë" 121 | HtmlUnescapeMap([73, 117, 109, 108], 207), // "Iuml" => "Ï" 122 | HtmlUnescapeMap([79, 117, 109, 108], 214), // "Ouml" => "Ö" 123 | HtmlUnescapeMap([85, 117, 109, 108], 220), // "Uuml" => "Ü" 124 | HtmlUnescapeMap([97, 117, 109, 108], 228), // "auml" => "ä" 125 | HtmlUnescapeMap([101, 117, 109, 108], 235), // "euml" => "ë" 126 | HtmlUnescapeMap([105, 117, 109, 108], 239), 
// "iuml" => "ï" 127 | HtmlUnescapeMap([111, 117, 109, 108], 246), // "ouml" => "ö" 128 | HtmlUnescapeMap([117, 117, 109, 108], 252), // "uuml" => "ü" 129 | HtmlUnescapeMap([121, 117, 109, 108], 255), // "yuml" => "ÿ" 130 | HtmlUnescapeMap([89, 117, 109, 108], 376), // "Yuml" => "Ÿ" 131 | HtmlUnescapeMap([102, 110, 111, 102], 402), // "fnof" => "ƒ" 132 | HtmlUnescapeMap([99, 105, 114, 99], 710), // "circ" => "ˆ" 133 | HtmlUnescapeMap([66, 101, 116, 97], 914), // "Beta" => "Β" 134 | HtmlUnescapeMap([90, 101, 116, 97], 918), // "Zeta" => "Ζ" 135 | HtmlUnescapeMap([73, 111, 116, 97], 921), // "Iota" => "Ι" 136 | HtmlUnescapeMap([98, 101, 116, 97], 946), // "beta" => "β" 137 | HtmlUnescapeMap([122, 101, 116, 97], 950), // "zeta" => "ζ" 138 | HtmlUnescapeMap([105, 111, 116, 97], 953), // "iota" => "ι" 139 | HtmlUnescapeMap([101, 110, 115, 112], 8194), // "ensp" => " " 140 | HtmlUnescapeMap([101, 109, 115, 112], 8195), // "emsp" => " " 141 | HtmlUnescapeMap([122, 119, 110, 106], 8204), // "zwnj" => "‌" 142 | HtmlUnescapeMap([98, 117, 108, 108], 8226), // "bull" => "•" 143 | HtmlUnescapeMap([101, 117, 114, 111], 8364), // "euro" => "€" 144 | HtmlUnescapeMap([114, 101, 97, 108], 8476), // "real" => "ℜ" 145 | HtmlUnescapeMap([108, 97, 114, 114], 8592), // "larr" => "←" 146 | HtmlUnescapeMap([117, 97, 114, 114], 8593), // "uarr" => "↑" 147 | HtmlUnescapeMap([114, 97, 114, 114], 8594), // "rarr" => "→" 148 | HtmlUnescapeMap([100, 97, 114, 114], 8595), // "darr" => "↓" 149 | HtmlUnescapeMap([104, 97, 114, 114], 8596), // "harr" => "↔" 150 | HtmlUnescapeMap([108, 65, 114, 114], 8656), // "lArr" => "⇐" 151 | HtmlUnescapeMap([117, 65, 114, 114], 8657), // "uArr" => "⇑" 152 | HtmlUnescapeMap([114, 65, 114, 114], 8658), // "rArr" => "⇒" 153 | HtmlUnescapeMap([100, 65, 114, 114], 8659), // "dArr" => "⇓" 154 | HtmlUnescapeMap([104, 65, 114, 114], 8660), // "hArr" => "⇔" 155 | HtmlUnescapeMap([112, 97, 114, 116], 8706), // "part" => "∂" 156 | HtmlUnescapeMap([105, 115, 105, 110], 
8712), // "isin" => "∈" 157 | HtmlUnescapeMap([112, 114, 111, 100], 8719), // "prod" => "∏" 158 | HtmlUnescapeMap([112, 114, 111, 112], 8733), // "prop" => "∝" 159 | HtmlUnescapeMap([99, 111, 110, 103], 8773), // "cong" => "≅" 160 | HtmlUnescapeMap([110, 115, 117, 98], 8836), // "nsub" => "⊄" 161 | HtmlUnescapeMap([115, 117, 98, 101], 8838), // "sube" => "⊆" 162 | HtmlUnescapeMap([115, 117, 112, 101], 8839), // "supe" => "⊇" 163 | HtmlUnescapeMap([112, 101, 114, 112], 8869), // "perp" => "⊥" 164 | HtmlUnescapeMap([115, 100, 111, 116], 8901), // "sdot" => "⋅" 165 | HtmlUnescapeMap([108, 97, 110, 103], 9001), // "lang" => "〈" 166 | HtmlUnescapeMap([114, 97, 110, 103], 9002) // "rang" => "〉" 167 | ] 168 | 169 | private let unicodeHtmlUnescapeMapNameLength_5: [HtmlUnescapeMap] = [ 170 | HtmlUnescapeMap([105, 101, 120, 99, 108], 161), // "iexcl" => "¡" 171 | HtmlUnescapeMap([112, 111, 117, 110, 100], 163), // "pound" => "£" 172 | HtmlUnescapeMap([108, 97, 113, 117, 111], 171), // "laquo" => "«" 173 | HtmlUnescapeMap([97, 99, 117, 116, 101], 180), // "acute" => "´" 174 | HtmlUnescapeMap([109, 105, 99, 114, 111], 181), // "micro" => "µ" 175 | HtmlUnescapeMap([99, 101, 100, 105, 108], 184), // "cedil" => "¸" 176 | HtmlUnescapeMap([114, 97, 113, 117, 111], 187), // "raquo" => "»" 177 | HtmlUnescapeMap([65, 99, 105, 114, 99], 194), // "Acirc" => "Â" 178 | HtmlUnescapeMap([65, 114, 105, 110, 103], 197), // "Aring" => "Å" 179 | HtmlUnescapeMap([65, 69, 108, 105, 103], 198), // "AElig" => "Æ" 180 | HtmlUnescapeMap([69, 99, 105, 114, 99], 202), // "Ecirc" => "Ê" 181 | HtmlUnescapeMap([73, 99, 105, 114, 99], 206), // "Icirc" => "Î" 182 | HtmlUnescapeMap([79, 99, 105, 114, 99], 212), // "Ocirc" => "Ô" 183 | HtmlUnescapeMap([116, 105, 109, 101, 115], 215), // "times" => "×" 184 | HtmlUnescapeMap([85, 99, 105, 114, 99], 219), // "Ucirc" => "Û" 185 | HtmlUnescapeMap([84, 72, 79, 82, 78], 222), // "THORN" => "Þ" 186 | HtmlUnescapeMap([115, 122, 108, 105, 103], 223), // "szlig" => "ß" 
187 | HtmlUnescapeMap([97, 99, 105, 114, 99], 226), // "acirc" => "â" 188 | HtmlUnescapeMap([97, 114, 105, 110, 103], 229), // "aring" => "å" 189 | HtmlUnescapeMap([97, 101, 108, 105, 103], 230), // "aelig" => "æ" 190 | HtmlUnescapeMap([101, 99, 105, 114, 99], 234), // "ecirc" => "ê" 191 | HtmlUnescapeMap([105, 99, 105, 114, 99], 238), // "icirc" => "î" 192 | HtmlUnescapeMap([111, 99, 105, 114, 99], 244), // "ocirc" => "ô" 193 | HtmlUnescapeMap([117, 99, 105, 114, 99], 251), // "ucirc" => "û" 194 | HtmlUnescapeMap([116, 104, 111, 114, 110], 254), // "thorn" => "þ" 195 | HtmlUnescapeMap([79, 69, 108, 105, 103], 338), // "OElig" => "Œ" 196 | HtmlUnescapeMap([111, 101, 108, 105, 103], 339), // "oelig" => "œ" 197 | HtmlUnescapeMap([116, 105, 108, 100, 101], 732), // "tilde" => "˜" 198 | HtmlUnescapeMap([65, 108, 112, 104, 97], 913), // "Alpha" => "Α" 199 | HtmlUnescapeMap([71, 97, 109, 109, 97], 915), // "Gamma" => "Γ" 200 | HtmlUnescapeMap([68, 101, 108, 116, 97], 916), // "Delta" => "Δ" 201 | HtmlUnescapeMap([84, 104, 101, 116, 97], 920), // "Theta" => "Θ" 202 | HtmlUnescapeMap([75, 97, 112, 112, 97], 922), // "Kappa" => "Κ" 203 | HtmlUnescapeMap([83, 105, 103, 109, 97], 931), // "Sigma" => "Σ" 204 | HtmlUnescapeMap([79, 109, 101, 103, 97], 937), // "Omega" => "Ω" 205 | HtmlUnescapeMap([97, 108, 112, 104, 97], 945), // "alpha" => "α" 206 | HtmlUnescapeMap([103, 97, 109, 109, 97], 947), // "gamma" => "γ" 207 | HtmlUnescapeMap([100, 101, 108, 116, 97], 948), // "delta" => "δ" 208 | HtmlUnescapeMap([116, 104, 101, 116, 97], 952), // "theta" => "θ" 209 | HtmlUnescapeMap([107, 97, 112, 112, 97], 954), // "kappa" => "κ" 210 | HtmlUnescapeMap([115, 105, 103, 109, 97], 963), // "sigma" => "σ" 211 | HtmlUnescapeMap([111, 109, 101, 103, 97], 969), // "omega" => "ω" 212 | HtmlUnescapeMap([117, 112, 115, 105, 104], 978), // "upsih" => "ϒ" 213 | HtmlUnescapeMap([110, 100, 97, 115, 104], 8211), // "ndash" => "–" 214 | HtmlUnescapeMap([109, 100, 97, 115, 104], 8212), // "mdash" => 
"—" 215 | HtmlUnescapeMap([108, 115, 113, 117, 111], 8216), // "lsquo" => "‘" 216 | HtmlUnescapeMap([114, 115, 113, 117, 111], 8217), // "rsquo" => "’" 217 | HtmlUnescapeMap([115, 98, 113, 117, 111], 8218), // "sbquo" => "‚" 218 | HtmlUnescapeMap([108, 100, 113, 117, 111], 8220), // "ldquo" => "“" 219 | HtmlUnescapeMap([114, 100, 113, 117, 111], 8221), // "rdquo" => "”" 220 | HtmlUnescapeMap([98, 100, 113, 117, 111], 8222), // "bdquo" => "„" 221 | HtmlUnescapeMap([112, 114, 105, 109, 101], 8242), // "prime" => "′" 222 | HtmlUnescapeMap([80, 114, 105, 109, 101], 8243), // "Prime" => "″" 223 | HtmlUnescapeMap([111, 108, 105, 110, 101], 8254), // "oline" => "‾" 224 | HtmlUnescapeMap([102, 114, 97, 115, 108], 8260), // "frasl" => "⁄" 225 | HtmlUnescapeMap([105, 109, 97, 103, 101], 8465), // "image" => "ℑ" 226 | HtmlUnescapeMap([116, 114, 97, 100, 101], 8482), // "trade" => "™" 227 | HtmlUnescapeMap([99, 114, 97, 114, 114], 8629), // "crarr" => "↵" 228 | HtmlUnescapeMap([101, 120, 105, 115, 116], 8707), // "exist" => "∃" 229 | HtmlUnescapeMap([101, 109, 112, 116, 121], 8709), // "empty" => "∅" 230 | HtmlUnescapeMap([110, 97, 98, 108, 97], 8711), // "nabla" => "∇" 231 | HtmlUnescapeMap([110, 111, 116, 105, 110], 8713), // "notin" => "∉" 232 | HtmlUnescapeMap([109, 105, 110, 117, 115], 8722), // "minus" => "−" 233 | HtmlUnescapeMap([114, 97, 100, 105, 99], 8730), // "radic" => "√" 234 | HtmlUnescapeMap([105, 110, 102, 105, 110], 8734), // "infin" => "∞" 235 | HtmlUnescapeMap([97, 115, 121, 109, 112], 8776), // "asymp" => "≈" 236 | HtmlUnescapeMap([101, 113, 117, 105, 118], 8801), // "equiv" => "≡" 237 | HtmlUnescapeMap([111, 112, 108, 117, 115], 8853), // "oplus" => "⊕" 238 | HtmlUnescapeMap([108, 99, 101, 105, 108], 8968), // "lceil" => "⌈" 239 | HtmlUnescapeMap([114, 99, 101, 105, 108], 8969), // "rceil" => "⌉" 240 | HtmlUnescapeMap([99, 108, 117, 98, 115], 9827), // "clubs" => "♣" 241 | HtmlUnescapeMap([100, 105, 97, 109, 115], 9830) // "diams" => "♦" 242 | ] 243 | 244 
| private let unicodeHtmlUnescapeMapNameLength_6: [HtmlUnescapeMap] = [ 245 | HtmlUnescapeMap([99, 117, 114, 114, 101, 110], 164), // "curren" => "¤" 246 | HtmlUnescapeMap([98, 114, 118, 98, 97, 114], 166), // "brvbar" => "¦" 247 | HtmlUnescapeMap([112, 108, 117, 115, 109, 110], 177), // "plusmn" => "±" 248 | HtmlUnescapeMap([109, 105, 100, 100, 111, 116], 183), // "middot" => "·" 249 | HtmlUnescapeMap([102, 114, 97, 99, 49, 52], 188), // "frac14" => "¼" 250 | HtmlUnescapeMap([102, 114, 97, 99, 49, 50], 189), // "frac12" => "½" 251 | HtmlUnescapeMap([102, 114, 97, 99, 51, 52], 190), // "frac34" => "¾" 252 | HtmlUnescapeMap([105, 113, 117, 101, 115, 116], 191), // "iquest" => "¿" 253 | HtmlUnescapeMap([65, 103, 114, 97, 118, 101], 192), // "Agrave" => "À" 254 | HtmlUnescapeMap([65, 97, 99, 117, 116, 101], 193), // "Aacute" => "Á" 255 | HtmlUnescapeMap([65, 116, 105, 108, 100, 101], 195), // "Atilde" => "Ã" 256 | HtmlUnescapeMap([67, 99, 101, 100, 105, 108], 199), // "Ccedil" => "Ç" 257 | HtmlUnescapeMap([69, 103, 114, 97, 118, 101], 200), // "Egrave" => "È" 258 | HtmlUnescapeMap([69, 97, 99, 117, 116, 101], 201), // "Eacute" => "É" 259 | HtmlUnescapeMap([73, 103, 114, 97, 118, 101], 204), // "Igrave" => "Ì" 260 | HtmlUnescapeMap([73, 97, 99, 117, 116, 101], 205), // "Iacute" => "Í" 261 | HtmlUnescapeMap([78, 116, 105, 108, 100, 101], 209), // "Ntilde" => "Ñ" 262 | HtmlUnescapeMap([79, 103, 114, 97, 118, 101], 210), // "Ograve" => "Ò" 263 | HtmlUnescapeMap([79, 97, 99, 117, 116, 101], 211), // "Oacute" => "Ó" 264 | HtmlUnescapeMap([79, 116, 105, 108, 100, 101], 213), // "Otilde" => "Õ" 265 | HtmlUnescapeMap([79, 115, 108, 97, 115, 104], 216), // "Oslash" => "Ø" 266 | HtmlUnescapeMap([85, 103, 114, 97, 118, 101], 217), // "Ugrave" => "Ù" 267 | HtmlUnescapeMap([85, 97, 99, 117, 116, 101], 218), // "Uacute" => "Ú" 268 | HtmlUnescapeMap([89, 97, 99, 117, 116, 101], 221), // "Yacute" => "Ý" 269 | HtmlUnescapeMap([97, 103, 114, 97, 118, 101], 224), // "agrave" => "à" 270 | 
HtmlUnescapeMap([97, 97, 99, 117, 116, 101], 225), // "aacute" => "á" 271 | HtmlUnescapeMap([97, 116, 105, 108, 100, 101], 227), // "atilde" => "ã" 272 | HtmlUnescapeMap([99, 99, 101, 100, 105, 108], 231), // "ccedil" => "ç" 273 | HtmlUnescapeMap([101, 103, 114, 97, 118, 101], 232), // "egrave" => "è" 274 | HtmlUnescapeMap([101, 97, 99, 117, 116, 101], 233), // "eacute" => "é" 275 | HtmlUnescapeMap([105, 103, 114, 97, 118, 101], 236), // "igrave" => "ì" 276 | HtmlUnescapeMap([105, 97, 99, 117, 116, 101], 237), // "iacute" => "í" 277 | HtmlUnescapeMap([110, 116, 105, 108, 100, 101], 241), // "ntilde" => "ñ" 278 | HtmlUnescapeMap([111, 103, 114, 97, 118, 101], 242), // "ograve" => "ò" 279 | HtmlUnescapeMap([111, 97, 99, 117, 116, 101], 243), // "oacute" => "ó" 280 | HtmlUnescapeMap([111, 116, 105, 108, 100, 101], 245), // "otilde" => "õ" 281 | HtmlUnescapeMap([100, 105, 118, 105, 100, 101], 247), // "divide" => "÷" 282 | HtmlUnescapeMap([111, 115, 108, 97, 115, 104], 248), // "oslash" => "ø" 283 | HtmlUnescapeMap([117, 103, 114, 97, 118, 101], 249), // "ugrave" => "ù" 284 | HtmlUnescapeMap([117, 97, 99, 117, 116, 101], 250), // "uacute" => "ú" 285 | HtmlUnescapeMap([121, 97, 99, 117, 116, 101], 253), // "yacute" => "ý" 286 | HtmlUnescapeMap([83, 99, 97, 114, 111, 110], 352), // "Scaron" => "Š" 287 | HtmlUnescapeMap([115, 99, 97, 114, 111, 110], 353), // "scaron" => "š" 288 | HtmlUnescapeMap([76, 97, 109, 98, 100, 97], 923), // "Lambda" => "Λ" 289 | HtmlUnescapeMap([108, 97, 109, 98, 100, 97], 955), // "lambda" => "λ" 290 | HtmlUnescapeMap([115, 105, 103, 109, 97, 102], 962), // "sigmaf" => "ς" 291 | HtmlUnescapeMap([116, 104, 105, 110, 115, 112], 8201), // "thinsp" => " " 292 | HtmlUnescapeMap([100, 97, 103, 103, 101, 114], 8224), // "dagger" => "†" 293 | HtmlUnescapeMap([68, 97, 103, 103, 101, 114], 8225), // "Dagger" => "‡" 294 | HtmlUnescapeMap([104, 101, 108, 108, 105, 112], 8230), // "hellip" => "…" 295 | HtmlUnescapeMap([112, 101, 114, 109, 105, 108], 8240), // 
"permil" => "‰" 296 | HtmlUnescapeMap([108, 115, 97, 113, 117, 111], 8249), // "lsaquo" => "‹" 297 | HtmlUnescapeMap([114, 115, 97, 113, 117, 111], 8250), // "rsaquo" => "›" 298 | HtmlUnescapeMap([119, 101, 105, 101, 114, 112], 8472), // "weierp" => "℘" 299 | HtmlUnescapeMap([102, 111, 114, 97, 108, 108], 8704), // "forall" => "∀" 300 | HtmlUnescapeMap([108, 111, 119, 97, 115, 116], 8727), // "lowast" => "∗" 301 | HtmlUnescapeMap([116, 104, 101, 114, 101, 52], 8756), // "there4" => "∴" 302 | HtmlUnescapeMap([111, 116, 105, 109, 101, 115], 8855), // "otimes" => "⊗" 303 | HtmlUnescapeMap([108, 102, 108, 111, 111, 114], 8970), // "lfloor" => "⌊" 304 | HtmlUnescapeMap([114, 102, 108, 111, 111, 114], 8971), // "rfloor" => "⌋" 305 | HtmlUnescapeMap([115, 112, 97, 100, 101, 115], 9824), // "spades" => "♠" 306 | HtmlUnescapeMap([104, 101, 97, 114, 116, 115], 9829) // "hearts" => "♥" 307 | ] 308 | 309 | private let unicodeHtmlUnescapeMapNameLength_7: [HtmlUnescapeMap] = [ 310 | HtmlUnescapeMap([69, 112, 115, 105, 108, 111, 110], 917), // "Epsilon" => "Ε" 311 | HtmlUnescapeMap([79, 109, 105, 99, 114, 111, 110], 927), // "Omicron" => "Ο" 312 | HtmlUnescapeMap([85, 112, 115, 105, 108, 111, 110], 933), // "Upsilon" => "Υ" 313 | HtmlUnescapeMap([101, 112, 115, 105, 108, 111, 110], 949), // "epsilon" => "ε" 314 | HtmlUnescapeMap([111, 109, 105, 99, 114, 111, 110], 959), // "omicron" => "ο" 315 | HtmlUnescapeMap([117, 112, 115, 105, 108, 111, 110], 965), // "upsilon" => "υ" 316 | HtmlUnescapeMap([97, 108, 101, 102, 115, 121, 109], 8501) // "alefsym" => "ℵ" 317 | ] 318 | 319 | private let unicodeHtmlUnescapeMapNameLength_8: [HtmlUnescapeMap] = [ 320 | HtmlUnescapeMap([116, 104, 101, 116, 97, 115, 121, 109], 977) // "thetasym" => "ϑ" 321 | ] 322 | 323 | // MARK: - Table for escaping 324 | 325 | // Structure as LUT(look up table) 326 | private struct HtmlEscapeMap { 327 | let unescapingCodes: [unichar] 328 | let code: unichar 329 | let count: Int 330 | init(_ c: unichar, _ u: 
[unichar]) { 331 | unescapingCodes = u 332 | code = c 333 | count = unescapingCodes.count 334 | } 335 | } 336 | 337 | private let unicodeHtmlEscapeMapForUTF8: [HtmlEscapeMap] = [ 338 | HtmlEscapeMap(34, [38, 113, 117, 111, 116, 59]), // => "quot" 339 | HtmlEscapeMap(38, [38, 97, 109, 112, 59]), // => "amp" 340 | HtmlEscapeMap(39, [38, 97, 112, 111, 115, 59]), // => "apos" 341 | HtmlEscapeMap(60, [38, 108, 116, 59]), // => "lt" 342 | HtmlEscapeMap(62, [38, 103, 116, 59]), // => "gt" 343 | HtmlEscapeMap(338, [38, 79, 69, 108, 105, 103, 59]), // => "OElig" 344 | HtmlEscapeMap(339, [38, 111, 101, 108, 105, 103, 59]), // => "oelig" 345 | HtmlEscapeMap(352, [38, 83, 99, 97, 114, 111, 110, 59]), // => "Scaron" 346 | HtmlEscapeMap(353, [38, 115, 99, 97, 114, 111, 110, 59]), // => "scaron" 347 | HtmlEscapeMap(376, [38, 89, 117, 109, 108, 59]), // => "Yuml" 348 | HtmlEscapeMap(710, [38, 99, 105, 114, 99, 59]), // => "circ" 349 | HtmlEscapeMap(732, [38, 116, 105, 108, 100, 101, 59]), // => "tilde" 350 | HtmlEscapeMap(8194, [38, 101, 110, 115, 112, 59]), // => "ensp" 351 | HtmlEscapeMap(8195, [38, 101, 109, 115, 112, 59]), // => "emsp" 352 | HtmlEscapeMap(8201, [38, 116, 104, 105, 110, 115, 112, 59]), // => "thinsp" 353 | HtmlEscapeMap(8204, [38, 122, 119, 110, 106, 59]), // => "zwnj" 354 | HtmlEscapeMap(8205, [38, 122, 119, 106, 59]), // => "zwj" 355 | HtmlEscapeMap(8206, [38, 108, 114, 109, 59]), // => "lrm" 356 | HtmlEscapeMap(8207, [38, 114, 108, 109, 59]), // => "rlm" 357 | HtmlEscapeMap(8211, [38, 110, 100, 97, 115, 104, 59]), // => "ndash" 358 | HtmlEscapeMap(8212, [38, 109, 100, 97, 115, 104, 59]), // => "mdash" 359 | HtmlEscapeMap(8216, [38, 108, 115, 113, 117, 111, 59]), // => "lsquo" 360 | HtmlEscapeMap(8217, [38, 114, 115, 113, 117, 111, 59]), // => "rsquo" 361 | HtmlEscapeMap(8218, [38, 115, 98, 113, 117, 111, 59]), // => "sbquo" 362 | HtmlEscapeMap(8220, [38, 108, 100, 113, 117, 111, 59]), // => "ldquo" 363 | HtmlEscapeMap(8221, [38, 114, 100, 113, 117, 111, 
59]), // => "rdquo" 364 | HtmlEscapeMap(8222, [38, 98, 100, 113, 117, 111, 59]), // => "bdquo" 365 | HtmlEscapeMap(8224, [38, 100, 97, 103, 103, 101, 114, 59]), // => "dagger" 366 | HtmlEscapeMap(8225, [38, 68, 97, 103, 103, 101, 114, 59]), // => "Dagger" 367 | HtmlEscapeMap(8240, [38, 112, 101, 114, 109, 105, 108, 59]), // => "permil" 368 | HtmlEscapeMap(8249, [38, 108, 115, 97, 113, 117, 111, 59]), // => "lsaquo" 369 | HtmlEscapeMap(8250, [38, 114, 115, 97, 113, 117, 111, 59]), // => "rsaquo" 370 | HtmlEscapeMap(8364, [38, 101, 117, 114, 111, 59]) // => "euro" 371 | ] 372 | 373 | // MARK: - 374 | 375 | /// Errors used while converting HTML escape sequences; the hex, decimal and named-entity converters in this file throw the first three cases. 376 | private enum HTMLSpecialCharactersError: Error { 377 | case invalidHexSquence 378 | case invalidDecimalSquence 379 | case invalidEscapeSquence 380 | case invalidBufferSequence 381 | case notErrorMatchedUnicode(code: unichar) 382 | } 383 | 384 | /** 385 | Three-way comparison of a UTF-16 code unit against the `code` of an HtmlEscapeMap entry: returns 1 when `v1` is greater than `v2.code`, -1 when it is less, and 0 when they are equal. 386 | */ 387 | private func comp(v1: unichar, v2: HtmlEscapeMap) -> Int { 388 | if v1 > v2.code { 389 | return 1 390 | } else if v1 < v2.code { 391 | return -1 392 | } else { 393 | return 0 394 | } 395 | } 396 | 397 | /** 398 | Binary search. 399 | - parameter key: Query. 400 | - parameter sortedArray: Must be sorted in ascending order. 401 | - parameter comparator: Comparator called with the key and a candidate element. 402 | - returns: The matching element of sortedArray and the number of times the comparator was executed, or nil if no element matches. 403 | */ 404 | internal func bsearch(with key: T, from sortedArray: [U], comparator: (T, U) -> Int) -> (U, Int)? { 405 | var searchCount = 0 406 | var startIndex = sortedArray.startIndex 407 | var endIndex = sortedArray.endIndex 408 | var range = startIndex.. 1 425 | return nil 426 | } 427 | 428 | // MARK: - Unicode 429 | 430 | /** 431 | Convert a Unicode scalar value to UTF-16 code units. 432 | - parameter unicodeScalar: Unicode scalar value to be converted. 433 | - returns: Array of `unichar`, which contains the UTF-16 code units.
*/
private func convertToSurrogatePair(unicodeScalar: UInt) -> [unichar] {
    // Bit layout, following https://en.wikipedia.org/wiki/UTF-16:
    //   scalar = 000uuuuu xxxxxxxx xxxxxxxx   with wwww = uuuuu - 1
    //   high   = 110110ww wwxxxxxx
    //   low    = 110111xx xxxxxxxx
    let planeBits = (unicodeScalar & 0x1F0000) >> 16
    // Without plane bits the value fits in one UTF-16 code unit. Returning it directly
    // avoids the unsigned underflow (a runtime trap) that `planeBits - 1` would cause,
    // e.g. for 0xFFFF.
    guard planeBits != 0 else { return [unichar(truncatingIfNeeded: unicodeScalar)] }
    let w = planeBits - 1
    let x1 = (unicodeScalar & 0xFC00) >> 10
    let x2 = unicodeScalar & 0x03FF
    let high = unichar(0xD800 + (w << 6) + x1)
    let low = unichar(0xDC00 + x2)
    return [high, low]
}

/**
 Combine a UTF-16 surrogate pair into the Unicode scalar value it encodes.
 - parameter first: First one of a surrogate pair (high surrogate, 0xD800...0xDBFF).
 - parameter second: Second one of a surrogate pair (low surrogate, 0xDC00...0xDFFF).
 - returns: Unicode scalar value, or nil when the two code units do not form a surrogate pair.
 */
private func convertToUnicodeScalar(firstOfSurrogatePair u1: unichar, second u2: unichar) -> UInt? {
    // This convert algorithm is based on https://en.wikipedia.org/wiki/UTF-16
    // Both ranges are inclusive: 0xD800 and 0xDC00 are valid surrogates
    // (U+1F400, for example, is encoded as D83D DC00).
    guard (0xD800...0xDBFF).contains(u1), (0xDC00...0xDFFF).contains(u2) else { return nil }

    let u = UInt((u1 & 0x03C0) >> 6) + 1
    let x1 = UInt(u1 & 0x003F)
    let x2 = UInt(u2 & 0x03FF)
    return (u << 16) + (x1 << 10) + x2
}

/**
 Convert a surrogate pair to the HTML hexadecimal character reference of its Unicode scalar value.
 - parameter first: First one of a surrogate pair.
 - parameter second: Second one of a surrogate pair.
 - returns: UTF-16 code units of "&#xHHHHH;" (uppercase hex digits, no leading zeros), or nil when the code units do not form a surrogate pair.
 */
private func convertToUnicodeScalarString(firstOfSurrogatePair u1: unichar, second u2: unichar) -> [unichar]? {
    guard let unicodeScalar = convertToUnicodeScalar(firstOfSurrogatePair: u1, second: u2) else { return nil }
    // A scalar built from a surrogate pair is at least 0x10000, so the digit string is never empty.
    let hexDigits = String(unicodeScalar, radix: 16, uppercase: true)
    return Array("&#x\(hexDigits);".utf16)
}

/**
 Convert the hexadecimal digits of a character reference to UTF-16 code units.
 - parameter hexCodeStorage: UTF-16 code units of the hex digits only ("0"-"9", "A"-"F", "a"-"f").
 - returns: One code unit for values up to 0xFFFF, otherwise the surrogate pair of the value.
 - throws: `invalidHexSquence` when a non-hex code unit is found; `invalidDecimalSquence` when the value exceeds 0x10FFFF (the case the original implementation reported).
 */
private func convertToUTF16Codes<T>(hexCodeStorage utf16Storage: T) throws -> [unichar] where T: ContiguousStorage, T.Iterator.Element == unichar {
    var scalar: UInt = 0
    for unit in utf16Storage {
        let digit: UInt
        switch unit {
        case 48...57: digit = UInt(unit) - 48        // "0"..."9"
        case 65...70: digit = UInt(unit) - 65 + 10   // "A"..."F"
        case 97...102: digit = UInt(unit) - 97 + 10  // "a"..."f"
        default: throw HTMLSpecialCharactersError.invalidHexSquence
        }
        scalar = (scalar << 4) + digit
        // Checking after every digit keeps an over-long reference from silently
        // shifting bits out of the accumulator and wrapping to a small value.
        guard scalar < 0x110000 else { throw HTMLSpecialCharactersError.invalidDecimalSquence }
    }
    // `<=` so that 0xFFFF is returned as a single code unit rather than being
    // handed to the surrogate-pair conversion.
    if scalar <= UInt(unichar.max) {
        return [unichar(scalar)]
    }
    return convertToSurrogatePair(unicodeScalar: scalar)
}

/**
 Convert a decimal UTF code string to the UTF16 code which includes matching UTF-8 characters.
*/
private func convertToUTF16Codes<T>(decimalCodeStorage utf16Storage: T) throws -> [unichar] where T: ContiguousStorage, T.Iterator.Element == unichar {
    // Accumulates ASCII decimal digits ("0"..."9" are 48...57) into a code point.
    // Throws `invalidDecimalSquence` for an empty sequence, a non-digit
    // character, or a value above 0x10FFFF.
    var codePoint: UInt = 0
    var digitCount = 0
    for unit in utf16Storage {
        switch unit {
        case 48...57: codePoint = codePoint * 10 + (UInt(unit) - 48)
        default: throw HTMLSpecialCharactersError.invalidDecimalSquence
        }
        digitCount += 1
        // Reject as soon as the value leaves the Unicode range; this also keeps
        // the multiplication above from overflowing on a long run of digits.
        guard codePoint < 0x110000 else { throw HTMLSpecialCharactersError.invalidDecimalSquence }
    }
    guard digitCount > 0 else { throw HTMLSpecialCharactersError.invalidDecimalSquence }

    // 0xFFFF itself still fits in a single code unit.
    if codePoint <= UInt(unichar.max) {
        return [unichar(codePoint)]
    }
    return convertToSurrogatePair(unicodeScalar: codePoint)
}

/**
 Convert a standard sequence code string to the UTF16 code which includes matching UTF-8 characters.
 - parameter utf16Storage: UTF16 code units of the candidate escape sequence.
 - returns: The `code` of the table entry whose `unescapingCodes` match the given code units.
 - throws: `HTMLSpecialCharactersError.invalidEscapeSquence` when the storage has no base address, no table exists for its length, or no entry matches.
 */
private func convertToUTF16Codes<T>(standardSequence utf16Storage: T) throws -> unichar where T: ContiguousStorage, T.Iterator.Element == unichar {
    return try utf16Storage.withUnsafeBufferPointer { (pointer) -> unichar in
        let length = pointer.count
        guard let unichars = pointer.baseAddress
            else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
        guard let table = getUnescapeTable(length: length)
            else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
        // Byte-wise comparison over `length` code units.
        // NOTE(review): assumes every entry returned for `length` holds at least
        // that many code units; confirm against getUnescapeTable.
        let byteCount = MemoryLayout<unichar>.size * length
        guard let entry = table.first(where: { memcmp($0.unescapingCodes, unichars, byteCount) == 0 })
            else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
        return entry.code
    }
}

// MARK: - Extension

/// Sequences that can hand out their elements as an `UnsafeBufferPointer`.
private protocol ContiguousStorage: Sequence {
    func withUnsafeBufferPointer<R>(_ body: (UnsafeBufferPointer<Iterator.Element>) throws -> R) rethrows -> R
}
extension Array: ContiguousStorage {}
extension ArraySlice: ContiguousStorage {}
extension ContiguousArray: ContiguousStorage {}

// MARK: -

extension String {

    /**
     Returns a new string made from the String by removing all HTML tags.
563 | */ 564 | public var removingHTMLTags: String { 565 | let length = utf16.count 566 | var buffer = [unichar](repeating: 0, count: utf16.count) 567 | 568 | NSString(string: self).getCharacters(&buffer) 569 | 570 | guard let destinationBuffer = NSMutableData(capacity: MemoryLayout.size * utf16.count) else { return self } 571 | 572 | // let p = UnsafeMutablePointer(&buffer) 573 | 574 | buffer.withUnsafeBufferPointer { (pointer) -> Void in 575 | let p = pointer.baseAddress! 576 | let leftParenthesis = unichar(UInt8(ascii: "<")) 577 | let rightParenthesis = unichar(UInt8(ascii: ">")) 578 | 579 | var begin = 0 580 | let end = length 581 | while let leftIndex = buffer.suffix(from: begin).firstIndex(of: leftParenthesis) { 582 | guard let rightIndex = buffer[leftIndex...size) 585 | destinationBuffer.append(p + begin, length: MemoryLayout.size * range.count) 586 | begin = rightIndex 587 | } 588 | if length - begin > 0 { 589 | let copyLength = length - begin 590 | destinationBuffer.append(p + begin, length: MemoryLayout.size * copyLength) 591 | } 592 | } 593 | 594 | return String(data: destinationBuffer as Data, encoding: .utf16LittleEndian) ?? self 595 | } 596 | 597 | /** 598 | Returns a new string made from the String by replacing all sequences to be escaped with the matching UTF-8 scalar codes. 599 | */ 600 | public var escapeHTML: String { 601 | let length = utf16.count 602 | let buffer = UnsafeMutablePointer.allocate(capacity: utf16.count) 603 | defer { buffer.deallocate() } 604 | NSString(string: self).getCharacters(buffer) 605 | let margin = 0 606 | guard let destinationBuffer = NSMutableData(capacity: MemoryLayout.size * (utf16.count + margin)) else { return self } 607 | var start = 0 608 | for var i in 0...size * copyLength) 613 | 614 | result.0.unescapingCodes.withUnsafeBytes { (pointer) -> Void in 615 | let p = pointer.baseAddress! 
616 | destinationBuffer.append(p, length: MemoryLayout.size * result.0.count) 617 | } 618 | 619 | start = i + 1 620 | } else if i < length - 1 { 621 | if let result = convertToUnicodeScalarString(firstOfSurrogatePair: (buffer + i).pointee, second: (buffer + i + 1).pointee) { 622 | // 4byte character, surrogate pair. 623 | let copyLength = i - start 624 | destinationBuffer.append(buffer + start, length: MemoryLayout.size * copyLength) 625 | result.withUnsafeBytes { (pointer) -> Void in 626 | let p = pointer.baseAddress! 627 | destinationBuffer.append(p, length: MemoryLayout.size * result.count) 628 | } 629 | start = i + 2 630 | i += 1 631 | } 632 | } 633 | } 634 | if length - start > 0 { 635 | let copyLength = length - start 636 | destinationBuffer.append(buffer + start, length: MemoryLayout.size * copyLength) 637 | } 638 | return String(data: destinationBuffer as Data, encoding: .utf16LittleEndian) ?? self 639 | } 640 | 641 | /** 642 | Returns a new string made from the String by replacing all HTML unescaped sequences with the matching UTF-8 characters. 
643 | Original code written by @norio_nomura 644 | https://gist.github.com/norio-nomura/2a79822004e7c89228300cf19595ca99 645 | */ 646 | public var unescapeHTML: String { 647 | var buffer = [unichar](repeating: 0, count: utf16.count) 648 | NSString(string: self).getCharacters(&buffer) 649 | 650 | var end = buffer.endIndex 651 | let ampersand = unichar(UInt8(ascii: "&")) 652 | let semicolon = unichar(UInt8(ascii: ";")) 653 | let sharp = unichar(UInt8(ascii: "#")) 654 | let hexPrefixes = ["X", "x"].map { unichar(UInt8(ascii: $0)) } 655 | 656 | while let begin = buffer.prefix(upTo: end).reversed().firstIndex(of: ampersand).map({ buffer.index(before: $0.base) }) { 657 | defer { end = begin } 658 | // if we don't find a semicolon in the range, we don't have a sequence 659 | guard let semicolonIndex = buffer[begin..(utf16Storage: T) throws where T: ContiguousStorage, T.Iterator.Element == unichar { 694 | self = try utf16Storage.withUnsafeBufferPointer { 695 | guard let p = $0.baseAddress else { throw HTMLSpecialCharactersError.invalidBufferSequence } 696 | return String(utf16CodeUnits: p, count: $0.count) 697 | } 698 | } 699 | } 700 | -------------------------------------------------------------------------------- /Tests/HTMLSpecialCharactersTests/HTMLSpecialCharactersTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | @testable import HTMLSpecialCharacters 3 | 4 | final class HTMLSpecialCharactersTests: XCTestCase { 5 | 6 | // MARK: - Test for removing HTML tags 7 | 8 | override func setUp() { 9 | super.setUp() 10 | 11 | do { 12 | if let data = stringToBeUnescaped.data(using: .unicode) { 13 | _ = try NSAttributedString(data: data, options: [NSAttributedString.DocumentReadingOptionKey.documentType: NSAttributedString.DocumentType.html], documentAttributes: nil) 14 | } 15 | } catch { 16 | print(error) 17 | } 18 | } 19 | 20 | func testRemovingHTMLTags() { 21 | let data: [(String, String)] = [ 22 | ("aaa baaa
aaaaa", "aaa baaaaaaaa"), 23 | // ("aaa baaa
<", "aaa baaa<"), 24 | // ("", ""), 25 | // ("dhafhdsaihfiufhdsjkhfeifhhfifhiu", "dhafhdsaihfiufhdsjkhfeifhhfifhiu"), 26 | // ("<>af<<<>hdsaihfiufhdsjkhfeifhhfifhiu", "afhdsaihfiufhdsjkhfeifhhfifhiu") 27 | ] 28 | 29 | data.forEach({ 30 | let result = $0.0.removingHTMLTags 31 | let message = "source \($0.0)\nresult \(result)\nexpected \($0.1)" 32 | XCTAssert(result == $0.1, message) 33 | }) 34 | } 35 | 36 | // MARK: - Test for handling HTML emoji 37 | 38 | func testEmoji() { 39 | let escaped = "😺はかわいい" 40 | let escapedHex = "😺はかわいい" 41 | let unescaped = "😺はかわいい" 42 | XCTAssert(escaped.unescapeHTML == unescaped) 43 | XCTAssert(unescaped.escapeHTML == escapedHex) 44 | XCTAssert(escaped.unescapeHTML.escapeHTML == escapedHex) 45 | } 46 | 47 | func testAllEmoji() { 48 | let couples: [(String, String)] = [("🀄", "🀄"), ("🃏", "🃏"), ("🅰", "🅰"), ("🅱", "🅱"), ("🅾", "🅾"), ("🅿", "🅿"), ("🆎", "🆎"), ("🆑", "🆑"), ("🆒", "🆒"), ("🆓", "🆓"), ("🆔", "🆔"), ("🆕", "🆕"), ("🆖", "🆖"), ("🆗", "🆗"), ("🆘", "🆘"), ("🆙", "🆙"), ("🆚", "🆚"), ("🈁", "🈁"), ("🈂", "🈂"), ("🈚", "🈚"), ("🈯", "🈯"), ("🈲", "🈲"), ("🈳", "🈳"), ("🈴", "🈴"), ("🈵", "🈵"), ("🈶", "🈶"), ("🈷", "🈷"), ("🈸", "🈸"), ("🈹", "🈹"), ("🈺", "🈺"), ("🉐", "🉐"), ("🉑", "🉑"), ("🌀", "🌀"), ("🌁", "🌁"), ("🌂", "🌂"), ("🌃", "🌃"), ("🌄", "🌄"), ("🌅", "🌅"), ("🌆", "🌆"), ("🌇", "🌇"), ("🌈", "🌈"), ("🌉", "🌉"), ("🌊", "🌊"), ("🌋", "🌋"), ("🌌", "🌌"), ("🌏", "🌏"), ("🌑", "🌑"), ("🌓", "🌓"), ("🌔", "🌔"), ("🌕", "🌕"), ("🌙", "🌙"), ("🌛", "🌛"), ("🌟", "🌟"), ("🌠", "🌠"), ("🌰", "🌰"), ("🌱", "🌱"), ("🌴", "🌴"), ("🌵", "🌵"), ("🌷", "🌷"), ("🌸", "🌸"), ("🌹", "🌹"), ("🌺", "🌺"), ("🌻", "🌻"), ("🌼", "🌼"), ("🌽", "🌽"), ("🌾", "🌾"), ("🌿", "🌿"), ("🍀", "🍀"), ("🍁", "🍁"), ("🍂", "🍂"), ("🍃", "🍃"), ("🍄", "🍄"), ("🍅", "🍅"), ("🍆", "🍆"), ("🍇", "🍇"), ("🍈", "🍈"), ("🍉", "🍉"), ("🍊", "🍊"), ("🍌", "🍌"), ("🍍", "🍍"), ("🍎", "🍎"), ("🍏", "🍏"), ("🍑", "🍑"), ("🍒", "🍒"), ("🍓", "🍓"), ("🍔", "🍔"), ("🍕", "🍕"), ("🍖", "🍖"), ("🍗", "🍗"), ("🍘", "🍘"), ("🍙", "🍙"), ("🍚", "🍚"), ("🍛", "🍛"), ("🍜", "🍜"), ("🍝", "🍝"), ("🍞", "🍞"), ("🍟", "🍟"), ("🍠", "🍠"), ("🍡", "🍡"), 
("🍢", "🍢"), ("🍣", "🍣"), ("🍤", "🍤"), ("🍥", "🍥"), ("🍦", "🍦"), ("🍧", "🍧"), ("🍨", "🍨"), ("🍩", "🍩"), ("🍪", "🍪"), ("🍫", "🍫"), ("🍬", "🍬"), ("🍭", "🍭"), ("🍮", "🍮"), ("🍯", "🍯"), ("🍰", "🍰"), ("🍱", "🍱"), ("🍲", "🍲"), ("🍳", "🍳"), ("🍴", "🍴"), ("🍵", "🍵"), ("🍶", "🍶"), ("🍷", "🍷"), ("🍸", "🍸"), ("🍹", "🍹"), ("🍺", "🍺"), ("🍻", "🍻"), ("🎀", "🎀"), ("🎁", "🎁"), ("🎂", "🎂"), ("🎃", "🎃"), ("🎄", "🎄"), ("🎅", "🎅"), ("🎆", "🎆"), ("🎇", "🎇"), ("🎈", "🎈"), ("🎉", "🎉"), ("🎊", "🎊"), ("🎋", "🎋"), ("🎌", "🎌"), ("🎍", "🎍"), ("🎎", "🎎"), ("🎏", "🎏"), ("🎐", "🎐"), ("🎑", "🎑"), ("🎒", "🎒"), ("🎓", "🎓"), ("🎠", "🎠"), ("🎡", "🎡"), ("🎢", "🎢"), ("🎣", "🎣"), ("🎤", "🎤"), ("🎥", "🎥"), ("🎦", "🎦"), ("🎧", "🎧"), ("🎨", "🎨"), ("🎩", "🎩"), ("🎪", "🎪"), ("🎫", "🎫"), ("🎬", "🎬"), ("🎭", "🎭"), ("🎮", "🎮"), ("🎯", "🎯"), ("🎰", "🎰"), ("🎱", "🎱"), ("🎲", "🎲"), ("🎳", "🎳"), ("🎴", "🎴"), ("🎵", "🎵"), ("🎶", "🎶"), ("🎷", "🎷"), ("🎸", "🎸"), ("🎹", "🎹"), ("🎺", "🎺"), ("🎻", "🎻"), ("🎼", "🎼"), ("🎽", "🎽"), ("🎾", "🎾"), ("🎿", "🎿"), ("🏀", "🏀"), ("🏁", "🏁"), ("🏂", "🏂"), ("🏃", "🏃"), ("🏄", "🏄"), ("🏆", "🏆"), ("🏈", "🏈"), ("🏊", "🏊"), ("🏠", "🏠"), ("🏡", "🏡"), ("🏢", "🏢"), ("🏣", "🏣"), ("🏥", "🏥"), ("🏦", "🏦"), ("🏧", "🏧"), ("🏨", "🏨"), ("🏩", "🏩"), ("🏪", "🏪"), ("🏫", "🏫"), ("🏬", "🏬"), ("🏭", "🏭"), ("🏮", "🏮"), ("🏯", "🏯"), ("🏰", "🏰"), ("🐌", "🐌"), ("🐍", "🐍"), ("🐎", "🐎"), ("🐑", "🐑"), ("🐒", "🐒"), ("🐔", "🐔"), ("🐗", "🐗"), ("🐘", "🐘"), ("🐙", "🐙"), ("🐚", "🐚"), ("🐛", "🐛"), ("🐜", "🐜"), ("🐝", "🐝"), ("🐞", "🐞"), ("🐟", "🐟"), ("🐠", "🐠"), ("🐡", "🐡"), ("🐢", "🐢"), ("🐣", "🐣"), ("🐤", "🐤"), ("🐥", "🐥"), ("🐦", "🐦"), ("🐧", "🐧"), ("🐨", "🐨"), ("🐩", "🐩"), ("🐫", "🐫"), ("🐬", "🐬"), ("🐭", "🐭"), ("🐮", "🐮"), ("🐯", "🐯"), ("🐰", "🐰"), ("🐱", "🐱"), ("🐲", "🐲"), ("🐳", "🐳"), ("🐴", "🐴"), ("🐵", "🐵"), ("🐶", "🐶"), ("🐷", "🐷"), ("🐸", "🐸"), ("🐹", "🐹"), ("🐺", "🐺"), ("🐻", "🐻"), ("🐼", "🐼"), ("🐽", "🐽"), ("🐾", "🐾"), ("👀", "👀"), ("👂", "👂"), ("👃", "👃"), ("👄", "👄"), ("👅", "👅"), ("👆", "👆"), ("👇", "👇"), ("👈", "👈"), ("👉", "👉"), ("👊", "👊"), ("👋", "👋"), ("👌", "👌"), ("👍", "👍"), ("👎", "👎"), ("👏", "👏"), ("👐", "👐"), ("👑", "👑"), ("👒", "👒"), ("👓", "👓"), ("👔", 
"👔"), ("👕", "👕"), ("👖", "👖"), ("👗", "👗"), ("👘", "👘"), ("👙", "👙"), ("👚", "👚"), ("👛", "👛"), ("👜", "👜"), ("👝", "👝"), ("👞", "👞"), ("👟", "👟"), ("👠", "👠"), ("👡", "👡"), ("👢", "👢"), ("👣", "👣"), ("👤", "👤"), ("👦", "👦"), ("👧", "👧"), ("👨", "👨"), ("👩", "👩"), ("👪", "👪"), ("👫", "👫"), ("👮", "👮"), ("👯", "👯"), ("👰", "👰"), ("👱", "👱"), ("👲", "👲"), ("👳", "👳"), ("👴", "👴"), ("👵", "👵"), ("👶", "👶"), ("👷", "👷"), ("👸", "👸"), ("👹", "👹"), ("👺", "👺"), ("👻", "👻"), ("👼", "👼"), ("👽", "👽"), ("👾", "👾"), ("👿", "👿"), ("💀", "💀"), ("💁", "💁"), ("💂", "💂"), ("💃", "💃"), ("💄", "💄"), ("💅", "💅"), ("💆", "💆"), ("💇", "💇"), ("💈", "💈"), ("💉", "💉"), ("💊", "💊"), ("💋", "💋"), ("💌", "💌"), ("💍", "💍"), ("💎", "💎"), ("💏", "💏"), ("💐", "💐"), ("💑", "💑"), ("💒", "💒"), ("💓", "💓"), ("💔", "💔"), ("💕", "💕"), ("💖", "💖"), ("💗", "💗"), ("💘", "💘"), ("💙", "💙"), ("💚", "💚"), ("💛", "💛"), ("💜", "💜"), ("💝", "💝"), ("💞", "💞"), ("💟", "💟"), ("💠", "💠"), ("💡", "💡"), ("💢", "💢"), ("💣", "💣"), ("💤", "💤"), ("💥", "💥"), ("💦", "💦"), ("💧", "💧"), ("💨", "💨"), ("💩", "💩"), ("💪", "💪"), ("💫", "💫"), ("💬", "💬"), ("💮", "💮"), ("💯", "💯"), ("💰", "💰"), ("💱", "💱"), ("💲", "💲"), ("💳", "💳"), ("💴", "💴"), ("💵", "💵"), ("💸", "💸"), ("💹", "💹"), ("💺", "💺"), ("💻", "💻"), ("💼", "💼"), ("💽", "💽"), ("💾", "💾"), ("💿", "💿"), ("📀", "📀"), ("📁", "📁"), ("📂", "📂"), ("📃", "📃"), ("📄", "📄"), ("📅", "📅"), ("📆", "📆"), ("📇", "📇"), ("📈", "📈"), ("📉", "📉"), ("📊", "📊"), ("📋", "📋"), ("📌", "📌"), ("📍", "📍"), ("📎", "📎"), ("📏", "📏"), ("📐", "📐"), ("📑", "📑"), ("📒", "📒"), ("📓", "📓"), ("📔", "📔"), ("📕", "📕"), ("📖", "📖"), ("📗", "📗"), ("📘", "📘"), ("📙", "📙"), ("📚", "📚"), ("📛", "📛"), ("📜", "📜"), ("📝", "📝"), ("📞", "📞"), ("📟", "📟"), ("📠", "📠"), ("📡", "📡"), ("📢", "📢"), ("📣", "📣"), ("📤", "📤"), ("📥", "📥"), ("📦", "📦"), ("📧", "📧"), ("📨", "📨"), ("📩", "📩"), ("📪", "📪"), ("📫", "📫"), ("📮", "📮"), ("📰", "📰"), ("📱", "📱"), ("📲", "📲"), ("📳", "📳"), ("📴", "📴"), ("📶", "📶"), ("📷", "📷"), ("📹", "📹"), ("📺", "📺"), ("📻", "📻"), ("📼", "📼"), ("🔃", "🔃"), ("🔊", "🔊"), ("🔋", "🔋"), ("🔌", "🔌"), ("🔍", "🔍"), ("🔎", "🔎"), ("🔏", "🔏"), ("🔐", "🔐"), ("🔑", "🔑"), 
("🔒", "🔒"), ("🔓", "🔓"), ("🔔", "🔔"), ("🔖", "🔖"), ("🔗", "🔗"), ("🔘", "🔘"), ("🔙", "🔙"), ("🔚", "🔚"), ("🔛", "🔛"), ("🔜", "🔜"), ("🔝", "🔝"), ("🔞", "🔞"), ("🔟", "🔟"), ("🔠", "🔠"), ("🔡", "🔡"), ("🔢", "🔢"), ("🔣", "🔣"), ("🔤", "🔤"), ("🔥", "🔥"), ("🔦", "🔦"), ("🔧", "🔧"), ("🔨", "🔨"), ("🔩", "🔩"), ("🔪", "🔪"), ("🔫", "🔫"), ("🔮", "🔮"), ("🔯", "🔯"), ("🔰", "🔰"), ("🔱", "🔱"), ("🔲", "🔲"), ("🔳", "🔳"), ("🔴", "🔴"), ("🔵", "🔵"), ("🔶", "🔶"), ("🔷", "🔷"), ("🔸", "🔸"), ("🔹", "🔹"), ("🔺", "🔺"), ("🔻", "🔻"), ("🔼", "🔼"), ("🔽", "🔽"), ("🕐", "🕐"), ("🕑", "🕑"), ("🕒", "🕒"), ("🕓", "🕓"), ("🕔", "🕔"), ("🕕", "🕕"), ("🕖", "🕖"), ("🕗", "🕗"), ("🕘", "🕘"), ("🕙", "🕙"), ("🕚", "🕚"), ("🕛", "🕛"), ("🗻", "🗻"), ("🗼", "🗼"), ("🗽", "🗽"), ("🗾", "🗾"), ("🗿", "🗿"), ("😁", "😁"), ("😂", "😂"), ("😃", "😃"), ("😄", "😄"), ("😅", "😅"), ("😆", "😆"), ("😉", "😉"), ("😊", "😊"), ("😋", "😋"), ("😌", "😌"), ("😍", "😍"), ("😏", "😏"), ("😒", "😒"), ("😓", "😓"), ("😔", "😔"), ("😖", "😖"), ("😘", "😘"), ("😚", "😚"), ("😜", "😜"), ("😝", "😝"), ("😞", "😞"), ("😠", "😠"), ("😡", "😡"), ("😢", "😢"), ("😣", "😣"), ("😤", "😤"), ("😥", "😥"), ("😨", "😨"), ("😩", "😩"), ("😪", "😪"), ("😫", "😫"), ("😭", "😭"), ("😰", "😰"), ("😱", "😱"), ("😲", "😲"), ("😳", "😳"), ("😵", "😵"), ("😷", "😷"), ("😸", "😸"), ("😹", "😹"), ("😺", "😺"), ("😻", "😻"), ("😼", "😼"), ("😽", "😽"), ("😾", "😾"), ("😿", "😿"), ("🙀", "🙀"), ("🙅", "🙅"), ("🙆", "🙆"), ("🙇", "🙇"), ("🙈", "🙈"), ("🙉", "🙉"), ("🙊", "🙊"), ("🙋", "🙋"), ("🙌", "🙌"), ("🙍", "🙍"), ("🙎", "🙎"), ("🙏", "🙏"), ("🚀", "🚀"), ("🚃", "🚃"), ("🚄", "🚄"), ("🚅", "🚅"), ("🚇", "🚇"), ("🚉", "🚉"), ("🚌", "🚌"), ("🚏", "🚏"), ("🚑", "🚑"), ("🚒", "🚒"), ("🚓", "🚓"), ("🚕", "🚕"), ("🚗", "🚗"), ("🚙", "🚙"), ("🚚", "🚚"), ("🚢", "🚢"), ("🚤", "🚤"), ("🚥", "🚥"), ("🚧", "🚧"), ("🚨", "🚨"), ("🚩", "🚩"), ("🚪", "🚪"), ("🚫", "🚫"), ("🚬", "🚬"), ("🚭", "🚭"), ("🚲", "🚲"), ("🚶", "🚶"), ("🚹", "🚹"), ("🚺", "🚺"), ("🚻", "🚻"), ("🚼", "🚼"), ("🚽", "🚽"), ("🚾", "🚾"), ("🛀", "🛀")] 49 | couples.forEach({ 50 | XCTAssert($0.1.escapeHTML == $0.0, "\($0.1)->\($0.1.escapeHTML)") 51 | XCTAssert($0.0.unescapeHTML == $0.1, "\($0.1)->\($0.1.escapeHTML)") 52 | 
XCTAssert($0.0.escapeHTML.unescapeHTML == $0.0, "\($0.1)->\($0.1.escapeHTML)") 53 | }) 54 | } 55 | 56 | // MARK: - Test for handling HTML emoji 57 | 58 | func testStringRoundtrippingEscapedHTML() { 59 | let string = "This test &<>©`\"™®๒०᠐٧~~" 60 | XCTAssert(string.escapeHTML.unescapeHTML == string, "Error: \(string)") 61 | } 62 | 63 | // MARK: - Test for handling HTML Special characters 64 | 65 | let testCount = 1000 66 | let stringToBeUnescaped = ""&&apos;<>ŒœŠšŸˆ˜   ‌‍‎‏–—‘’‚“”„†‡‰‹›€hoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahog" 67 | let stringToBeEscaped = "\"&'<>ŒœŠšŸˆ˜   ‌‍‎‏–—‘’‚“”„†‡‰‹›€hoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahog" 68 | 69 | let escape1 = """ 70 | 71 | func testUnescapePerformance() { 72 | self.measure { 73 | for _ in 0...size * chars.count) 110 | 111 | let stringToBeEscaped = String(data: data, encoding: String.Encoding.utf16LittleEndian)! 
112 | 113 | XCTAssert(stringToBeEscaped.escapeHTML == string, "HTML escaping failed") 114 | XCTAssert("".escapeHTML == "<this & that>", "HTML escaping failed") 115 | XCTAssert("パン・&ド・カンパーニュ".escapeHTML == "パン・&ド・カンパーニュ", "HTML escaping failed") 116 | XCTAssert("abcا1ب<تdef&".escapeHTML == "abcا1ب<تdef&", "HTML escaping failed") 117 | XCTAssert("".escapeHTML == "", "HTML escaping failed") 118 | } 119 | 120 | func testStringByUnescapingHTML() { 121 | let string = ""&'<> ¡¢£¤¥" 122 | + "¦§¨©ª«¬­®¯°" 123 | + "±²³´µ¶·¸¹" 124 | + "º»¼½¾¿ÀÁ" 125 | + "ÂÃÄÅÆÇÈÉ" 126 | + "ÊËÌÍÎÏÐÑÒ" 127 | + "ÓÔÕÖרÙÚ" 128 | + "ÛÜÝÞßàáâã" 129 | + "äåæçèéêëì" 130 | + "íîïðñòóôõ" 131 | + "ö÷øùúûüýþ" 132 | + "ÿŒœŠšŸƒˆ˜" 133 | + "ΑΒΓΔΕΖΗΘΙ" 134 | + "ΚΛΜΝΞΟΠΡΣΤ" 135 | + "ΥΦΧΨΩαβγδ" 136 | + "εζηθικλμνξ" 137 | + "οπρςστυφχψ" 138 | + "ωϑϒϖ   ‌‍" 139 | + "‎‏–—‘’‚“”" 140 | + "„†‡•…‰′″" 141 | + "‹›‾⁄€℘ℑℜ™" 142 | + "ℵ←↑→↓↔↵⇐⇑⇒" 143 | + "⇓⇔∀∂∃∅∇∈∉∋" 144 | + "∏∑−∗√∝∞∠∧∨" 145 | + "∩∪∫∴∼≅≈≠≡≤≥" 146 | + "⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈" 147 | + "⌉⌊⌋⟨⟩◊♠♣♥" 148 | + "♦" 149 | 150 | let chars: [unichar] = [ 151 | 34, 38, 39, 60, 62, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 152 | 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 153 | 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 154 | 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 155 | 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 156 | 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 157 | 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 338, 339, 352, 353, 376, 158 | 402, 710, 732, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 159 | 925, 926, 927, 928, 929, 931, 932, 933, 934, 935, 936, 937, 945, 946, 947, 160 | 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 161 | 963, 964, 965, 966, 967, 968, 969, 977, 978, 982, 8194, 8195, 8201, 8204, 162 | 8205, 8206, 8207, 8211, 8212, 8216, 8217, 8218, 
8220, 8221, 8222, 8224, 8225, 163 | 8226, 8230, 8240, 8242, 8243, 8249, 8250, 8254, 8260, 8364, 8472, 8465, 8476, 164 | 8482, 8501, 8592, 8593, 8594, 8595, 8596, 8629, 8656, 8657, 8658, 8659, 8660, 165 | 8704, 8706, 8707, 8709, 8711, 8712, 8713, 8715, 8719, 8721, 8722, 8727, 8730, 166 | 8733, 8734, 8736, 8743, 8744, 8745, 8746, 8747, 8756, 8764, 8773, 8776, 8800, 167 | 8801, 8804, 8805, 8834, 8835, 8836, 8838, 8839, 8853, 8855, 8869, 8901, 8968, 168 | 8969, 8970, 8971, 9001, 9002, 9674, 9824, 9827, 9829, 9830 169 | ] 170 | 171 | let s = string.unescapeHTML 172 | 173 | for i in 0...allocate(capacity: 1) 175 | defer { buffer.deallocate() } 176 | buffer.pointee = chars[i] 177 | guard let testString = String(bytesNoCopy: buffer, length: MemoryLayout.size, encoding: String.Encoding.utf16LittleEndian, freeWhenDone: false) else { XCTFail(); return } 178 | let r = NSRange(location: i, length: 1) 179 | XCTAssert(testString == (s as NSString).substring(with: r), "\(chars[i])=>\((s as NSString).substring(with: r).unescapeHTML)") 180 | } 181 | 182 | XCTAssert("ABC".unescapeHTML == "ABC", "HTML unescaping failed") 183 | XCTAssert("".unescapeHTML == "", "HTML unescaping failed") 184 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed") 185 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed") 186 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed") 187 | XCTAssert("AA;".unescapeHTML == "AA;", "HTML unescaping failed") 188 | XCTAssert("&".unescapeHTML == "&", "HTML unescaping failed") 189 | XCTAssert("&;".unescapeHTML == "&;", "HTML unescaping failed") 190 | XCTAssert("&x;".unescapeHTML == "&x;", "HTML unescaping failed") 191 | XCTAssert("&X;".unescapeHTML == "&X;", "HTML unescaping failed") 192 | XCTAssert(";".unescapeHTML == ";", "HTML unescaping failed") 193 | XCTAssert("<this & that>".unescapeHTML == "", "HTML unescaping failed") 194 | } 195 | 196 | // MARK: - Test for the internal binary search function. 
197 | 198 | func testBsearch() { 199 | let count = 1000 200 | let candidates1 = Set((0.. Int in Int(arc4random() % 10000)})) 201 | let candidates2 = Set((0.. Int in Int(arc4random() % 10000)})) 202 | 203 | let queries1 = candidates1.intersection(candidates2) 204 | let queries2 = candidates2.subtracting(queries1) 205 | 206 | func comp(v1: Int, v2: Int) -> Int { 207 | if v1 > v2 { 208 | return 1 209 | } else if v1 < v2 { 210 | return -1 211 | } else { 212 | return 0 213 | } 214 | } 215 | 216 | let array = Array(candidates1).sorted() 217 | 218 | queries1.forEach({ 219 | if let result = bsearch(with: $0, from: array, comparator: comp) { 220 | XCTAssert(result.1 < Int(log2(Double(count)) + 2), "Count of searching is wrong.") 221 | } else { 222 | XCTFail("Search is failed.") 223 | } 224 | }) 225 | 226 | queries2.forEach({ 227 | if bsearch(with: $0, from: array, comparator: comp) != nil { 228 | XCTFail("Search is failed.") 229 | } else { 230 | } 231 | }) 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /Tests/HTMLSpecialCharactersTests/XCTestManifests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | 3 | #if !canImport(ObjectiveC) 4 | public func allTests() -> [XCTestCaseEntry] { 5 | return [ 6 | testCase(HTMLSpecialCharactersTests.allTests), 7 | ] 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | 3 | import HTMLSpecialCharactersTests 4 | 5 | var tests = [XCTestCaseEntry]() 6 | tests += HTMLSpecialCharactersTests.allTests() 7 | XCTMain(tests) 8 | --------------------------------------------------------------------------------