├── .gitignore
├── .swift-version
├── .swiftlint.yml
├── LICENSE
├── Package.swift
├── README.md
├── Sources
└── HTMLSpecialCharacters
│ └── HTMLSpecialCharacters.swift
└── Tests
├── HTMLSpecialCharactersTests
├── HTMLSpecialCharactersTests.swift
└── XCTestManifests.swift
└── LinuxMain.swift
/.gitignore:
--------------------------------------------------------------------------------
1 | # Xcode
2 | #
3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
4 |
5 | ## Build generated
6 | build/
7 | DerivedData/
8 |
9 | ## Various settings
10 | *.pbxuser
11 | !default.pbxuser
12 | *.mode1v3
13 | !default.mode1v3
14 | *.mode2v3
15 | !default.mode2v3
16 | *.perspectivev3
17 | !default.perspectivev3
18 | xcuserdata/
19 |
20 | ## Other
21 | *.moved-aside
22 | *.xcuserstate
23 |
24 | ## Obj-C/Swift specific
25 | *.hmap
26 | *.ipa
27 | *.dSYM.zip
28 | *.dSYM
29 |
30 | ## Playgrounds
31 | timeline.xctimeline
32 | playground.xcworkspace
33 |
34 | # Swift Package Manager
35 | #
36 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
37 | # Packages/
38 | .build/
39 | .swiftpm
40 |
41 | # CocoaPods
42 | #
43 | # We recommend against adding the Pods directory to your .gitignore. However
44 | # you should judge for yourself, the pros and cons are mentioned at:
45 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
46 | #
47 | # Pods/
48 |
49 | # Carthage
50 | #
51 | # Add this line if you want to avoid checking in source code from Carthage dependencies.
52 | # Carthage/Checkouts
53 |
54 | Carthage/Build
55 |
56 | # fastlane
57 | #
58 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
59 | # screenshots whenever they are needed.
60 | # For more information about the recommended setup visit:
61 | # https://github.com/fastlane/fastlane/blob/master/fastlane/docs/Gitignore.md
62 |
63 | fastlane/report.xml
64 | fastlane/Preview.html
65 | fastlane/screenshots
66 | fastlane/test_output
67 |
68 | .DS_Store
69 | *~
70 | \#*
71 |
--------------------------------------------------------------------------------
/.swift-version:
--------------------------------------------------------------------------------
1 | 4.0
2 |
--------------------------------------------------------------------------------
/.swiftlint.yml:
--------------------------------------------------------------------------------
1 | disabled_rules: # rules switched off for this project
2 | - trailing_whitespace
3 | - valid_docs
4 | - type_name
5 | - variable_name # NOTE(review): disabled here but also configured under `variable_name:` below; presumably that section has no effect - confirm
6 |
7 | excluded: # NOTE(review): none of these paths appear in this repository's tree (they look copied from another project) - confirm and prune
8 | - reddift/vendor
9 | - playground
10 | - reddiftTests
11 | - reddiftSample
12 | - reddiftSampleTV
13 |
14 | file_length: # thresholds are generous; the main source file holds large literal lookup tables
15 | warning: 2000
16 | error: 4000
17 |
18 | line_length:
19 | warning: 12000
20 | error: 20000
21 |
22 | type_body_length:
23 | warning: 1000
24 | error: 2000
25 |
26 | variable_name:
27 | max_length:
28 | warning: 50
29 | error: 40 # NOTE(review): error threshold (40) is lower than the warning threshold (50) - looks swapped; confirm
30 | min_length:
31 | warning: 1
32 | error: 0
33 |
34 | function_parameter_count:
35 | warning: 10
36 | error: 20
37 |
38 | function_body_length:
39 | warning: 200
40 | error: 400
41 |
42 | cyclomatic_complexity:
43 | warning: 100
44 | error: 150
45 |
46 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 sonson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.1
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
// Manifest: one library product built from a single source target, plus its unit-test target.
// The package has no external dependencies.
let package = Package(
    name: "HTMLSpecialCharacters",
    products: [
        .library(name: "HTMLSpecialCharacters", targets: ["HTMLSpecialCharacters"])
    ],
    dependencies: [],
    targets: [
        .target(name: "HTMLSpecialCharacters", dependencies: []),
        .testTarget(name: "HTMLSpecialCharactersTests", dependencies: ["HTMLSpecialCharacters"])
    ]
)
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HTMLSpecialCharacters
2 |
3 | Library to escape/unescape HTML special characters in Swift.
4 | [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac) is known as a great library which supports escaping/unescaping HTML special characters.
5 | But it's written in Objective-C.
6 |
7 | # Test code
8 |
9 | HTMLSpecialCharacters passes almost the same test code as [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac/blob/master/Foundation/GTMNSString%2BHTMLTest.m).
10 | Please check it.
11 |
12 | # Performance
13 |
14 | HTMLSpecialCharacters can escape/unescape HTML special characters much faster than [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac).
15 | The figure below shows the performance of escaping/unescaping the test code. These scores were measured on an iMac (27-inch, Late 2012).
16 | Test code for Google Toolbox for Mac is [here](https://github.com/sonsongithub/GTMHTMLSpecialCharacters).
17 |
18 |
19 |
20 |
21 |
22 | # Acknowledgement
23 |
24 | [@norio_nomura](https://github.com) gave me a lot of code and comments. This project is based on [his code](https://gist.github.com/norio-nomura/2a79822004e7c89228300cf19595ca99).
25 |
26 | # License
27 |
28 | MIT License. This library includes source code from [Google Toolbox for Mac](https://github.com/google/google-toolbox-for-mac).
--------------------------------------------------------------------------------
/Sources/HTMLSpecialCharacters/HTMLSpecialCharacters.swift:
--------------------------------------------------------------------------------
1 | //
2 | // HTMLSpecialCharacters.swift
3 | // HTMLSpecialCharacters
4 | //
5 | // Created by sonson on 2017/02/08.
6 | // Copyright © 2017年 sonson. All rights reserved.
7 | //
8 |
9 | import Foundation
10 |
11 | // MARK: - Table for unescaping
12 |
/// One row of the unescape look-up table (LUT).
///
/// Pairs the UTF-16 code units of an entity name (without the surrounding
/// `&` and `;`, e.g. `[108, 116]` for "lt") with the UTF-16 code unit that
/// the entity stands for (e.g. `60` for "<").
private struct HtmlUnescapeMap {
    /// UTF-16 code units spelling the entity name.
    let unescapingCodes: [unichar]
    /// UTF-16 code unit the entity is replaced with.
    let code: unichar

    /// - Parameters:
    ///   - name: UTF-16 code units of the entity name.
    ///   - character: UTF-16 code unit the name maps to.
    init(_ name: [unichar], _ character: unichar) {
        self.unescapingCodes = name
        self.code = character
    }
}
22 |
/**
 Selects the unescape table that holds entity names of a given length.

 Tables exist for name lengths 2 through 8 only.

 - parameter length: Number of UTF-16 code units in the entity name to be unescaped.
 - returns: The table for that name length, or `nil` when no table exists for it.
 */
private func getUnescapeTable(length: Int) -> [HtmlUnescapeMap]? {
    switch length {
    case 2: return unicodeHtmlUnescapeMapNameLength_2
    case 3: return unicodeHtmlUnescapeMapNameLength_3
    case 4: return unicodeHtmlUnescapeMapNameLength_4
    case 5: return unicodeHtmlUnescapeMapNameLength_5
    case 6: return unicodeHtmlUnescapeMapNameLength_6
    case 7: return unicodeHtmlUnescapeMapNameLength_7
    case 8: return unicodeHtmlUnescapeMapNameLength_8
    default: return nil
    }
}
48 | /// Entities whose name is 2 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
49 | private let unicodeHtmlUnescapeMapNameLength_2: [HtmlUnescapeMap] = [
50 | HtmlUnescapeMap([108, 116], 60), // "lt" => "<"
51 | HtmlUnescapeMap([103, 116], 62), // "gt" => ">"
52 | HtmlUnescapeMap([77, 117], 924), // "Mu" => "Μ"
53 | HtmlUnescapeMap([78, 117], 925), // "Nu" => "Ν"
54 | HtmlUnescapeMap([88, 105], 926), // "Xi" => "Ξ"
55 | HtmlUnescapeMap([80, 105], 928), // "Pi" => "Π"
56 | HtmlUnescapeMap([109, 117], 956), // "mu" => "μ"
57 | HtmlUnescapeMap([110, 117], 957), // "nu" => "ν"
58 | HtmlUnescapeMap([120, 105], 958), // "xi" => "ξ"
59 | HtmlUnescapeMap([112, 105], 960), // "pi" => "π"
60 | HtmlUnescapeMap([110, 105], 8715), // "ni" => "∋"
61 | HtmlUnescapeMap([111, 114], 8744), // "or" => "∨"
62 | HtmlUnescapeMap([110, 101], 8800), // "ne" => "≠"
63 | HtmlUnescapeMap([108, 101], 8804), // "le" => "≤"
64 | HtmlUnescapeMap([103, 101], 8805) // "ge" => "≥"
65 | ]
66 | /// Entities whose name is 3 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
67 | private let unicodeHtmlUnescapeMapNameLength_3: [HtmlUnescapeMap] = [
68 | HtmlUnescapeMap([97, 109, 112], 38), // "amp" => "&"
69 | HtmlUnescapeMap([121, 101, 110], 165), // "yen" => "¥"
70 | HtmlUnescapeMap([117, 109, 108], 168), // "uml" => "¨"
71 | HtmlUnescapeMap([110, 111, 116], 172), // "not" => "¬"
72 | HtmlUnescapeMap([115, 104, 121], 173), // "shy" => U+00AD (soft hyphen, invisible)
73 | HtmlUnescapeMap([114, 101, 103], 174), // "reg" => "®"
74 | HtmlUnescapeMap([100, 101, 103], 176), // "deg" => "°"
75 | HtmlUnescapeMap([69, 84, 72], 208), // "ETH" => "Ð"
76 | HtmlUnescapeMap([101, 116, 104], 240), // "eth" => "ð"
77 | HtmlUnescapeMap([69, 116, 97], 919), // "Eta" => "Η"
78 | HtmlUnescapeMap([82, 104, 111], 929), // "Rho" => "Ρ"
79 | HtmlUnescapeMap([84, 97, 117], 932), // "Tau" => "Τ"
80 | HtmlUnescapeMap([80, 104, 105], 934), // "Phi" => "Φ"
81 | HtmlUnescapeMap([67, 104, 105], 935), // "Chi" => "Χ"
82 | HtmlUnescapeMap([80, 115, 105], 936), // "Psi" => "Ψ"
83 | HtmlUnescapeMap([101, 116, 97], 951), // "eta" => "η"
84 | HtmlUnescapeMap([114, 104, 111], 961), // "rho" => "ρ"
85 | HtmlUnescapeMap([116, 97, 117], 964), // "tau" => "τ"
86 | HtmlUnescapeMap([112, 104, 105], 966), // "phi" => "φ"
87 | HtmlUnescapeMap([99, 104, 105], 967), // "chi" => "χ"
88 | HtmlUnescapeMap([112, 115, 105], 968), // "psi" => "ψ"
89 | HtmlUnescapeMap([112, 105, 118], 982), // "piv" => "ϖ"
90 | HtmlUnescapeMap([122, 119, 106], 8205), // "zwj" => U+200D (zero-width joiner, invisible)
91 | HtmlUnescapeMap([108, 114, 109], 8206), // "lrm" => U+200E (left-to-right mark, invisible)
92 | HtmlUnescapeMap([114, 108, 109], 8207), // "rlm" => U+200F (right-to-left mark, invisible)
93 | HtmlUnescapeMap([115, 117, 109], 8721), // "sum" => "∑"
94 | HtmlUnescapeMap([97, 110, 103], 8736), // "ang" => "∠"
95 | HtmlUnescapeMap([97, 110, 100], 8743), // "and" => "∧"
96 | HtmlUnescapeMap([99, 97, 112], 8745), // "cap" => "∩"
97 | HtmlUnescapeMap([99, 117, 112], 8746), // "cup" => "∪"
98 | HtmlUnescapeMap([105, 110, 116], 8747), // "int" => "∫"
99 | HtmlUnescapeMap([115, 105, 109], 8764), // "sim" => "∼"
100 | HtmlUnescapeMap([115, 117, 98], 8834), // "sub" => "⊂"
101 | HtmlUnescapeMap([115, 117, 112], 8835), // "sup" => "⊃"
102 | HtmlUnescapeMap([108, 111, 122], 9674) // "loz" => "◊"
103 | ]
104 | /// Entities whose name is 4 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
105 | private let unicodeHtmlUnescapeMapNameLength_4: [HtmlUnescapeMap] = [
106 | HtmlUnescapeMap([113, 117, 111, 116], 34), // "quot" => U+0022 (double quotation mark)
107 | HtmlUnescapeMap([97, 112, 111, 115], 39), // "apos" => "'"
108 | HtmlUnescapeMap([110, 98, 115, 112], 160), // "nbsp" => U+00A0 (no-break space)
109 | HtmlUnescapeMap([99, 101, 110, 116], 162), // "cent" => "¢"
110 | HtmlUnescapeMap([115, 101, 99, 116], 167), // "sect" => "§"
111 | HtmlUnescapeMap([99, 111, 112, 121], 169), // "copy" => "©"
112 | HtmlUnescapeMap([111, 114, 100, 102], 170), // "ordf" => "ª"
113 | HtmlUnescapeMap([109, 97, 99, 114], 175), // "macr" => "¯"
114 | HtmlUnescapeMap([115, 117, 112, 50], 178), // "sup2" => "²"
115 | HtmlUnescapeMap([115, 117, 112, 51], 179), // "sup3" => "³"
116 | HtmlUnescapeMap([112, 97, 114, 97], 182), // "para" => "¶"
117 | HtmlUnescapeMap([115, 117, 112, 49], 185), // "sup1" => "¹"
118 | HtmlUnescapeMap([111, 114, 100, 109], 186), // "ordm" => "º"
119 | HtmlUnescapeMap([65, 117, 109, 108], 196), // "Auml" => "Ä"
120 | HtmlUnescapeMap([69, 117, 109, 108], 203), // "Euml" => "Ë"
121 | HtmlUnescapeMap([73, 117, 109, 108], 207), // "Iuml" => "Ï"
122 | HtmlUnescapeMap([79, 117, 109, 108], 214), // "Ouml" => "Ö"
123 | HtmlUnescapeMap([85, 117, 109, 108], 220), // "Uuml" => "Ü"
124 | HtmlUnescapeMap([97, 117, 109, 108], 228), // "auml" => "ä"
125 | HtmlUnescapeMap([101, 117, 109, 108], 235), // "euml" => "ë"
126 | HtmlUnescapeMap([105, 117, 109, 108], 239), // "iuml" => "ï"
127 | HtmlUnescapeMap([111, 117, 109, 108], 246), // "ouml" => "ö"
128 | HtmlUnescapeMap([117, 117, 109, 108], 252), // "uuml" => "ü"
129 | HtmlUnescapeMap([121, 117, 109, 108], 255), // "yuml" => "ÿ"
130 | HtmlUnescapeMap([89, 117, 109, 108], 376), // "Yuml" => "Ÿ"
131 | HtmlUnescapeMap([102, 110, 111, 102], 402), // "fnof" => "ƒ"
132 | HtmlUnescapeMap([99, 105, 114, 99], 710), // "circ" => "ˆ"
133 | HtmlUnescapeMap([66, 101, 116, 97], 914), // "Beta" => "Β"
134 | HtmlUnescapeMap([90, 101, 116, 97], 918), // "Zeta" => "Ζ"
135 | HtmlUnescapeMap([73, 111, 116, 97], 921), // "Iota" => "Ι"
136 | HtmlUnescapeMap([98, 101, 116, 97], 946), // "beta" => "β"
137 | HtmlUnescapeMap([122, 101, 116, 97], 950), // "zeta" => "ζ"
138 | HtmlUnescapeMap([105, 111, 116, 97], 953), // "iota" => "ι"
139 | HtmlUnescapeMap([101, 110, 115, 112], 8194), // "ensp" => U+2002 (en space)
140 | HtmlUnescapeMap([101, 109, 115, 112], 8195), // "emsp" => U+2003 (em space)
141 | HtmlUnescapeMap([122, 119, 110, 106], 8204), // "zwnj" => U+200C (zero-width non-joiner, invisible)
142 | HtmlUnescapeMap([98, 117, 108, 108], 8226), // "bull" => "•"
143 | HtmlUnescapeMap([101, 117, 114, 111], 8364), // "euro" => "€"
144 | HtmlUnescapeMap([114, 101, 97, 108], 8476), // "real" => "ℜ"
145 | HtmlUnescapeMap([108, 97, 114, 114], 8592), // "larr" => "←"
146 | HtmlUnescapeMap([117, 97, 114, 114], 8593), // "uarr" => "↑"
147 | HtmlUnescapeMap([114, 97, 114, 114], 8594), // "rarr" => "→"
148 | HtmlUnescapeMap([100, 97, 114, 114], 8595), // "darr" => "↓"
149 | HtmlUnescapeMap([104, 97, 114, 114], 8596), // "harr" => "↔"
150 | HtmlUnescapeMap([108, 65, 114, 114], 8656), // "lArr" => "⇐"
151 | HtmlUnescapeMap([117, 65, 114, 114], 8657), // "uArr" => "⇑"
152 | HtmlUnescapeMap([114, 65, 114, 114], 8658), // "rArr" => "⇒"
153 | HtmlUnescapeMap([100, 65, 114, 114], 8659), // "dArr" => "⇓"
154 | HtmlUnescapeMap([104, 65, 114, 114], 8660), // "hArr" => "⇔"
155 | HtmlUnescapeMap([112, 97, 114, 116], 8706), // "part" => "∂"
156 | HtmlUnescapeMap([105, 115, 105, 110], 8712), // "isin" => "∈"
157 | HtmlUnescapeMap([112, 114, 111, 100], 8719), // "prod" => "∏"
158 | HtmlUnescapeMap([112, 114, 111, 112], 8733), // "prop" => "∝"
159 | HtmlUnescapeMap([99, 111, 110, 103], 8773), // "cong" => "≅"
160 | HtmlUnescapeMap([110, 115, 117, 98], 8836), // "nsub" => "⊄"
161 | HtmlUnescapeMap([115, 117, 98, 101], 8838), // "sube" => "⊆"
162 | HtmlUnescapeMap([115, 117, 112, 101], 8839), // "supe" => "⊇"
163 | HtmlUnescapeMap([112, 101, 114, 112], 8869), // "perp" => "⊥"
164 | HtmlUnescapeMap([115, 100, 111, 116], 8901), // "sdot" => "⋅"
165 | HtmlUnescapeMap([108, 97, 110, 103], 9001), // "lang" => "〈"
166 | HtmlUnescapeMap([114, 97, 110, 103], 9002) // "rang" => "〉"
167 | ]
168 | /// Entities whose name is 5 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
169 | private let unicodeHtmlUnescapeMapNameLength_5: [HtmlUnescapeMap] = [
170 | HtmlUnescapeMap([105, 101, 120, 99, 108], 161), // "iexcl" => "¡"
171 | HtmlUnescapeMap([112, 111, 117, 110, 100], 163), // "pound" => "£"
172 | HtmlUnescapeMap([108, 97, 113, 117, 111], 171), // "laquo" => "«"
173 | HtmlUnescapeMap([97, 99, 117, 116, 101], 180), // "acute" => "´"
174 | HtmlUnescapeMap([109, 105, 99, 114, 111], 181), // "micro" => "µ"
175 | HtmlUnescapeMap([99, 101, 100, 105, 108], 184), // "cedil" => "¸"
176 | HtmlUnescapeMap([114, 97, 113, 117, 111], 187), // "raquo" => "»"
177 | HtmlUnescapeMap([65, 99, 105, 114, 99], 194), // "Acirc" => "Â"
178 | HtmlUnescapeMap([65, 114, 105, 110, 103], 197), // "Aring" => "Å"
179 | HtmlUnescapeMap([65, 69, 108, 105, 103], 198), // "AElig" => "Æ"
180 | HtmlUnescapeMap([69, 99, 105, 114, 99], 202), // "Ecirc" => "Ê"
181 | HtmlUnescapeMap([73, 99, 105, 114, 99], 206), // "Icirc" => "Î"
182 | HtmlUnescapeMap([79, 99, 105, 114, 99], 212), // "Ocirc" => "Ô"
183 | HtmlUnescapeMap([116, 105, 109, 101, 115], 215), // "times" => "×"
184 | HtmlUnescapeMap([85, 99, 105, 114, 99], 219), // "Ucirc" => "Û"
185 | HtmlUnescapeMap([84, 72, 79, 82, 78], 222), // "THORN" => "Þ"
186 | HtmlUnescapeMap([115, 122, 108, 105, 103], 223), // "szlig" => "ß"
187 | HtmlUnescapeMap([97, 99, 105, 114, 99], 226), // "acirc" => "â"
188 | HtmlUnescapeMap([97, 114, 105, 110, 103], 229), // "aring" => "å"
189 | HtmlUnescapeMap([97, 101, 108, 105, 103], 230), // "aelig" => "æ"
190 | HtmlUnescapeMap([101, 99, 105, 114, 99], 234), // "ecirc" => "ê"
191 | HtmlUnescapeMap([105, 99, 105, 114, 99], 238), // "icirc" => "î"
192 | HtmlUnescapeMap([111, 99, 105, 114, 99], 244), // "ocirc" => "ô"
193 | HtmlUnescapeMap([117, 99, 105, 114, 99], 251), // "ucirc" => "û"
194 | HtmlUnescapeMap([116, 104, 111, 114, 110], 254), // "thorn" => "þ"
195 | HtmlUnescapeMap([79, 69, 108, 105, 103], 338), // "OElig" => "Œ"
196 | HtmlUnescapeMap([111, 101, 108, 105, 103], 339), // "oelig" => "œ"
197 | HtmlUnescapeMap([116, 105, 108, 100, 101], 732), // "tilde" => "˜"
198 | HtmlUnescapeMap([65, 108, 112, 104, 97], 913), // "Alpha" => "Α"
199 | HtmlUnescapeMap([71, 97, 109, 109, 97], 915), // "Gamma" => "Γ"
200 | HtmlUnescapeMap([68, 101, 108, 116, 97], 916), // "Delta" => "Δ"
201 | HtmlUnescapeMap([84, 104, 101, 116, 97], 920), // "Theta" => "Θ"
202 | HtmlUnescapeMap([75, 97, 112, 112, 97], 922), // "Kappa" => "Κ"
203 | HtmlUnescapeMap([83, 105, 103, 109, 97], 931), // "Sigma" => "Σ"
204 | HtmlUnescapeMap([79, 109, 101, 103, 97], 937), // "Omega" => "Ω"
205 | HtmlUnescapeMap([97, 108, 112, 104, 97], 945), // "alpha" => "α"
206 | HtmlUnescapeMap([103, 97, 109, 109, 97], 947), // "gamma" => "γ"
207 | HtmlUnescapeMap([100, 101, 108, 116, 97], 948), // "delta" => "δ"
208 | HtmlUnescapeMap([116, 104, 101, 116, 97], 952), // "theta" => "θ"
209 | HtmlUnescapeMap([107, 97, 112, 112, 97], 954), // "kappa" => "κ"
210 | HtmlUnescapeMap([115, 105, 103, 109, 97], 963), // "sigma" => "σ"
211 | HtmlUnescapeMap([111, 109, 101, 103, 97], 969), // "omega" => "ω"
212 | HtmlUnescapeMap([117, 112, 115, 105, 104], 978), // "upsih" => "ϒ"
213 | HtmlUnescapeMap([110, 100, 97, 115, 104], 8211), // "ndash" => "–"
214 | HtmlUnescapeMap([109, 100, 97, 115, 104], 8212), // "mdash" => "—"
215 | HtmlUnescapeMap([108, 115, 113, 117, 111], 8216), // "lsquo" => "‘"
216 | HtmlUnescapeMap([114, 115, 113, 117, 111], 8217), // "rsquo" => "’"
217 | HtmlUnescapeMap([115, 98, 113, 117, 111], 8218), // "sbquo" => "‚"
218 | HtmlUnescapeMap([108, 100, 113, 117, 111], 8220), // "ldquo" => "“"
219 | HtmlUnescapeMap([114, 100, 113, 117, 111], 8221), // "rdquo" => "”"
220 | HtmlUnescapeMap([98, 100, 113, 117, 111], 8222), // "bdquo" => "„"
221 | HtmlUnescapeMap([112, 114, 105, 109, 101], 8242), // "prime" => "′"
222 | HtmlUnescapeMap([80, 114, 105, 109, 101], 8243), // "Prime" => "″"
223 | HtmlUnescapeMap([111, 108, 105, 110, 101], 8254), // "oline" => "‾"
224 | HtmlUnescapeMap([102, 114, 97, 115, 108], 8260), // "frasl" => "⁄"
225 | HtmlUnescapeMap([105, 109, 97, 103, 101], 8465), // "image" => "ℑ"
226 | HtmlUnescapeMap([116, 114, 97, 100, 101], 8482), // "trade" => "™"
227 | HtmlUnescapeMap([99, 114, 97, 114, 114], 8629), // "crarr" => "↵"
228 | HtmlUnescapeMap([101, 120, 105, 115, 116], 8707), // "exist" => "∃"
229 | HtmlUnescapeMap([101, 109, 112, 116, 121], 8709), // "empty" => "∅"
230 | HtmlUnescapeMap([110, 97, 98, 108, 97], 8711), // "nabla" => "∇"
231 | HtmlUnescapeMap([110, 111, 116, 105, 110], 8713), // "notin" => "∉"
232 | HtmlUnescapeMap([109, 105, 110, 117, 115], 8722), // "minus" => "−"
233 | HtmlUnescapeMap([114, 97, 100, 105, 99], 8730), // "radic" => "√"
234 | HtmlUnescapeMap([105, 110, 102, 105, 110], 8734), // "infin" => "∞"
235 | HtmlUnescapeMap([97, 115, 121, 109, 112], 8776), // "asymp" => "≈"
236 | HtmlUnescapeMap([101, 113, 117, 105, 118], 8801), // "equiv" => "≡"
237 | HtmlUnescapeMap([111, 112, 108, 117, 115], 8853), // "oplus" => "⊕"
238 | HtmlUnescapeMap([108, 99, 101, 105, 108], 8968), // "lceil" => "⌈"
239 | HtmlUnescapeMap([114, 99, 101, 105, 108], 8969), // "rceil" => "⌉"
240 | HtmlUnescapeMap([99, 108, 117, 98, 115], 9827), // "clubs" => "♣"
241 | HtmlUnescapeMap([100, 105, 97, 109, 115], 9830) // "diams" => "♦"
242 | ]
243 | /// Entities whose name is 6 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
244 | private let unicodeHtmlUnescapeMapNameLength_6: [HtmlUnescapeMap] = [
245 | HtmlUnescapeMap([99, 117, 114, 114, 101, 110], 164), // "curren" => "¤"
246 | HtmlUnescapeMap([98, 114, 118, 98, 97, 114], 166), // "brvbar" => "¦"
247 | HtmlUnescapeMap([112, 108, 117, 115, 109, 110], 177), // "plusmn" => "±"
248 | HtmlUnescapeMap([109, 105, 100, 100, 111, 116], 183), // "middot" => "·"
249 | HtmlUnescapeMap([102, 114, 97, 99, 49, 52], 188), // "frac14" => "¼"
250 | HtmlUnescapeMap([102, 114, 97, 99, 49, 50], 189), // "frac12" => "½"
251 | HtmlUnescapeMap([102, 114, 97, 99, 51, 52], 190), // "frac34" => "¾"
252 | HtmlUnescapeMap([105, 113, 117, 101, 115, 116], 191), // "iquest" => "¿"
253 | HtmlUnescapeMap([65, 103, 114, 97, 118, 101], 192), // "Agrave" => "À"
254 | HtmlUnescapeMap([65, 97, 99, 117, 116, 101], 193), // "Aacute" => "Á"
255 | HtmlUnescapeMap([65, 116, 105, 108, 100, 101], 195), // "Atilde" => "Ã"
256 | HtmlUnescapeMap([67, 99, 101, 100, 105, 108], 199), // "Ccedil" => "Ç"
257 | HtmlUnescapeMap([69, 103, 114, 97, 118, 101], 200), // "Egrave" => "È"
258 | HtmlUnescapeMap([69, 97, 99, 117, 116, 101], 201), // "Eacute" => "É"
259 | HtmlUnescapeMap([73, 103, 114, 97, 118, 101], 204), // "Igrave" => "Ì"
260 | HtmlUnescapeMap([73, 97, 99, 117, 116, 101], 205), // "Iacute" => "Í"
261 | HtmlUnescapeMap([78, 116, 105, 108, 100, 101], 209), // "Ntilde" => "Ñ"
262 | HtmlUnescapeMap([79, 103, 114, 97, 118, 101], 210), // "Ograve" => "Ò"
263 | HtmlUnescapeMap([79, 97, 99, 117, 116, 101], 211), // "Oacute" => "Ó"
264 | HtmlUnescapeMap([79, 116, 105, 108, 100, 101], 213), // "Otilde" => "Õ"
265 | HtmlUnescapeMap([79, 115, 108, 97, 115, 104], 216), // "Oslash" => "Ø"
266 | HtmlUnescapeMap([85, 103, 114, 97, 118, 101], 217), // "Ugrave" => "Ù"
267 | HtmlUnescapeMap([85, 97, 99, 117, 116, 101], 218), // "Uacute" => "Ú"
268 | HtmlUnescapeMap([89, 97, 99, 117, 116, 101], 221), // "Yacute" => "Ý"
269 | HtmlUnescapeMap([97, 103, 114, 97, 118, 101], 224), // "agrave" => "à"
270 | HtmlUnescapeMap([97, 97, 99, 117, 116, 101], 225), // "aacute" => "á"
271 | HtmlUnescapeMap([97, 116, 105, 108, 100, 101], 227), // "atilde" => "ã"
272 | HtmlUnescapeMap([99, 99, 101, 100, 105, 108], 231), // "ccedil" => "ç"
273 | HtmlUnescapeMap([101, 103, 114, 97, 118, 101], 232), // "egrave" => "è"
274 | HtmlUnescapeMap([101, 97, 99, 117, 116, 101], 233), // "eacute" => "é"
275 | HtmlUnescapeMap([105, 103, 114, 97, 118, 101], 236), // "igrave" => "ì"
276 | HtmlUnescapeMap([105, 97, 99, 117, 116, 101], 237), // "iacute" => "í"
277 | HtmlUnescapeMap([110, 116, 105, 108, 100, 101], 241), // "ntilde" => "ñ"
278 | HtmlUnescapeMap([111, 103, 114, 97, 118, 101], 242), // "ograve" => "ò"
279 | HtmlUnescapeMap([111, 97, 99, 117, 116, 101], 243), // "oacute" => "ó"
280 | HtmlUnescapeMap([111, 116, 105, 108, 100, 101], 245), // "otilde" => "õ"
281 | HtmlUnescapeMap([100, 105, 118, 105, 100, 101], 247), // "divide" => "÷"
282 | HtmlUnescapeMap([111, 115, 108, 97, 115, 104], 248), // "oslash" => "ø"
283 | HtmlUnescapeMap([117, 103, 114, 97, 118, 101], 249), // "ugrave" => "ù"
284 | HtmlUnescapeMap([117, 97, 99, 117, 116, 101], 250), // "uacute" => "ú"
285 | HtmlUnescapeMap([121, 97, 99, 117, 116, 101], 253), // "yacute" => "ý"
286 | HtmlUnescapeMap([83, 99, 97, 114, 111, 110], 352), // "Scaron" => "Š"
287 | HtmlUnescapeMap([115, 99, 97, 114, 111, 110], 353), // "scaron" => "š"
288 | HtmlUnescapeMap([76, 97, 109, 98, 100, 97], 923), // "Lambda" => "Λ"
289 | HtmlUnescapeMap([108, 97, 109, 98, 100, 97], 955), // "lambda" => "λ"
290 | HtmlUnescapeMap([115, 105, 103, 109, 97, 102], 962), // "sigmaf" => "ς"
291 | HtmlUnescapeMap([116, 104, 105, 110, 115, 112], 8201), // "thinsp" => U+2009 (thin space)
292 | HtmlUnescapeMap([100, 97, 103, 103, 101, 114], 8224), // "dagger" => "†"
293 | HtmlUnescapeMap([68, 97, 103, 103, 101, 114], 8225), // "Dagger" => "‡"
294 | HtmlUnescapeMap([104, 101, 108, 108, 105, 112], 8230), // "hellip" => "…"
295 | HtmlUnescapeMap([112, 101, 114, 109, 105, 108], 8240), // "permil" => "‰"
296 | HtmlUnescapeMap([108, 115, 97, 113, 117, 111], 8249), // "lsaquo" => "‹"
297 | HtmlUnescapeMap([114, 115, 97, 113, 117, 111], 8250), // "rsaquo" => "›"
298 | HtmlUnescapeMap([119, 101, 105, 101, 114, 112], 8472), // "weierp" => "℘"
299 | HtmlUnescapeMap([102, 111, 114, 97, 108, 108], 8704), // "forall" => "∀"
300 | HtmlUnescapeMap([108, 111, 119, 97, 115, 116], 8727), // "lowast" => "∗"
301 | HtmlUnescapeMap([116, 104, 101, 114, 101, 52], 8756), // "there4" => "∴"
302 | HtmlUnescapeMap([111, 116, 105, 109, 101, 115], 8855), // "otimes" => "⊗"
303 | HtmlUnescapeMap([108, 102, 108, 111, 111, 114], 8970), // "lfloor" => "⌊"
304 | HtmlUnescapeMap([114, 102, 108, 111, 111, 114], 8971), // "rfloor" => "⌋"
305 | HtmlUnescapeMap([115, 112, 97, 100, 101, 115], 9824), // "spades" => "♠"
306 | HtmlUnescapeMap([104, 101, 97, 114, 116, 115], 9829) // "hearts" => "♥"
307 | ]
308 | /// Entities whose name is 7 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Entries are listed in ascending order of `code`.
309 | private let unicodeHtmlUnescapeMapNameLength_7: [HtmlUnescapeMap] = [
310 | HtmlUnescapeMap([69, 112, 115, 105, 108, 111, 110], 917), // "Epsilon" => "Ε"
311 | HtmlUnescapeMap([79, 109, 105, 99, 114, 111, 110], 927), // "Omicron" => "Ο"
312 | HtmlUnescapeMap([85, 112, 115, 105, 108, 111, 110], 933), // "Upsilon" => "Υ"
313 | HtmlUnescapeMap([101, 112, 115, 105, 108, 111, 110], 949), // "epsilon" => "ε"
314 | HtmlUnescapeMap([111, 109, 105, 99, 114, 111, 110], 959), // "omicron" => "ο"
315 | HtmlUnescapeMap([117, 112, 115, 105, 108, 111, 110], 965), // "upsilon" => "υ"
316 | HtmlUnescapeMap([97, 108, 101, 102, 115, 121, 109], 8501) // "alefsym" => "ℵ"
317 | ]
318 | /// Entities whose name is 8 UTF-16 code units long: name (without "&" and ";") paired with the code unit it unescapes to. Currently a single entry.
319 | private let unicodeHtmlUnescapeMapNameLength_8: [HtmlUnescapeMap] = [
320 | HtmlUnescapeMap([116, 104, 101, 116, 97, 115, 121, 109], 977) // "thetasym" => "ϑ"
321 | ]
322 |
323 | // MARK: - Table for escaping
324 |
/// One row of the escape look-up table (LUT).
///
/// Pairs a UTF-16 code unit with the complete escaped sequence that replaces
/// it, including the leading `&` (38) and trailing `;` (59).
private struct HtmlEscapeMap {
    /// UTF-16 code units of the escaped form (despite the property name,
    /// this is the replacement written out when escaping).
    let unescapingCodes: [unichar]
    /// UTF-16 code unit that gets escaped.
    let code: unichar
    /// Number of code units in `unescapingCodes`, stored once at init.
    let count: Int

    /// - Parameters:
    ///   - character: UTF-16 code unit to be escaped.
    ///   - replacement: UTF-16 code units of its escaped form.
    init(_ character: unichar, _ replacement: [unichar]) {
        self.unescapingCodes = replacement
        self.code = character
        self.count = replacement.count
    }
}
336 | /// Characters that have a named escape: UTF-16 code unit paired with its full "&name;" sequence. Entries are in ascending order of `code`; NOTE(review): presumably searched with `bsearch`/`comp`, which needs ascending order - keep it sorted and confirm at the call site.
337 | private let unicodeHtmlEscapeMapForUTF8: [HtmlEscapeMap] = [
338 | HtmlEscapeMap(34, [38, 113, 117, 111, 116, 59]), // => "quot"
339 | HtmlEscapeMap(38, [38, 97, 109, 112, 59]), // => "amp"
340 | HtmlEscapeMap(39, [38, 97, 112, 111, 115, 59]), // => "apos"
341 | HtmlEscapeMap(60, [38, 108, 116, 59]), // => "lt"
342 | HtmlEscapeMap(62, [38, 103, 116, 59]), // => "gt"
343 | HtmlEscapeMap(338, [38, 79, 69, 108, 105, 103, 59]), // => "OElig"
344 | HtmlEscapeMap(339, [38, 111, 101, 108, 105, 103, 59]), // => "oelig"
345 | HtmlEscapeMap(352, [38, 83, 99, 97, 114, 111, 110, 59]), // => "Scaron"
346 | HtmlEscapeMap(353, [38, 115, 99, 97, 114, 111, 110, 59]), // => "scaron"
347 | HtmlEscapeMap(376, [38, 89, 117, 109, 108, 59]), // => "Yuml"
348 | HtmlEscapeMap(710, [38, 99, 105, 114, 99, 59]), // => "circ"
349 | HtmlEscapeMap(732, [38, 116, 105, 108, 100, 101, 59]), // => "tilde"
350 | HtmlEscapeMap(8194, [38, 101, 110, 115, 112, 59]), // => "ensp"
351 | HtmlEscapeMap(8195, [38, 101, 109, 115, 112, 59]), // => "emsp"
352 | HtmlEscapeMap(8201, [38, 116, 104, 105, 110, 115, 112, 59]), // => "thinsp"
353 | HtmlEscapeMap(8204, [38, 122, 119, 110, 106, 59]), // => "zwnj"
354 | HtmlEscapeMap(8205, [38, 122, 119, 106, 59]), // => "zwj"
355 | HtmlEscapeMap(8206, [38, 108, 114, 109, 59]), // => "lrm"
356 | HtmlEscapeMap(8207, [38, 114, 108, 109, 59]), // => "rlm"
357 | HtmlEscapeMap(8211, [38, 110, 100, 97, 115, 104, 59]), // => "ndash"
358 | HtmlEscapeMap(8212, [38, 109, 100, 97, 115, 104, 59]), // => "mdash"
359 | HtmlEscapeMap(8216, [38, 108, 115, 113, 117, 111, 59]), // => "lsquo"
360 | HtmlEscapeMap(8217, [38, 114, 115, 113, 117, 111, 59]), // => "rsquo"
361 | HtmlEscapeMap(8218, [38, 115, 98, 113, 117, 111, 59]), // => "sbquo"
362 | HtmlEscapeMap(8220, [38, 108, 100, 113, 117, 111, 59]), // => "ldquo"
363 | HtmlEscapeMap(8221, [38, 114, 100, 113, 117, 111, 59]), // => "rdquo"
364 | HtmlEscapeMap(8222, [38, 98, 100, 113, 117, 111, 59]), // => "bdquo"
365 | HtmlEscapeMap(8224, [38, 100, 97, 103, 103, 101, 114, 59]), // => "dagger"
366 | HtmlEscapeMap(8225, [38, 68, 97, 103, 103, 101, 114, 59]), // => "Dagger"
367 | HtmlEscapeMap(8240, [38, 112, 101, 114, 109, 105, 108, 59]), // => "permil"
368 | HtmlEscapeMap(8249, [38, 108, 115, 97, 113, 117, 111, 59]), // => "lsaquo"
369 | HtmlEscapeMap(8250, [38, 114, 115, 97, 113, 117, 111, 59]), // => "rsaquo"
370 | HtmlEscapeMap(8364, [38, 101, 117, 114, 111, 59]) // => "euro"
371 | ]
372 |
373 | // MARK: -
374 |
/// Error values private to this file.
///
/// The spelling "Squence" in three case names is kept as-is: the cases may be
/// referenced elsewhere in the file, so renaming them here alone would break
/// those uses.
private enum HTMLSpecialCharactersError: Error {
    // Payload-free cases. Going by their names they flag malformed hex,
    // decimal, escape and buffer sequences - confirm at the throw sites.
    case invalidHexSquence, invalidDecimalSquence, invalidEscapeSquence, invalidBufferSequence
    // Carries a UTF-16 code unit. NOTE(review): the name suggests this is a
    // "matched, not really an error" signal - confirm at the throw sites.
    case notErrorMatchedUnicode(code: unichar)
}
383 |
/**
 Three-way comparison of a UTF-16 code unit against the `code` of an `HtmlEscapeMap` row.
 - parameter v1: Code unit being looked up.
 - parameter v2: Table row to compare against.
 - returns: `1` when `v1` is greater than `v2.code`, `-1` when it is smaller, `0` when they are equal.
 */
private func comp(v1: unichar, v2: HtmlEscapeMap) -> Int {
    if v1 == v2.code {
        return 0
    }
    return v1 > v2.code ? 1 : -1
}
396 |
/**
 Binary search over an array sorted in ascending order.

 The generic parameter list and the search loop of this function were lost
 in the visible source; this is a complete implementation of the documented
 contract.

 - parameter key: Query.
 - parameter sortedArray: Must be sorted in ascending order with respect to `comparator`. May be empty.
 - parameter comparator: Called as `comparator(key, element)`; returns a positive value when `key` orders after `element`, a negative value when it orders before, and `0` on a match.
 - returns: The matching element of `sortedArray` together with the number of times `comparator` was executed, or `nil` if no element matches.
 */
internal func bsearch<T, U>(with key: T, from sortedArray: [U], comparator: (T, U) -> Int) -> (U, Int)? {
    var searchCount = 0
    var lowerBound = sortedArray.startIndex
    var upperBound = sortedArray.endIndex // exclusive

    // The half-open window [lowerBound, upperBound) always contains the match
    // if there is one; an empty array never enters the loop.
    while lowerBound < upperBound {
        let middle = lowerBound + (upperBound - lowerBound) / 2
        let candidate = sortedArray[middle]
        searchCount += 1
        let order = comparator(key, candidate)
        if order == 0 {
            return (candidate, searchCount)
        } else if order > 0 {
            // key sorts after the candidate: discard the lower half and the candidate.
            lowerBound = middle + 1
        } else {
            // key sorts before the candidate: discard the upper half and the candidate.
            upperBound = middle
        }
    }
    return nil
}
427 |
428 | // MARK: - Unicode
429 |
/**
 Encodes a Unicode scalar value as a UTF-16 surrogate pair.

 Only bits 0-20 of the input are used. If bits 16-20 are all zero (for example
 any value below 0x10000) the subtraction below underflows and the call traps,
 so callers must pass supplementary-plane values only.

 - parameter unicodeScalar: Unicode scalar value to be encoded.
 - returns: Array of two `unichar`: the high surrogate followed by the low surrogate.
 */
private func convertToSurrogatePair(unicodeScalar: UInt) -> [unichar] {
    // Bit layout follows https://en.wikipedia.org/wiki/UTF-16
    let planeBits: UInt = (unicodeScalar & 0x1F0000) >> 16
    let planeMinusOne: UInt = planeBits - 1
    let upperSix: UInt = (unicodeScalar & 0xFC00) >> 10
    let lowerTen: UInt = unicodeScalar & 0x03FF

    let highSurrogate = UInt16(0xD800 + (planeMinusOne << 6) + upperSix)
    let lowSurrogate = UInt16(0xDC00 + lowerTen)
    return [highSurrogate, lowSurrogate]
}
445 |
/**
 Decodes a UTF-16 surrogate pair into its Unicode scalar value.

 The range checks are inclusive at the lower bounds: 0xD800 is a valid high
 surrogate and 0xDC00 is a valid low surrogate, so pairs such as
 (0xD800, 0xDC00) = U+10000 are accepted.

 - parameter first: First one of a surrogate pair (high surrogate, 0xD800...0xDBFF).
 - parameter second: Second one of a surrogate pair (low surrogate, 0xDC00...0xDFFF).
 - returns: Unicode scalar value, or `nil` if either unit is outside its surrogate range.
 */
private func convertToUnicodeScalar(firstOfSurrogatePair u1: unichar, second u2: unichar) -> UInt? {
    // This convert algorithm is based on https://en.wikipedia.org/wiki/UTF-16
    guard u1 >= 0xD800, u1 <= 0xDBFF else { return nil }
    guard u2 >= 0xDC00, u2 <= 0xDFFF else { return nil }

    // The high surrogate stores (plane - 1) in bits 6-9, hence the + 1.
    let u = UInt((u1 & 0x03C0) >> 6) + 1
    let x1 = UInt(u1 & 0x003F)
    let x2 = UInt(u2 & 0x03FF)
    return (u << 16) + (x1 << 10) + x2
}
464 |
465 | /**
466 | Build the hexadecimal HTML character reference ("&#x…;") for the character encoded by a surrogate pair.
467 | - parameter u1: First one of a surrogate pair.
468 | - parameter u2: Second one of a surrogate pair.
469 | - returns: The reference as UTF-16 code units, with upper-case hex digits and no leading zeros, or nil when `convertToUnicodeScalar` rejects the pair.
470 | */
471 | private func convertToUnicodeScalarString(firstOfSurrogatePair u1: unichar, second u2: unichar) -> [unichar]? {
472 |     guard let unicodeScalar = convertToUnicodeScalar(firstOfSurrogatePair: u1, second: u2) else { return nil }
473 |
474 |     // Upper-case hex digits as UTF-16 code units, indexed by nibble value.
475 |     let digits = Array("0123456789ABCDEF".utf16)
476 |     // Every reference starts with "&#x".
477 |     var reference = Array("&#x".utf16)
478 |
479 |     // Walk the eight nibbles of the low 32 bits from the most significant one down,
480 |     // dropping zero nibbles until the first digit has been written.
481 |     var hasWrittenDigit = false
482 |     for shift in stride(from: 28, through: 0, by: -4) {
483 |         let nibble = Int((unicodeScalar >> UInt(shift)) & 0b1111)
484 |         if nibble == 0 && !hasWrittenDigit { continue }
485 |         hasWrittenDigit = true
486 |         reference.append(digits[nibble])
487 |     }
488 |     reference.append(unichar(UInt8(ascii: ";")))
489 |     return reference
490 | }
491 |
492 | /**
493 | Convert the hex digits of a numeric character reference to the UTF-16 code units of the character they name.
494 | - returns: A single code unit for values up to 0xFFFF, otherwise the surrogate pair for the value.
495 | - throws: `HTMLSpecialCharactersError.invalidHexSquence` for an empty input, a code unit that is not a hex digit, or a value above 0x10FFFF.
496 | */
497 | private func convertToUTF16Codes<T>(hexCodeStorage utf16Storage: T) throws -> [unichar] where T: ContiguousStorage, T.Iterator.Element == unichar {
498 |     var hasDigit = false
499 |     let utf16: UInt = try utf16Storage.reduce(0) {
500 |         hasDigit = true
501 |         guard $0 < 0x110000 else { throw HTMLSpecialCharactersError.invalidHexSquence } // stop early: an overlong digit run must not wrap around UInt
502 |         switch $1 {
503 |         case 48...57: return ($0 << 4) + UInt($1) - 48 // "0"..."9"
504 |         case 65...70: return ($0 << 4) + UInt($1) - 65 + 10 // "A"..."F"
505 |         case 97...102: return ($0 << 4) + UInt($1) - 97 + 10 // "a"..."f"
506 |         default: throw HTMLSpecialCharactersError.invalidHexSquence
507 |         }
508 |     }
509 |     guard hasDigit, utf16 < 0x110000 else { throw HTMLSpecialCharactersError.invalidHexSquence }
510 |     return utf16 <= UInt(unichar.max) ? [unichar(utf16)] : convertToSurrogatePair(unicodeScalar: utf16) // 0xFFFF still fits in one code unit
511 | }
512 |
513 | /**
514 | Convert the decimal digits of a numeric character reference to the UTF-16 code units of the character they name.
515 | - returns: A single code unit for values up to 0xFFFF, otherwise the surrogate pair for the value.
516 | - throws: `HTMLSpecialCharactersError.invalidDecimalSquence` for an empty input, a code unit that is not a decimal digit, or a value above 0x10FFFF.
517 | */
518 | private func convertToUTF16Codes<T>(decimalCodeStorage utf16Storage: T) throws -> [unichar] where T: ContiguousStorage, T.Iterator.Element == unichar {
519 |     var hasDigit = false
520 |     let utf16: UInt = try utf16Storage.reduce(0) {
521 |         hasDigit = true
522 |         guard $0 < 0x110000 else { throw HTMLSpecialCharactersError.invalidDecimalSquence } // stop early: `$0 * 10` must never overflow and trap
523 |         switch $1 {
524 |         case 48...57: return $0 * 10 + UInt($1) - 48 // "0"..."9"
525 |         default: throw HTMLSpecialCharactersError.invalidDecimalSquence
526 |         }
527 |     }
528 |     guard hasDigit, utf16 < 0x110000 else { throw HTMLSpecialCharactersError.invalidDecimalSquence }
529 |     return utf16 <= UInt(unichar.max) ? [unichar(utf16)] : convertToSurrogatePair(unicodeScalar: utf16) // 0xFFFF still fits in one code unit
530 | }
531 |
532 | /**
533 | Look up an escape sequence in the unescape table selected by its length and return the `code` of the first entry whose `unescapingCodes` match the input byte for byte. Throws `HTMLSpecialCharactersError.invalidEscapeSquence` when the buffer has no base address, when no table exists for that length, or when no entry matches.
534 | */
535 | private func convertToUTF16Codes(standardSequence utf16Storage: T) throws -> unichar where T: ContiguousStorage, T.Iterator.Element == unichar {
536 | return try utf16Storage.withUnsafeBufferPointer {
537 | let length = $0.count // number of code units in the input
538 | guard let unichars = $0.baseAddress
539 | else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
540 | guard let table = getUnescapeTable(length: $0.count) // candidates are selected by input length
541 | else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
542 | guard let entry = table.first(where: {memcmp($0.unescapingCodes, unichars, MemoryLayout.size * length) == 0}) // inside this closure $0 is a table entry, not the buffer
543 | else { throw HTMLSpecialCharactersError.invalidEscapeSquence }
544 | return entry.code
545 | }
546 | }
547 |
548 | // MARK: - Extension
549 |
550 | private protocol ContiguousStorage: Sequence { // Sequences whose elements can be read through an unsafe buffer pointer.
551 | func withUnsafeBufferPointer(_ body: (UnsafeBufferPointer) throws -> R) rethrows -> R // Runs `body` with a buffer pointer and returns its result, rethrowing any error it throws.
552 | }
553 | extension Array: ContiguousStorage {} // Array, ArraySlice and ContiguousArray are declared as conforming without adding any members.
554 | extension ArraySlice: ContiguousStorage {}
555 | extension ContiguousArray: ContiguousStorage {}
556 |
557 | // MARK: -
558 |
559 | extension String {
560 |
561 | /**
562 | Returns a new string made from the String by removing all HTML tags.
563 | */
564 | public var removingHTMLTags: String {
565 | let length = utf16.count
566 | var buffer = [unichar](repeating: 0, count: utf16.count)
567 |
568 | NSString(string: self).getCharacters(&buffer)
569 |
570 | guard let destinationBuffer = NSMutableData(capacity: MemoryLayout.size * utf16.count) else { return self }
571 |
572 | // let p = UnsafeMutablePointer(&buffer)
573 |
574 | buffer.withUnsafeBufferPointer { (pointer) -> Void in
575 | let p = pointer.baseAddress!
576 | let leftParenthesis = unichar(UInt8(ascii: "<"))
577 | let rightParenthesis = unichar(UInt8(ascii: ">"))
578 |
579 | var begin = 0
580 | let end = length
581 | while let leftIndex = buffer.suffix(from: begin).firstIndex(of: leftParenthesis) {
582 | guard let rightIndex = buffer[leftIndex...size)
585 | destinationBuffer.append(p + begin, length: MemoryLayout.size * range.count)
586 | begin = rightIndex
587 | }
588 | if length - begin > 0 {
589 | let copyLength = length - begin
590 | destinationBuffer.append(p + begin, length: MemoryLayout.size * copyLength)
591 | }
592 | }
593 |
594 | return String(data: destinationBuffer as Data, encoding: .utf16LittleEndian) ?? self
595 | }
596 |
597 | /**
598 | Returns a new string made from the String by replacing all sequences to be escaped with the matching UTF-8 scalar codes.
599 | */
600 | public var escapeHTML: String {
601 | let length = utf16.count
602 | let buffer = UnsafeMutablePointer.allocate(capacity: utf16.count)
603 | defer { buffer.deallocate() }
604 | NSString(string: self).getCharacters(buffer)
605 | let margin = 0
606 | guard let destinationBuffer = NSMutableData(capacity: MemoryLayout.size * (utf16.count + margin)) else { return self }
607 | var start = 0
608 | for var i in 0...size * copyLength)
613 |
614 | result.0.unescapingCodes.withUnsafeBytes { (pointer) -> Void in
615 | let p = pointer.baseAddress!
616 | destinationBuffer.append(p, length: MemoryLayout.size * result.0.count)
617 | }
618 |
619 | start = i + 1
620 | } else if i < length - 1 {
621 | if let result = convertToUnicodeScalarString(firstOfSurrogatePair: (buffer + i).pointee, second: (buffer + i + 1).pointee) {
622 | // 4byte character, surrogate pair.
623 | let copyLength = i - start
624 | destinationBuffer.append(buffer + start, length: MemoryLayout.size * copyLength)
625 | result.withUnsafeBytes { (pointer) -> Void in
626 | let p = pointer.baseAddress!
627 | destinationBuffer.append(p, length: MemoryLayout.size * result.count)
628 | }
629 | start = i + 2
630 | i += 1
631 | }
632 | }
633 | }
634 | if length - start > 0 {
635 | let copyLength = length - start
636 | destinationBuffer.append(buffer + start, length: MemoryLayout.size * copyLength)
637 | }
638 | return String(data: destinationBuffer as Data, encoding: .utf16LittleEndian) ?? self
639 | }
640 |
641 | /**
642 | Returns a new string made from the String by replacing all HTML unescaped sequences with the matching UTF-8 characters.
643 | Original code written by @norio_nomura
644 | https://gist.github.com/norio-nomura/2a79822004e7c89228300cf19595ca99
645 | */
646 | public var unescapeHTML: String {
647 | var buffer = [unichar](repeating: 0, count: utf16.count)
648 | NSString(string: self).getCharacters(&buffer)
649 |
650 | var end = buffer.endIndex
651 | let ampersand = unichar(UInt8(ascii: "&"))
652 | let semicolon = unichar(UInt8(ascii: ";"))
653 | let sharp = unichar(UInt8(ascii: "#"))
654 | let hexPrefixes = ["X", "x"].map { unichar(UInt8(ascii: $0)) }
655 |
656 | while let begin = buffer.prefix(upTo: end).reversed().firstIndex(of: ampersand).map({ buffer.index(before: $0.base) }) {
657 | defer { end = begin }
658 | // if we don't find a semicolon in the range, we don't have a sequence
659 | guard let semicolonIndex = buffer[begin..(utf16Storage: T) throws where T: ContiguousStorage, T.Iterator.Element == unichar {
694 | self = try utf16Storage.withUnsafeBufferPointer {
695 | guard let p = $0.baseAddress else { throw HTMLSpecialCharactersError.invalidBufferSequence }
696 | return String(utf16CodeUnits: p, count: $0.count)
697 | }
698 | }
699 | }
700 |
--------------------------------------------------------------------------------
/Tests/HTMLSpecialCharactersTests/HTMLSpecialCharactersTests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 | @testable import HTMLSpecialCharacters
3 |
4 | final class HTMLSpecialCharactersTests: XCTestCase {
5 |
6 | // MARK: - Test for removing HTML tags
7 |
8 | override func setUp() {
9 |     super.setUp()
10 |     // Parse the fixture as an HTML document once and discard the result; a failure is only printed.
11 |     guard let data = stringToBeUnescaped.data(using: .unicode) else { return }
12 |     let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [.documentType: NSAttributedString.DocumentType.html]
13 |     do {
14 |         _ = try NSAttributedString(data: data, options: options, documentAttributes: nil)
15 |     } catch {
16 |         print(error)
17 |     }
18 | }
19 |
20 | func testRemovingHTMLTags() {
21 | let data: [(String, String)] = [
22 | ("aaa baaa
aaaaa", "aaa baaaaaaaa"),
23 | // ("aaa baaa
<", "aaa baaa<"),
24 | // ("", ""),
25 | // ("dhafhdsaihfiufhdsjkhfeifhhfifhiu", "dhafhdsaihfiufhdsjkhfeifhhfifhiu"),
26 | // ("<>af<<<>hdsaihfiufhdsjkhfeifhhfifhiu", "afhdsaihfiufhdsjkhfeifhhfifhiu")
27 | ]
28 |
29 | data.forEach({
30 | let result = $0.0.removingHTMLTags
31 | let message = "source \($0.0)\nresult \(result)\nexpected \($0.1)"
32 | XCTAssert(result == $0.1, message)
33 | })
34 | }
35 |
36 | // MARK: - Test for handling HTML emoji
37 |
38 | func testEmoji() { // 😺 is U+1F63A (decimal 128570), which UTF-16 stores as a surrogate pair.
39 |     let escaped = "&#128570;はかわいい"
40 |     let escapedHex = "&#x1F63A;はかわいい"
41 |     let unescaped = "😺はかわいい"
42 |     XCTAssertEqual(escaped.unescapeHTML, unescaped)
43 |     XCTAssertEqual(unescaped.escapeHTML, escapedHex)
44 |     XCTAssertEqual(escaped.unescapeHTML.escapeHTML, escapedHex)
45 | }
46 |
47 | func testAllEmoji() {
48 | let couples: [(String, String)] = [("🀄", "🀄"), ("🃏", "🃏"), ("🅰", "🅰"), ("🅱", "🅱"), ("🅾", "🅾"), ("🅿", "🅿"), ("🆎", "🆎"), ("🆑", "🆑"), ("🆒", "🆒"), ("🆓", "🆓"), ("🆔", "🆔"), ("🆕", "🆕"), ("🆖", "🆖"), ("🆗", "🆗"), ("🆘", "🆘"), ("🆙", "🆙"), ("🆚", "🆚"), ("🈁", "🈁"), ("🈂", "🈂"), ("🈚", "🈚"), ("🈯", "🈯"), ("🈲", "🈲"), ("🈳", "🈳"), ("🈴", "🈴"), ("🈵", "🈵"), ("🈶", "🈶"), ("🈷", "🈷"), ("🈸", "🈸"), ("🈹", "🈹"), ("🈺", "🈺"), ("🉐", "🉐"), ("🉑", "🉑"), ("🌀", "🌀"), ("🌁", "🌁"), ("🌂", "🌂"), ("🌃", "🌃"), ("🌄", "🌄"), ("🌅", "🌅"), ("🌆", "🌆"), ("🌇", "🌇"), ("🌈", "🌈"), ("🌉", "🌉"), ("🌊", "🌊"), ("🌋", "🌋"), ("🌌", "🌌"), ("🌏", "🌏"), ("🌑", "🌑"), ("🌓", "🌓"), ("🌔", "🌔"), ("🌕", "🌕"), ("🌙", "🌙"), ("🌛", "🌛"), ("🌟", "🌟"), ("🌠", "🌠"), ("🌰", "🌰"), ("🌱", "🌱"), ("🌴", "🌴"), ("🌵", "🌵"), ("🌷", "🌷"), ("🌸", "🌸"), ("🌹", "🌹"), ("🌺", "🌺"), ("🌻", "🌻"), ("🌼", "🌼"), ("🌽", "🌽"), ("🌾", "🌾"), ("🌿", "🌿"), ("🍀", "🍀"), ("🍁", "🍁"), ("🍂", "🍂"), ("🍃", "🍃"), ("🍄", "🍄"), ("🍅", "🍅"), ("🍆", "🍆"), ("🍇", "🍇"), ("🍈", "🍈"), ("🍉", "🍉"), ("🍊", "🍊"), ("🍌", "🍌"), ("🍍", "🍍"), ("🍎", "🍎"), ("🍏", "🍏"), ("🍑", "🍑"), ("🍒", "🍒"), ("🍓", "🍓"), ("🍔", "🍔"), ("🍕", "🍕"), ("🍖", "🍖"), ("🍗", "🍗"), ("🍘", "🍘"), ("🍙", "🍙"), ("🍚", "🍚"), ("🍛", "🍛"), ("🍜", "🍜"), ("🍝", "🍝"), ("🍞", "🍞"), ("🍟", "🍟"), ("🍠", "🍠"), ("🍡", "🍡"), ("🍢", "🍢"), ("🍣", "🍣"), ("🍤", "🍤"), ("🍥", "🍥"), ("🍦", "🍦"), ("🍧", "🍧"), ("🍨", "🍨"), ("🍩", "🍩"), ("🍪", "🍪"), ("🍫", "🍫"), ("🍬", "🍬"), ("🍭", "🍭"), ("🍮", "🍮"), ("🍯", "🍯"), ("🍰", "🍰"), ("🍱", "🍱"), ("🍲", "🍲"), ("🍳", "🍳"), ("🍴", "🍴"), ("🍵", "🍵"), ("🍶", "🍶"), ("🍷", "🍷"), ("🍸", "🍸"), ("🍹", "🍹"), ("🍺", "🍺"), ("🍻", "🍻"), ("🎀", "🎀"), ("🎁", "🎁"), ("🎂", "🎂"), ("🎃", "🎃"), ("🎄", "🎄"), ("🎅", "🎅"), ("🎆", "🎆"), ("🎇", "🎇"), ("🎈", "🎈"), ("🎉", "🎉"), ("🎊", "🎊"), ("🎋", "🎋"), ("🎌", "🎌"), ("🎍", "🎍"), ("🎎", "🎎"), ("🎏", "🎏"), ("🎐", "🎐"), ("🎑", "🎑"), ("🎒", "🎒"), ("🎓", "🎓"), ("🎠", "🎠"), ("🎡", "🎡"), ("🎢", "🎢"), ("🎣", "🎣"), ("🎤", "🎤"), ("🎥", "🎥"), ("🎦", "🎦"), ("🎧", "🎧"), ("🎨", "🎨"), ("🎩", "🎩"), ("🎪", "🎪"), ("🎫", "🎫"), ("🎬", "🎬"), ("🎭", "🎭"), ("🎮", "🎮"), ("🎯", "🎯"), ("🎰", "🎰"), ("🎱", "🎱"), 
("🎲", "🎲"), ("🎳", "🎳"), ("🎴", "🎴"), ("🎵", "🎵"), ("🎶", "🎶"), ("🎷", "🎷"), ("🎸", "🎸"), ("🎹", "🎹"), ("🎺", "🎺"), ("🎻", "🎻"), ("🎼", "🎼"), ("🎽", "🎽"), ("🎾", "🎾"), ("🎿", "🎿"), ("🏀", "🏀"), ("🏁", "🏁"), ("🏂", "🏂"), ("🏃", "🏃"), ("🏄", "🏄"), ("🏆", "🏆"), ("🏈", "🏈"), ("🏊", "🏊"), ("🏠", "🏠"), ("🏡", "🏡"), ("🏢", "🏢"), ("🏣", "🏣"), ("🏥", "🏥"), ("🏦", "🏦"), ("🏧", "🏧"), ("🏨", "🏨"), ("🏩", "🏩"), ("🏪", "🏪"), ("🏫", "🏫"), ("🏬", "🏬"), ("🏭", "🏭"), ("🏮", "🏮"), ("🏯", "🏯"), ("🏰", "🏰"), ("🐌", "🐌"), ("🐍", "🐍"), ("🐎", "🐎"), ("🐑", "🐑"), ("🐒", "🐒"), ("🐔", "🐔"), ("🐗", "🐗"), ("🐘", "🐘"), ("🐙", "🐙"), ("🐚", "🐚"), ("🐛", "🐛"), ("🐜", "🐜"), ("🐝", "🐝"), ("🐞", "🐞"), ("🐟", "🐟"), ("🐠", "🐠"), ("🐡", "🐡"), ("🐢", "🐢"), ("🐣", "🐣"), ("🐤", "🐤"), ("🐥", "🐥"), ("🐦", "🐦"), ("🐧", "🐧"), ("🐨", "🐨"), ("🐩", "🐩"), ("🐫", "🐫"), ("🐬", "🐬"), ("🐭", "🐭"), ("🐮", "🐮"), ("🐯", "🐯"), ("🐰", "🐰"), ("🐱", "🐱"), ("🐲", "🐲"), ("🐳", "🐳"), ("🐴", "🐴"), ("🐵", "🐵"), ("🐶", "🐶"), ("🐷", "🐷"), ("🐸", "🐸"), ("🐹", "🐹"), ("🐺", "🐺"), ("🐻", "🐻"), ("🐼", "🐼"), ("🐽", "🐽"), ("🐾", "🐾"), ("👀", "👀"), ("👂", "👂"), ("👃", "👃"), ("👄", "👄"), ("👅", "👅"), ("👆", "👆"), ("👇", "👇"), ("👈", "👈"), ("👉", "👉"), ("👊", "👊"), ("👋", "👋"), ("👌", "👌"), ("👍", "👍"), ("👎", "👎"), ("👏", "👏"), ("👐", "👐"), ("👑", "👑"), ("👒", "👒"), ("👓", "👓"), ("👔", "👔"), ("👕", "👕"), ("👖", "👖"), ("👗", "👗"), ("👘", "👘"), ("👙", "👙"), ("👚", "👚"), ("👛", "👛"), ("👜", "👜"), ("👝", "👝"), ("👞", "👞"), ("👟", "👟"), ("👠", "👠"), ("👡", "👡"), ("👢", "👢"), ("👣", "👣"), ("👤", "👤"), ("👦", "👦"), ("👧", "👧"), ("👨", "👨"), ("👩", "👩"), ("👪", "👪"), ("👫", "👫"), ("👮", "👮"), ("👯", "👯"), ("👰", "👰"), ("👱", "👱"), ("👲", "👲"), ("👳", "👳"), ("👴", "👴"), ("👵", "👵"), ("👶", "👶"), ("👷", "👷"), ("👸", "👸"), ("👹", "👹"), ("👺", "👺"), ("👻", "👻"), ("👼", "👼"), ("👽", "👽"), ("👾", "👾"), ("👿", "👿"), ("💀", "💀"), ("💁", "💁"), ("💂", "💂"), ("💃", "💃"), ("💄", "💄"), ("💅", "💅"), ("💆", "💆"), ("💇", "💇"), ("💈", "💈"), ("💉", "💉"), ("💊", "💊"), ("💋", "💋"), ("💌", "💌"), ("💍", "💍"), ("💎", "💎"), ("💏", "💏"), ("💐", "💐"), ("💑", "💑"), ("💒", "💒"), ("💓", "💓"), ("💔", "💔"), ("💕", "💕"), ("💖", "💖"), ("💗", 
"💗"), ("💘", "💘"), ("💙", "💙"), ("💚", "💚"), ("💛", "💛"), ("💜", "💜"), ("💝", "💝"), ("💞", "💞"), ("💟", "💟"), ("💠", "💠"), ("💡", "💡"), ("💢", "💢"), ("💣", "💣"), ("💤", "💤"), ("💥", "💥"), ("💦", "💦"), ("💧", "💧"), ("💨", "💨"), ("💩", "💩"), ("💪", "💪"), ("💫", "💫"), ("💬", "💬"), ("💮", "💮"), ("💯", "💯"), ("💰", "💰"), ("💱", "💱"), ("💲", "💲"), ("💳", "💳"), ("💴", "💴"), ("💵", "💵"), ("💸", "💸"), ("💹", "💹"), ("💺", "💺"), ("💻", "💻"), ("💼", "💼"), ("💽", "💽"), ("💾", "💾"), ("💿", "💿"), ("📀", "📀"), ("📁", "📁"), ("📂", "📂"), ("📃", "📃"), ("📄", "📄"), ("📅", "📅"), ("📆", "📆"), ("📇", "📇"), ("📈", "📈"), ("📉", "📉"), ("📊", "📊"), ("📋", "📋"), ("📌", "📌"), ("📍", "📍"), ("📎", "📎"), ("📏", "📏"), ("📐", "📐"), ("📑", "📑"), ("📒", "📒"), ("📓", "📓"), ("📔", "📔"), ("📕", "📕"), ("📖", "📖"), ("📗", "📗"), ("📘", "📘"), ("📙", "📙"), ("📚", "📚"), ("📛", "📛"), ("📜", "📜"), ("📝", "📝"), ("📞", "📞"), ("📟", "📟"), ("📠", "📠"), ("📡", "📡"), ("📢", "📢"), ("📣", "📣"), ("📤", "📤"), ("📥", "📥"), ("📦", "📦"), ("📧", "📧"), ("📨", "📨"), ("📩", "📩"), ("📪", "📪"), ("📫", "📫"), ("📮", "📮"), ("📰", "📰"), ("📱", "📱"), ("📲", "📲"), ("📳", "📳"), ("📴", "📴"), ("📶", "📶"), ("📷", "📷"), ("📹", "📹"), ("📺", "📺"), ("📻", "📻"), ("📼", "📼"), ("🔃", "🔃"), ("🔊", "🔊"), ("🔋", "🔋"), ("🔌", "🔌"), ("🔍", "🔍"), ("🔎", "🔎"), ("🔏", "🔏"), ("🔐", "🔐"), ("🔑", "🔑"), ("🔒", "🔒"), ("🔓", "🔓"), ("🔔", "🔔"), ("🔖", "🔖"), ("🔗", "🔗"), ("🔘", "🔘"), ("🔙", "🔙"), ("🔚", "🔚"), ("🔛", "🔛"), ("🔜", "🔜"), ("🔝", "🔝"), ("🔞", "🔞"), ("🔟", "🔟"), ("🔠", "🔠"), ("🔡", "🔡"), ("🔢", "🔢"), ("🔣", "🔣"), ("🔤", "🔤"), ("🔥", "🔥"), ("🔦", "🔦"), ("🔧", "🔧"), ("🔨", "🔨"), ("🔩", "🔩"), ("🔪", "🔪"), ("🔫", "🔫"), ("🔮", "🔮"), ("🔯", "🔯"), ("🔰", "🔰"), ("🔱", "🔱"), ("🔲", "🔲"), ("🔳", "🔳"), ("🔴", "🔴"), ("🔵", "🔵"), ("🔶", "🔶"), ("🔷", "🔷"), ("🔸", "🔸"), ("🔹", "🔹"), ("🔺", "🔺"), ("🔻", "🔻"), ("🔼", "🔼"), ("🔽", "🔽"), ("🕐", "🕐"), ("🕑", "🕑"), ("🕒", "🕒"), ("🕓", "🕓"), ("🕔", "🕔"), ("🕕", "🕕"), ("🕖", "🕖"), ("🕗", "🕗"), ("🕘", "🕘"), ("🕙", "🕙"), ("🕚", "🕚"), ("🕛", "🕛"), ("🗻", "🗻"), ("🗼", "🗼"), ("🗽", "🗽"), ("🗾", "🗾"), ("🗿", "🗿"), ("😁", "😁"), ("😂", "😂"), ("😃", "😃"), ("😄", "😄"), ("😅", "😅"), ("😆", "😆"), 
("😉", "😉"), ("😊", "😊"), ("😋", "😋"), ("😌", "😌"), ("😍", "😍"), ("😏", "😏"), ("😒", "😒"), ("😓", "😓"), ("😔", "😔"), ("😖", "😖"), ("😘", "😘"), ("😚", "😚"), ("😜", "😜"), ("😝", "😝"), ("😞", "😞"), ("😠", "😠"), ("😡", "😡"), ("😢", "😢"), ("😣", "😣"), ("😤", "😤"), ("😥", "😥"), ("😨", "😨"), ("😩", "😩"), ("😪", "😪"), ("😫", "😫"), ("😭", "😭"), ("😰", "😰"), ("😱", "😱"), ("😲", "😲"), ("😳", "😳"), ("😵", "😵"), ("😷", "😷"), ("😸", "😸"), ("😹", "😹"), ("😺", "😺"), ("😻", "😻"), ("😼", "😼"), ("😽", "😽"), ("😾", "😾"), ("😿", "😿"), ("🙀", "🙀"), ("🙅", "🙅"), ("🙆", "🙆"), ("🙇", "🙇"), ("🙈", "🙈"), ("🙉", "🙉"), ("🙊", "🙊"), ("🙋", "🙋"), ("🙌", "🙌"), ("🙍", "🙍"), ("🙎", "🙎"), ("🙏", "🙏"), ("🚀", "🚀"), ("🚃", "🚃"), ("🚄", "🚄"), ("🚅", "🚅"), ("🚇", "🚇"), ("🚉", "🚉"), ("🚌", "🚌"), ("🚏", "🚏"), ("🚑", "🚑"), ("🚒", "🚒"), ("🚓", "🚓"), ("🚕", "🚕"), ("🚗", "🚗"), ("🚙", "🚙"), ("🚚", "🚚"), ("🚢", "🚢"), ("🚤", "🚤"), ("🚥", "🚥"), ("🚧", "🚧"), ("🚨", "🚨"), ("🚩", "🚩"), ("🚪", "🚪"), ("🚫", "🚫"), ("🚬", "🚬"), ("🚭", "🚭"), ("🚲", "🚲"), ("🚶", "🚶"), ("🚹", "🚹"), ("🚺", "🚺"), ("🚻", "🚻"), ("🚼", "🚼"), ("🚽", "🚽"), ("🚾", "🚾"), ("🛀", "🛀")]
49 | couples.forEach({
50 | XCTAssert($0.1.escapeHTML == $0.0, "\($0.1)->\($0.1.escapeHTML)")
51 | XCTAssert($0.0.unescapeHTML == $0.1, "\($0.1)->\($0.1.escapeHTML)")
52 | XCTAssert($0.0.escapeHTML.unescapeHTML == $0.0, "\($0.1)->\($0.1.escapeHTML)")
53 | })
54 | }
55 |
56 | // MARK: - Test for round-tripping escaped HTML
57 |
58 | func testStringRoundtrippingEscapedHTML() {
59 |     let original = "This test &<>©`\"™®๒०᠐٧~~"
60 |     XCTAssert(original == original.escapeHTML.unescapeHTML, "Error: \(original)") // escaping followed by unescaping must give back the input
61 | }
62 |
63 | // MARK: - Test for handling HTML Special characters
64 |
65 | let testCount = 1000
66 | let stringToBeUnescaped = ""&'<>ŒœŠšŸˆ˜ –—‘’‚“”„†‡‰‹›€hoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahog"
67 | let stringToBeEscaped = "\"&'<>ŒœŠšŸˆ˜ –—‘’‚“”„†‡‰‹›€hoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahoghoge©a©aaaaa©aaaaahog"
68 |
69 | let escape1 = """
70 |
71 | func testUnescapePerformance() {
72 | self.measure {
73 | for _ in 0...size * chars.count)
110 |
111 | let stringToBeEscaped = String(data: data, encoding: String.Encoding.utf16LittleEndian)!
112 |
113 | XCTAssert(stringToBeEscaped.escapeHTML == string, "HTML escaping failed")
114 | XCTAssert("".escapeHTML == "<this & that>", "HTML escaping failed")
115 | XCTAssert("パン・&ド・カンパーニュ".escapeHTML == "パン・&ド・カンパーニュ", "HTML escaping failed")
116 | XCTAssert("abcا1ب<تdef&".escapeHTML == "abcا1ب<تdef&", "HTML escaping failed")
117 | XCTAssert("".escapeHTML == "", "HTML escaping failed")
118 | }
119 |
120 | func testStringByUnescapingHTML() {
121 | let string = ""&'<> ¡¢£¤¥"
122 | + "¦§¨©ª«¬®¯°"
123 | + "±²³´µ¶·¸¹"
124 | + "º»¼½¾¿ÀÁ"
125 | + "ÂÃÄÅÆÇÈÉ"
126 | + "ÊËÌÍÎÏÐÑÒ"
127 | + "ÓÔÕÖרÙÚ"
128 | + "ÛÜÝÞßàáâã"
129 | + "äåæçèéêëì"
130 | + "íîïðñòóôõ"
131 | + "ö÷øùúûüýþ"
132 | + "ÿŒœŠšŸƒˆ˜"
133 | + "ΑΒΓΔΕΖΗΘΙ"
134 | + "ΚΛΜΝΞΟΠΡΣΤ"
135 | + "ΥΦΧΨΩαβγδ"
136 | + "εζηθικλμνξ"
137 | + "οπρςστυφχψ"
138 | + "ωϑϒϖ "
139 | + "–—‘’‚“”"
140 | + "„†‡•…‰′″"
141 | + "‹›‾⁄€℘ℑℜ™"
142 | + "ℵ←↑→↓↔↵⇐⇑⇒"
143 | + "⇓⇔∀∂∃∅∇∈∉∋"
144 | + "∏∑−∗√∝∞∠∧∨"
145 | + "∩∪∫∴∼≅≈≠≡≤≥"
146 | + "⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈"
147 | + "⌉⌊⌋〈〉◊♠♣♥"
148 | + "♦"
149 |
150 | let chars: [unichar] = [
151 | 34, 38, 39, 60, 62, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
152 | 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
153 | 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
154 | 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215,
155 | 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
156 | 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245,
157 | 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 338, 339, 352, 353, 376,
158 | 402, 710, 732, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924,
159 | 925, 926, 927, 928, 929, 931, 932, 933, 934, 935, 936, 937, 945, 946, 947,
160 | 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962,
161 | 963, 964, 965, 966, 967, 968, 969, 977, 978, 982, 8194, 8195, 8201, 8204,
162 | 8205, 8206, 8207, 8211, 8212, 8216, 8217, 8218, 8220, 8221, 8222, 8224, 8225,
163 | 8226, 8230, 8240, 8242, 8243, 8249, 8250, 8254, 8260, 8364, 8472, 8465, 8476,
164 | 8482, 8501, 8592, 8593, 8594, 8595, 8596, 8629, 8656, 8657, 8658, 8659, 8660,
165 | 8704, 8706, 8707, 8709, 8711, 8712, 8713, 8715, 8719, 8721, 8722, 8727, 8730,
166 | 8733, 8734, 8736, 8743, 8744, 8745, 8746, 8747, 8756, 8764, 8773, 8776, 8800,
167 | 8801, 8804, 8805, 8834, 8835, 8836, 8838, 8839, 8853, 8855, 8869, 8901, 8968,
168 | 8969, 8970, 8971, 9001, 9002, 9674, 9824, 9827, 9829, 9830
169 | ]
170 |
171 | let s = string.unescapeHTML
172 |
173 | for i in 0...allocate(capacity: 1)
175 | defer { buffer.deallocate() }
176 | buffer.pointee = chars[i]
177 | guard let testString = String(bytesNoCopy: buffer, length: MemoryLayout.size, encoding: String.Encoding.utf16LittleEndian, freeWhenDone: false) else { XCTFail(); return }
178 | let r = NSRange(location: i, length: 1)
179 | XCTAssert(testString == (s as NSString).substring(with: r), "\(chars[i])=>\((s as NSString).substring(with: r).unescapeHTML)")
180 | }
181 |
182 | XCTAssert("ABC".unescapeHTML == "ABC", "HTML unescaping failed")
183 | XCTAssert("".unescapeHTML == "", "HTML unescaping failed")
184 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed")
185 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed")
186 | XCTAssert("A&Bang;C".unescapeHTML == "A&Bang;C", "HTML unescaping failed")
187 | XCTAssert("AA;".unescapeHTML == "AA;", "HTML unescaping failed")
188 | XCTAssert("&".unescapeHTML == "&", "HTML unescaping failed")
189 | XCTAssert("&;".unescapeHTML == "&;", "HTML unescaping failed")
190 | XCTAssert("&x;".unescapeHTML == "&x;", "HTML unescaping failed")
191 | XCTAssert("&X;".unescapeHTML == "&X;", "HTML unescaping failed")
192 | XCTAssert(";".unescapeHTML == ";", "HTML unescaping failed")
193 | XCTAssert("<this & that>".unescapeHTML == "", "HTML unescaping failed")
194 | }
195 |
196 | // MARK: - Test for the internal binary search function.
197 |
198 | func testBsearch() {
199 | let count = 1000
200 | let candidates1 = Set((0.. Int in Int(arc4random() % 10000)}))
201 | let candidates2 = Set((0.. Int in Int(arc4random() % 10000)}))
202 |
203 | let queries1 = candidates1.intersection(candidates2)
204 | let queries2 = candidates2.subtracting(queries1)
205 |
206 | func comp(v1: Int, v2: Int) -> Int {
207 | if v1 > v2 {
208 | return 1
209 | } else if v1 < v2 {
210 | return -1
211 | } else {
212 | return 0
213 | }
214 | }
215 |
216 | let array = Array(candidates1).sorted()
217 |
218 | queries1.forEach({
219 | if let result = bsearch(with: $0, from: array, comparator: comp) {
220 | XCTAssert(result.1 < Int(log2(Double(count)) + 2), "Count of searching is wrong.")
221 | } else {
222 | XCTFail("Search is failed.")
223 | }
224 | })
225 |
226 | queries2.forEach({
227 | if bsearch(with: $0, from: array, comparator: comp) != nil {
228 | XCTFail("Search is failed.")
229 | } else {
230 | }
231 | })
232 | }
233 | }
234 |
--------------------------------------------------------------------------------
/Tests/HTMLSpecialCharactersTests/XCTestManifests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 |
3 | #if !canImport(ObjectiveC)
4 | public func allTests() -> [XCTestCaseEntry] {
5 |     // Test case entries exported by this module; `Tests/LinuxMain.swift` passes them to `XCTMain`.
6 |     let entries = [testCase(HTMLSpecialCharactersTests.allTests)]
7 |     return entries
8 | }
9 | #endif
10 |
--------------------------------------------------------------------------------
/Tests/LinuxMain.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 |
3 | import HTMLSpecialCharactersTests
4 |
5 | // Collect the test cases exported by the test module and hand them to the XCTest runner.
6 | let tests: [XCTestCaseEntry] = HTMLSpecialCharactersTests.allTests()
7 | XCTMain(tests)
8 |
--------------------------------------------------------------------------------