├── .gitignore ├── LICENSE.html ├── LICENSE_files ├── css.css ├── css_002.css └── css_003.css ├── README.md ├── index.html ├── index.js ├── locale ├── en_US.json ├── index.css ├── index.html └── index.js ├── package.json ├── slowparse.js ├── src ├── CSSParser.js ├── DOMBuilder.js ├── DocumentFragment.js ├── HTMLParser.js ├── Node.js ├── ParseError.js ├── ParseErrorBuilders.js ├── Stream.js ├── checkMixedContent.js ├── index.js ├── shim │ └── errors.jquery.js └── voidHtmlElements.js ├── style.css ├── test.js └── test ├── node └── qunit-shim.js └── test-slowparse.js /.gitignore: -------------------------------------------------------------------------------- 1 | docs 2 | /node_modules/ 3 | -------------------------------------------------------------------------------- /LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Mozilla Public License, version 2.0 7 | 10 | 11 | 12 | 13 | 14 | 15 | 75 | 76 | 77 |

Mozilla Public License
Version 2.0

78 |

1. Definitions

79 |
80 |
1.1. “Contributor”
81 |

means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software.

82 |
83 |
1.2. “Contributor Version”
84 |

means the combination of the Contributions of others (if any) 85 | used by a Contributor and that particular Contributor’s Contribution.

86 |
87 |
1.3. “Contribution”
88 |

means Covered Software of a particular Contributor.

89 |
90 |
1.4. “Covered Software”
91 |

means Source Code Form to which the initial Contributor has 92 | attached the notice in Exhibit A, the Executable Form of such Source 93 | Code Form, and Modifications of such Source Code Form, in each case 94 | including portions thereof.

95 |
96 |
1.5. “Incompatible With Secondary Licenses”
97 |

means

98 |
    99 |
  1. that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or

  2. 100 |
  3. that the Covered Software was made available under the terms of 101 | version 1.1 or earlier of the License, but not also under the terms of a 102 | Secondary License.

  4. 103 |
104 |
105 |
1.6. “Executable Form”
106 |

means any form of the work other than Source Code Form.

107 |
108 |
1.7. “Larger Work”
109 |

means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software.

110 |
111 |
1.8. “License”
112 |

means this document.

113 |
114 |
1.9. “Licensable”
115 |

means having the right to grant, to the maximum extent possible, 116 | whether at the time of the initial grant or subsequently, any and all of 117 | the rights conveyed by this License.

118 |
119 |
1.10. “Modifications”
120 |

means any of the following:

121 |
    122 |
  1. any file in Source Code Form that results from an addition to, 123 | deletion from, or modification of the contents of Covered Software; or

  2. 124 |
  3. any new file in Source Code Form that contains any Covered Software.

  4. 125 |
126 |
127 |
1.11. “Patent Claims” of a Contributor
128 |

means any patent claim(s), including without limitation, method, 129 | process, and apparatus claims, in any patent Licensable by such 130 | Contributor that would be infringed, but for the grant of the License, 131 | by the making, using, selling, offering for sale, having made, import, 132 | or transfer of either its Contributions or its Contributor Version.

133 |
134 |
1.12. “Secondary License”
135 |

means either the GNU General Public License, Version 2.0, the GNU 136 | Lesser General Public License, Version 2.1, the GNU Affero General 137 | Public License, Version 3.0, or any later versions of those licenses.

138 |
139 |
1.13. “Source Code Form”
140 |

means the form of the work preferred for making modifications.

141 |
142 |
1.14. “You” (or “Your”)
143 |

means an individual or a legal entity exercising rights under 144 | this License. For legal entities, “You” includes any entity that 145 | controls, is controlled by, or is under common control with You. For 146 | purposes of this definition, “control” means (a) the power, direct or 147 | indirect, to cause the direction or management of such entity, whether 148 | by contract or otherwise, or (b) ownership of more than fifty percent 149 | (50%) of the outstanding shares or beneficial ownership of such entity.

150 |
151 |
152 |

2. License Grants and Conditions

153 |

2.1. Grants

154 |

Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license:

155 |
    156 |
  1. under intellectual property rights (other than patent or 157 | trademark) Licensable by such Contributor to use, reproduce, make 158 | available, modify, display, perform, distribute, and otherwise exploit 159 | its Contributions, either on an unmodified basis, with Modifications, or 160 | as part of a Larger Work; and

  2. 161 |
  3. under Patent Claims of such Contributor to make, use, sell, offer 162 | for sale, have made, import, and otherwise transfer either its 163 | Contributions or its Contributor Version.

  4. 164 |
165 |

2.2. Effective Date

166 |

The licenses granted in Section 2.1 with respect to any 167 | Contribution become effective for each Contribution on the date the 168 | Contributor first distributes such Contribution.

169 |

2.3. Limitations on Grant Scope

170 |

The licenses granted in this Section 2 are the only rights 171 | granted under this License. No additional rights or licenses will be 172 | implied from the distribution or licensing of Covered Software under 173 | this License. Notwithstanding Section 2.1(b) above, no patent 174 | license is granted by a Contributor:

175 |
    176 |
  1. for any code that a Contributor has removed from Covered Software; or

  2. 177 |
  3. for infringements caused by: (i) Your and any other third party’s 178 | modifications of Covered Software, or (ii) the combination of its 179 | Contributions with other software (except as part of its Contributor 180 | Version); or

  4. 181 |
  5. under Patent Claims infringed by Covered Software in the absence of its Contributions.

  6. 182 |
183 |

This License does not grant any rights in the trademarks, service 184 | marks, or logos of any Contributor (except as may be necessary to comply 185 | with the notice requirements in Section 3.4).

186 |

2.4. Subsequent Licenses

187 |

No Contributor makes additional grants as a result of Your choice to 188 | distribute the Covered Software under a subsequent version of this 189 | License (see Section 10.2) or under the terms of a Secondary 190 | License (if permitted under the terms of Section 3.3).

191 |

2.5. Representation

192 |

Each Contributor represents that the Contributor believes its 193 | Contributions are its original creation(s) or it has sufficient rights 194 | to grant the rights to its Contributions conveyed by this License.

195 |

2.6. Fair Use

196 |

This License is not intended to limit any rights You have under 197 | applicable copyright doctrines of fair use, fair dealing, or other 198 | equivalents.

199 |

2.7. Conditions

200 |

Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1.

201 |

3. Responsibilities

202 |

3.1. Distribution of Source Form

203 |

All distribution of Covered Software in Source Code Form, including 204 | any Modifications that You create or to which You contribute, must be 205 | under the terms of this License. You must inform recipients that the 206 | Source Code Form of the Covered Software is governed by the terms of 207 | this License, and how they can obtain a copy of this License. You may 208 | not attempt to alter or restrict the recipients’ rights in the Source 209 | Code Form.

210 |

3.2. Distribution of Executable Form

211 |

If You distribute Covered Software in Executable Form then:

212 |
    213 |
  1. such Covered Software must also be made available in Source Code 214 | Form, as described in Section 3.1, and You must inform recipients 215 | of the Executable Form how they can obtain a copy of such Source Code 216 | Form by reasonable means in a timely manner, at a charge no more than 217 | the cost of distribution to the recipient; and

  2. 218 |
  3. You may distribute such Executable Form under the terms of this 219 | License, or sublicense it under different terms, provided that the 220 | license for the Executable Form does not attempt to limit or alter the 221 | recipients’ rights in the Source Code Form under this License.

  4. 222 |
223 |

3.3. Distribution of a Larger Work

224 |

You may create and distribute a Larger Work under terms of Your 225 | choice, provided that You also comply with the requirements of this 226 | License for the Covered Software. If the Larger Work is a combination of 227 | Covered Software with a work governed by one or more Secondary 228 | Licenses, and the Covered Software is not Incompatible With Secondary 229 | Licenses, this License permits You to additionally distribute such 230 | Covered Software under the terms of such Secondary License(s), so that 231 | the recipient of the Larger Work may, at their option, further 232 | distribute the Covered Software under the terms of either this License 233 | or such Secondary License(s).

234 |

3.4. Notices

235 |

You may not remove or alter the substance of any license notices 236 | (including copyright notices, patent notices, disclaimers of warranty, 237 | or limitations of liability) contained within the Source Code Form of 238 | the Covered Software, except that You may alter any license notices to 239 | the extent required to remedy known factual inaccuracies.

240 |

3.5. Application of Additional Terms

241 |

You may choose to offer, and to charge a fee for, warranty, support, 242 | indemnity or liability obligations to one or more recipients of Covered 243 | Software. However, You may do so only on Your own behalf, and not on 244 | behalf of any Contributor. You must make it absolutely clear that any 245 | such warranty, support, indemnity, or liability obligation is offered by 246 | You alone, and You hereby agree to indemnify every Contributor for any 247 | liability incurred by such Contributor as a result of warranty, support, 248 | indemnity or liability terms You offer. You may include additional 249 | disclaimers of warranty and limitations of liability specific to any 250 | jurisdiction.

251 |

4. Inability to Comply Due to Statute or Regulation

252 |

If it is impossible for You to comply with any of the terms of this 253 | License with respect to some or all of the Covered Software due to 254 | statute, judicial order, or regulation then You must: (a) comply with 255 | the terms of this License to the maximum extent possible; and (b) 256 | describe the limitations and the code they affect. Such description must 257 | be placed in a text file included with all distributions of the Covered 258 | Software under this License. Except to the extent prohibited by statute 259 | or regulation, such description must be sufficiently detailed for a 260 | recipient of ordinary skill to be able to understand it.

261 |

5. Termination

262 |

5.1. The rights granted under this License will terminate 263 | automatically if You fail to comply with any of its terms. However, if 264 | You become compliant, then the rights granted under this License from a 265 | particular Contributor are reinstated (a) provisionally, unless and 266 | until such Contributor explicitly and finally terminates Your grants, 267 | and (b) on an ongoing basis, if such Contributor fails to notify You of 268 | the non-compliance by some reasonable means prior to 60 days after You 269 | have come back into compliance. Moreover, Your grants from a particular 270 | Contributor are reinstated on an ongoing basis if such Contributor 271 | notifies You of the non-compliance by some reasonable means, this is the 272 | first time You have received notice of non-compliance with this License 273 | from such Contributor, and You become compliant prior to 30 days after 274 | Your receipt of the notice.

275 |

5.2. If You initiate litigation against any entity by asserting a 276 | patent infringement claim (excluding declaratory judgment actions, 277 | counter-claims, and cross-claims) alleging that a Contributor Version 278 | directly or indirectly infringes any patent, then the rights granted to 279 | You by any and all Contributors for the Covered Software under 280 | Section 2.1 of this License shall terminate.

281 |

5.3. In the event of termination under Sections 5.1 or 5.2 282 | above, all end user license agreements (excluding distributors and 283 | resellers) which have been validly granted by You or Your distributors 284 | under this License prior to termination shall survive termination.

285 |

6. Disclaimer of Warranty

286 |

Covered Software is provided under this License on an “as is” 287 | basis, without warranty of any kind, either expressed, implied, or 288 | statutory, including, without limitation, warranties that the Covered 289 | Software is free of defects, merchantable, fit for a particular purpose 290 | or non-infringing. The entire risk as to the quality and performance of 291 | the Covered Software is with You. Should any Covered Software prove 292 | defective in any respect, You (not any Contributor) assume the cost of 293 | any necessary servicing, repair, or correction. This disclaimer of 294 | warranty constitutes an essential part of this License. No use of any 295 | Covered Software is authorized under this License except under this 296 | disclaimer.

297 |

7. Limitation of Liability

298 |

Under no circumstances and under no legal theory, whether tort 299 | (including negligence), contract, or otherwise, shall any Contributor, 300 | or anyone who distributes Covered Software as permitted above, be liable 301 | to You for any direct, indirect, special, incidental, or consequential 302 | damages of any character including, without limitation, damages for lost 303 | profits, loss of goodwill, work stoppage, computer failure or 304 | malfunction, or any and all other commercial damages or losses, even if 305 | such party shall have been informed of the possibility of such damages. 306 | This limitation of liability shall not apply to liability for death or 307 | personal injury resulting from such party’s negligence to the extent 308 | applicable law prohibits such limitation. Some jurisdictions do not 309 | allow the exclusion or limitation of incidental or consequential 310 | damages, so this exclusion and limitation may not apply to You.

311 |

8. Litigation

312 |

Any litigation relating to this License may be brought only in the 313 | courts of a jurisdiction where the defendant maintains its principal 314 | place of business and such litigation shall be governed by laws of that 315 | jurisdiction, without reference to its conflict-of-law provisions. 316 | Nothing in this Section shall prevent a party’s ability to bring 317 | cross-claims or counter-claims.

318 |

9. Miscellaneous

319 |

This License represents the complete agreement concerning the subject 320 | matter hereof. If any provision of this License is held to be 321 | unenforceable, such provision shall be reformed only to the extent 322 | necessary to make it enforceable. Any law or regulation which provides 323 | that the language of a contract shall be construed against the drafter 324 | shall not be used to construe this License against a Contributor.

325 |

10. Versions of the License

326 |

10.1. New Versions

327 |

Mozilla Foundation is the license steward. Except as provided in 328 | Section 10.3, no one other than the license steward has the right 329 | to modify or publish new versions of this License. Each version will be 330 | given a distinguishing version number.

331 |

10.2. Effect of New Versions

332 |

You may distribute the Covered Software under the terms of the 333 | version of the License under which You originally received the Covered 334 | Software, or under the terms of any subsequent version published by the 335 | license steward.

336 |

10.3. Modified Versions

337 |

If you create software not governed by this License, and you want to 338 | create a new license for such software, you may create and use a 339 | modified version of this License if you rename the license and remove 340 | any references to the name of the license steward (except to note that 341 | such modified license differs from this License).

342 |

10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses

343 |

If You choose to distribute Source Code Form that is Incompatible 344 | With Secondary Licenses under the terms of this version of the License, 345 | the notice described in Exhibit B of this License must be attached.

346 |

Exhibit A - Source Code Form License Notice

347 |
348 |

This Source Code Form is subject to the terms of the Mozilla Public 349 | License, v. 2.0. If a copy of the MPL was not distributed with this 350 | file, You can obtain one at http://mozilla.org/MPL/2.0/.

351 |
352 |

If it is not possible or desirable to put the notice in a particular 353 | file, then You may include the notice in a location (such as a LICENSE 354 | file in a relevant directory) where a recipient would be likely to look 355 | for such a notice.

356 |

You may add additional accurate notices of copyright ownership.

357 |

Exhibit B - “Incompatible With Secondary Licenses” Notice

358 |
359 |

This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.

360 |
361 | 362 | 363 | -------------------------------------------------------------------------------- /LICENSE_files/css.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'Droid Sans Mono'; 3 | font-style: normal; 4 | font-weight: normal; 5 | src: local('Droid Sans Mono'), local('DroidSansMono'), url('http://themes.googleusercontent.com/static/fonts/droidsansmono/v4/ns-m2xQYezAtqh7ai59hJaH0X__W3S3MJL29bc5CWfs.woff') format('woff'); 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE_files/css_002.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'Crimson Text'; 3 | font-style: normal; 4 | font-weight: normal; 5 | src: local('Crimson Text'), local('CrimsonText-Roman'), url('http://themes.googleusercontent.com/static/fonts/crimsontext/v3/3IFMwfRa07i-auYR-B-zNaRDOzjiPcYnFooOUGCOsRk.woff') format('woff'); 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE_files/css_003.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'Lora'; 3 | font-style: normal; 4 | font-weight: normal; 5 | src: local('Lora'), url('http://themes.googleusercontent.com/static/fonts/lora/v5/nAKwuw6_dIh5kwvpj3ShNfesZW2xOQ-xsNqO47m55DA.woff') format('woff'); 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Slowparse, a friendlier HTML5 parser 2 | 3 | Slowparse is an experimental JavaScript-based HTML5 parser born out of Mozilla Webmaking initiatives. 
A live demo of Slowparse can be found over at http://mozilla.github.io/slowparse 4 | 5 | ## Installing Slowparse 6 | 7 | The Slowparse library can be used both in the browser and in environments that support commonjs requirements such as Node.js, by respectively including it as a script resource: 8 | ``` 9 | 10 | ``` 11 | or as a module import, by installing it using npm: 12 | ``` 13 | $> npm install slowparse 14 | ``` 15 | After installing, Slowparse can then be required into your code like any other module: 16 | ``` 17 | var Slowparse = require("slowparse"); 18 | ``` 19 | 20 | ## Using Slowparse 21 | 22 | To use Slowparse, call its `.HTML` function: 23 | 24 | ``` 25 | var result = Slowparse.HTML(document, '... html source here ...', options); 26 | ``` 27 | 28 | This function takes a DOM context as first argument, and HTML5 source code as second argument. The `options` object is optional, and if used can contain: 29 | 30 | ### options.errorDetectors 31 | 32 | This is an array of "additional parsers" that will be called as `detector(html, domBuilder.fragment)` when no errors are found by Slowparse. These can be useful when you have additional constraints on what HTML source is permitted in your own software that cannot or should not be dealt with by Slowparse itself. 33 | 34 | This is mostly a convenience construction, and using it is equivalent to doing an `if (!result.error)` test and running the input through your own, additional parsers if no errors were found. 35 | 36 | ### options.disallowActiveAttributes 37 | 38 | This option can be either `true` or `false`, and when `true` will blank out attributes when it sees any that start with `on` such as `onclick`, `onload`, etc. 39 | 40 | This means the DOM formed during the Slowparse run is a tiny bit more secure, although you will still be responsible for checking for potentially harmful active content (Slowparse is not a security tool, and should not be used as such). 
41 | 42 | ### Validating HTML 43 | 44 | Slowparse accepts both full HTML5 documents (starting at `` and ending in ``) as well as well formatted HTML5 fragments. Any input that does not pass HTML5 validation leads to a `result` output with an error property: 45 | ``` 46 | var result = Slowparse.HTML(document, ''); 47 | console.log(result.error); 48 | /* 49 | { 50 | type: 'INVALID_ATTR_NAME', 51 | start: 3, 52 | end: 8, 53 | attribute: { name: { value: "+" }}, 54 | cursor: 3 55 | }; 56 | */ 57 | ``` 58 | 59 | There are a large number of errors that Slowparse can generate in order to indicate not just that a validation error occurred, but also what kind of error it was. The full list of reportable errors can currently be found in the [ParseErrorBuilders.js](./src/ParseErrorBuilders.js) file. 60 | 61 | ### Using validated HTML 62 | 63 | If Slowparse yields a result without an `.error` property, the input HTML is considered valid HTML5 code, and can be injected into whatever context you need it injected into. 64 | ``` 65 | var input = "..."; 66 | 67 | var result = Slowparse.HTML(document, input); 68 | 69 | if (!result.error) { 70 | activeContext.inject(input); 71 | } else { 72 | notifyUserOfError(result.error); 73 | } 74 | ``` 75 | 76 | Note that Slowparse generates an internal DOM for validation that can be tapped into, as `result.document`. If no options object with the `disallowActiveAttributes` is passed during parsing, this DOM should be identical to the one built by simply injecting your source code. If `disallowActiveAttributes:true` is used, this DOM will be the same as the one built by the browser, with the exception of `on...` attributes, which will have been forced empty to prevent certain immediate script actions from kicking in. 77 | 78 | ### Getting friendlier error messages 79 | 80 | By default, Slowparse generates error objects. 
However, if you prefer human-readable error messages, the `./locale/` directory contains a file `en_US.json` that consists of English (US) localized error snippets. These are bits of HTML5 with templating variables that can be instantiated with the corresponding error object. 81 | 82 | For example, if you are getting a `MISSING_CSS_BLOCK_CLOSER` error, the locale file specifies the following human-friendly error: 83 | ``` 84 |

Missing block closer or next property:value; pair following 85 | [[cssValue.value]].

86 | ``` 87 | We can replace `[[cssValue.start]]` with Slowparse's `result.error.cssValue.start` and `[[cssValue.end]]` with `result.error.cssValue.end`, and the same for `cssValue.value`, to generate a functional error. For instance, if there is an error in a CSS block after a property `background:white`, with "white" on the 24th character in the stream, the error might resolve as: 88 | ``` 89 |

Missing block closer or next property:value; pair following 90 | white.

91 | ``` 92 | Note that Slowparse has no built-in mechanism for generating these errors, but only provides you with the error objects as a result from parsing, and the locale file for resolving error objects to uninstantiated human-readable HTML snippets. 93 | 94 | ## Working on Slowparse 95 | 96 | The slowparse code is split up into modules, located in the `./src` directory, which are aggregated by `./src/index.js` for constructing the slowparse library. This construction is handled by [browserify](https://www.npmjs.com/package/browserify), and runs every time the `npm test` command is run, yielding a rebuilt `slowparse.js`. 97 | 98 | If you wish to help out on Slowparse, we try to keep Slowparse test-driven, so if you have bad code that is being parsed incorrectly, create a new test case in the `./test/test-slowparse.js` file. To see how tests work, simply open that file and have a look at the various tests already in place. Generally all you need to do is copy-paste a test case that's similar to what you're testing, and change the description, input HTML, and test summary for pass/fail results. 99 | 100 | Passing all tests is the basic prerequisite to a patch for Slowparse landing, so make sure your code comes with tests and all of them pass =) 101 | 102 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Slowparse, a friendlier HTML5 parser 5 | 6 | 7 | 8 |
9 |

Slowparse, a friendlier HTML5 parser.

10 |
11 | 12 |

Slowparse is an experimental HTML5 parser that yields friendlier errors than 13 | most other parsers, using localisable strings. The fields below let you test 14 | Slowparse by writing HTML in the left panel, which will either render in the 15 | right panel if correct, or will show the base error detected in the middle 16 | panel with the error rendered in a friendlier, human-readable form on the right.

17 | 18 |
19 | 28 | 29 |
30 |
31 | 32 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var xhr = new XMLHttpRequest(); 2 | xhr.open("GET", "locale/en_US.json", true); 3 | xhr.onreadystatechange = function() { 4 | if(xhr.readyState !== 4 || (xhr.status !== 0 && xhr.status !== 200)) return; 5 | 6 | var strings = xhr.responseText; 7 | var stringmap = JSON.parse(strings); 8 | 9 | var input = document.querySelector(".text.pane"), 10 | errors = document.querySelector(".error.pane"), 11 | preview = document.querySelector(".preview.pane"); 12 | 13 | var frame = document.createElement("iframe"); 14 | preview.innerHTML = ""; 15 | preview.appendChild(frame); 16 | var fdoc = frame.contentDocument; 17 | 18 | var bePre = function(v) { return v.replace(//g,'>'); }; 19 | var unPre = function(v) { return v.replace(/</g,'<').replace(/>/g,'>'); }; 20 | 21 | var hoverHandler = function(basedata) { 22 | return function(evt) { 23 | var target = evt.target; 24 | var hl = target.getAttribute("data-highlight"); 25 | if(!hl) return; 26 | var values = hl.split(",").map(function(v) { return parseInt(v,10); }); 27 | var start = values[0]; 28 | var end = values[1]; 29 | var pre = fdoc.querySelector("pre"); 30 | var marked = basedata; 31 | var marked = bePre(marked.slice(0,start) + "" + marked.slice(start, end) + "" + marked.slice(end)); 32 | marked = marked.replace("<highlight>", ""); 33 | marked = marked.replace("</highlight>", ""); 34 | pre.innerHTML = marked; 35 | }; 36 | }; 37 | 38 | 39 | var setPreview = function(data, original) { 40 | fdoc.open(); 41 | fdoc.write(data); 42 | fdoc.write(""); 43 | fdoc.close(); 44 | fdoc.addEventListener("mouseover", hoverHandler(original)); 45 | }; 46 | 47 | var resolveError = function(data, error, map) { 48 | var template = map[error.type]; 49 | var errorHTML = template.replace(/\[\[([^\]]+)\]\]/g, function(_, term) { 50 | var 
terms = term.indexOf(".") > -1 ? term.split(".") : [term]; 51 | var value = error; 52 | while(terms.length > 0) { 53 | value = value[terms.splice(0,1)[0]]; 54 | } 55 | return value; 56 | }); 57 | var suffix = "\n
\n
\n" + bePre(data) + "
"; 58 | errorHTML += suffix; 59 | return errorHTML 60 | }; 61 | 62 | var timeout = false; 63 | 64 | var update = function() { 65 | var data = input.value; 66 | var result = Slowparse.HTML(document, data); 67 | if (result.error) { 68 | errors.textContent = JSON.stringify(result.error, false, 2); 69 | setPreview(resolveError(data, result.error, stringmap), data); 70 | } else { 71 | errors.textContent = ''; 72 | setPreview(data); 73 | } 74 | }; 75 | 76 | input.addEventListener("keyup", function(evt) { 77 | setTimeout(update, 100); 78 | }); 79 | 80 | update(); 81 | } 82 | xhr.send(null); 83 | -------------------------------------------------------------------------------- /locale/en_US.json: -------------------------------------------------------------------------------- 1 | { 2 | "ATTRIBUTE_IN_CLOSING_TAG": "

The closing </[[closeTag.name]]> tag here cannot contain any attributes.

", 3 | "CLOSE_TAG_FOR_VOID_ELEMENT": "

The closing </[[closeTag.name]]> tag here is for a void element (that is, an element that doesn't need to be closed).

", 4 | "CSS_MIXED_ACTIVECONTENT": "

The css property [[cssProperty.property]] has a url() value here that currently points to an insecure resource. You can make this error disappear by logging into webmaker. For more information on how modern browsers signal insecure content, visit this link.

", 5 | "EVENT_HANDLER_ATTR_NOT_ALLOWED": "

Sorry, but security restrictions on this site prevent you from using the JavaScript event handler attribute here. If you really need to use JavaScript, consider using jsbin or jsfiddle.

", 6 | "HTML_CODE_IN_CSS_BLOCK": "

HTML code was detected in CSS context starting here

", 7 | "HTTP_LINK_FROM_HTTPS_PAGE": "

The <[[openTag.name]]> tag's [[attribute.name.value]] attribute (here) currently points to an insecure resource. You can make this error disappear by logging into webmaker. For more information on how modern browsers signal insecure content, visit this link.

", 8 | "INVALID_ATTR_NAME": "

The attribute here has a name that is not permitted under HTML5 naming conventions.

", 9 | "UNSUPPORTED_ATTR_NAMESPACE": "

The attribute here uses an attribute namespace that is not permitted under HTML5 conventions.

", 10 | "MULTIPLE_ATTR_NAMESPACES": "

The attribute here has multiple namespaces. Check your text and make sure there's only a single namespace prefix for the attribute.

", 11 | "INVALID_CSS_DECLARATION": "

This CSS declaration never closes.

", 12 | "INVALID_CSS_PROPERTY_NAME": "

CSS property [[cssProperty.property]] does not exist. You may want to see a list of CSS properties.

", 13 | "INVALID_CSS_RULE": "

This CSS rule is not legal CSS.

", 14 | "INVALID_TAG_NAME": "

The < character here appears to be the beginning of a tag, but is not followed by a valid tag name.

If you just want a < to appear on your Web page, try using &lt; instead.

Or, see a list of HTML5 tags.

", 15 | "JAVASCRIPT_URL_NOT_ALLOWED": "

Sorry, but security restrictions on this site prevent you from using the javascript: URL here. If you really need to use JavaScript, consider using jsbin or jsfiddle.

", 16 | "MISMATCHED_CLOSE_TAG": "

The closing </[[closeTag.name]]> tag here doesn't pair with the opening <[[openTag.name]]> tag here. This is likely due to a missing or misplaced </[[openTag.name]]> tag.", 17 | "MISSING_CSS_BLOCK_CLOSER": "

Missing block closer or next property:value; pair following [[cssValue.value]].

", 18 | "MISSING_CSS_BLOCK_OPENER": "

Missing block opener after [[cssSelector.selector]].

", 19 | "MISSING_CSS_PROPERTY": "

Missing property for [[cssSelector.selector]].

", 20 | "MISSING_CSS_SELECTOR": "

Missing either a new CSS selector or the </style> tag here.

", 21 | "MISSING_CSS_VALUE": "

Missing value for [[cssProperty.property]].

", 22 | "SCRIPT_ELEMENT_NOT_ALLOWED": "

Sorry, but security restrictions on this site prevent you from using <script> tags here. If you really need to use JavaScript, consider using jsbin or jsfiddle.

", 23 | "SELF_CLOSING_NON_VOID_ELEMENT": "

The <[[name]]> tag here can't be self-closed, because <[[name]]> is not a void element; it must be closed with a separate </[[name]]> tag.

", 24 | "UNCAUGHT_CSS_PARSE_ERROR": "

A parse error occurred outside expected cases: [[error.msg]]

", 25 | "UNCLOSED_TAG": "

The <[[openTag.name]]> tag here never closes.

", 26 | "UNEXPECTED_CLOSE_TAG": "

The closing </[[closeTag.name]]> tag here doesn't pair with anything, because there are no opening tags that need to be closed.

", 27 | "UNFINISHED_CSS_PROPERTY": "

Property [[cssProperty.property]] still needs finalizing with :

", 28 | "UNFINISHED_CSS_SELECTOR": "

Selector [[cssSelector.selector]] still needs finalizing with {

", 29 | "UNFINISHED_CSS_VALUE": "

Value [[cssValue.value]] still needs finalizing with ;

", 30 | "UNKOWN_CSS_KEYWORD": "

The CSS @keyword [[cssKeyword.value]] does not match any known @keywords.

", 31 | "UNQUOTED_ATTR_VALUE": "

The attribute value here should start with an opening double quote.

", 32 | "UNTERMINATED_ATTR_VALUE": "

The <[[openTag.name]]> tag's [[attribute.name.value]] attribute has a value here that doesn't end with a closing double quote.

", 33 | "UNTERMINATED_CLOSE_TAG": "

The closing </[[closeTag.name]]> tag here doesn't end with a >.

", 34 | "UNTERMINATED_COMMENT": "

The comment here doesn't end with a -->.

", 35 | "UNTERMINATED_CSS_COMMENT": "

The CSS comment here doesn't end with a */.

", 36 | "UNBOUND_ATTRIBUTE_VALUE": "

The attribute value [[value]] here appears to be detached from an attribute. You may be missing an '=' sign.

", 37 | "UNTERMINATED_OPEN_TAG": "

The opening <[[openTag.name]]> tag here doesn't end with a >.

" 38 | } 39 | -------------------------------------------------------------------------------- /locale/index.css: -------------------------------------------------------------------------------- 1 | html, body { 2 | font-size: 12pt; 3 | } 4 | 5 | em[data-highlight] { 6 | border-bottom: 1px dotted red; 7 | color: red; 8 | } 9 | 10 | code { 11 | font-family: monospace; 12 | background: rgba(0,0,0,0.4); 13 | color: white; 14 | padding: 1px 3px; 15 | font-size: 0.9em; 16 | } 17 | 18 | #entries div { 19 | background: lightblue; 20 | border: 1px solid black; 21 | padding: 0; 22 | margin: 0; 23 | } 24 | 25 | #entries div + div { 26 | margin-top: 2em; 27 | } 28 | 29 | #entries h1 { 30 | font-family: sans-serif; 31 | font-size: 1.5em; 32 | background:white; 33 | padding: 2px; 34 | margin: 0; 35 | text-indent: 0.2em; 36 | } 37 | 38 | #entries p { 39 | font-family: serif; 40 | font-size: 1em; 41 | text-indent: 1em; 42 | } 43 | 44 | #entries pre { 45 | padding: 1em; 46 | background: rgba(255,255,250, 0.6); 47 | font-family: monospace; 48 | color: darkblue; 49 | font-size: 1em; 50 | white-space: normal; 51 | margin: 0; 52 | } 53 | -------------------------------------------------------------------------------- /locale/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Slowparse error localization 5 | 6 | 7 | 8 |
9 |

Slowparse errors

10 |
11 | 12 |
13 |

The following are default strings that can be localized to match a target locale, normatively expressed in the en-US locale.

14 |
15 |
16 | 17 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /locale/index.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var template = document.createElement("div"); 3 | var script = document.querySelector("script[type='text/html']"); 4 | template.innerHTML = script.textContent; 5 | 6 | function exists(v) { 7 | return v!==null && v!==undefined; 8 | } 9 | 10 | function templatify(input, macros) { 11 | if (!macros) return input.replace(/\[\[[^\]]+\]\]/g, ''); 12 | return input.replace(/\[\[([^\]]+)\]\]/g, function(a,b) { 13 | b = b.split("."); 14 | rep = macros[b[0]]; 15 | b = b.slice(1); 16 | while(b && b.length>0 && rep) { 17 | rep = rep[b.splice(0,1)[0]]; 18 | } 19 | if (exists(rep)) return rep; 20 | return ''; 21 | }); 22 | } 23 | 24 | var genericInterval = { 25 | name: "test", 26 | msg: "this is a test", 27 | value: "test", 28 | selector: "#test .test", 29 | property: "test", 30 | start: 0, 31 | end: 10 32 | }; 33 | 34 | var genericObject = { 35 | name: "test", 36 | value: "test", 37 | start: 0, 38 | end: 10, 39 | openTag: genericInterval, 40 | closeTag: genericInterval, 41 | cssValue: genericInterval, 42 | cssSelector: genericInterval, 43 | cssProperty: genericInterval, 44 | cssDeclaration: genericInterval, 45 | cssKeyword: { start: 0, end: 0, value: "@test" }, 46 | interval: genericInterval, 47 | error: genericInterval 48 | }; 49 | 50 | var xhr = new XMLHttpRequest(); 51 | xhr.open("GET", "en_US.json", true); 52 | xhr.onload = function() { 53 | try { 54 | var obj = JSON.parse(xhr.responseText); 55 | var errors = Object.keys(obj); 56 | var div = document.getElementById("entries"); 57 | errors.forEach(function(error) { 58 | var p = template.cloneNode(true); 59 | p.querySelector("h1").id = error; 60 | p.querySelector("h1").textContent = error; 61 | p.querySelector("p").innerHTML = templatify(obj[error], genericObject); 62 | p.querySelector("pre").textContent 
= obj[error]; 63 | div.appendChild(p); 64 | }); 65 | } catch (e) { 66 | alert("ERROR: unable to decode locale string as JSON."); 67 | } 68 | } 69 | xhr.onerror = function() { 70 | alert("ERROR: could not load resource 'en_US.json'."); 71 | } 72 | xhr.send(null); 73 | }()); 74 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "slowparse", 3 | "version": "1.3.0", 4 | "description": "Slowparse is a token stream parser for HTML and CSS text, recording regions of interest during the parse run and signaling any errors detected accompanied by relevant regions in the text stream, to make debugging easy.", 5 | "main": "slowparse.js", 6 | "directories": { 7 | "test": "test" 8 | }, 9 | "scripts": { 10 | "build": "browserify ./src/index.js -o slowparse.js --standalone Slowparse", 11 | "test": "node test.js", 12 | "dev": "npm run build && npm run test" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "https://github.com/mozilla/slowparse.git" 17 | }, 18 | "keywords": [ 19 | "slowparse", 20 | "html", 21 | "css", 22 | "parser" 23 | ], 24 | "author": "Mozilla Foundation", 25 | "license": "MPL 2.0", 26 | "bugs": { 27 | "url": "https://github.com/mozilla/slowparse/issues" 28 | }, 29 | "homepage": "https://github.com/mozilla/slowparse", 30 | "devDependencies": { 31 | "browserify": "^14.4.0", 32 | "jsdom": "11" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/CSSParser.js: -------------------------------------------------------------------------------- 1 | // ### CSS Parsing 2 | // 3 | // `CSSParser` is our internal CSS token stream parser object. This object 4 | // has references to the stream, as well as the HTML DOM builder that is 5 | // used by the HTML parser. 
6 | module.exports = (function(){ 7 | "use strict"; 8 | 9 | var ParseError = require("./ParseError"); 10 | 11 | //Define a property checker for https page 12 | var checkMixedContent = require("./checkMixedContent").mixedContent; 13 | 14 | function CSSParser(stream, domBuilder, warnings) { 15 | this.stream = stream; 16 | // note: we do not actually use the domBuilder during CSS parsing 17 | this.domBuilder = domBuilder; 18 | this.warnings = warnings || []; 19 | } 20 | 21 | CSSParser.prototype = { 22 | // We keep a list of all currently valid CSS properties (CSS1-CSS3). 23 | // This list does not contain vendor prefixes. 24 | cssProperties: [ 25 | "alignment-adjust","alignment-baseline","animation","animation-delay", 26 | "animation-direction","animation-duration","animation-iteration-count", 27 | "animation-name","animation-play-state","animation-timing-function", 28 | "appearance","azimuth","backface-visibility","background", 29 | "background-attachment","background-clip","background-color", 30 | "background-image","background-origin","background-position", 31 | "background-repeat","background-size","baseline-shift","binding", 32 | "bleed","bookmark-label","bookmark-level","bookmark-state", 33 | "bookmark-target","border","border-bottom","border-bottom-color", 34 | "border-bottom-left-radius","border-bottom-right-radius", 35 | "border-bottom-style","border-bottom-width","border-collapse", 36 | "border-color","border-image","border-image-outset", 37 | "border-image-repeat","border-image-slice","border-image-source", 38 | "border-image-width","border-left","border-left-color", 39 | "border-left-style","border-left-width","border-radius","border-right", 40 | "border-right-color","border-right-style","border-right-width", 41 | "border-spacing","border-style","border-top","border-top-color", 42 | "border-top-left-radius","border-top-right-radius","border-top-style", 43 | "border-top-width","border-width","bottom","box-decoration-break", 44 | 
"box-shadow","box-sizing","break-after","break-before","break-inside", 45 | "caption-side","clear","clip","color","color-profile","column-count", 46 | "column-fill","column-gap","column-rule","column-rule-color", 47 | "column-rule-style","column-rule-width","column-span","column-width", 48 | "columns","content","counter-increment","counter-reset","crop","cue", 49 | "cue-after","cue-before","cursor","direction","display", 50 | "dominant-baseline","drop-initial-after-adjust", 51 | "drop-initial-after-align","drop-initial-before-adjust", 52 | "drop-initial-before-align","drop-initial-size","drop-initial-value", 53 | "elevation","empty-cells","filter","fit","fit-position","flex-align", 54 | "flex-flow","flex-line-pack","flex-order","flex-pack","float","float-offset", 55 | "font","font-family","font-size","font-size-adjust","font-stretch", 56 | "font-style","font-variant","font-weight","grid-columns","grid-rows", 57 | "hanging-punctuation","height","hyphenate-after","hyphenate-before", 58 | "hyphenate-character","hyphenate-lines","hyphenate-resource","hyphens", 59 | "icon","image-orientation","image-rendering","image-resolution", 60 | "inline-box-align","left","letter-spacing","line-break","line-height", 61 | "line-stacking","line-stacking-ruby","line-stacking-shift", 62 | "line-stacking-strategy","list-style","list-style-image", 63 | "list-style-position","list-style-type","margin","margin-bottom", 64 | "margin-left","margin-right","margin-top","marker-offset","marks", 65 | "marquee-direction","marquee-loop","marquee-play-count","marquee-speed", 66 | "marquee-style","max-height","max-width","min-height","min-width", 67 | "move-to","nav-down","nav-index","nav-left","nav-right","nav-up", 68 | "opacity","orphans","outline","outline-color","outline-offset", 69 | "outline-style","outline-width","overflow","overflow-style", 70 | "overflow-wrap","overflow-x","overflow-y","padding","padding-bottom", 71 | "padding-left","padding-right","padding-top","page","page-break-after", 
72 | "page-break-before","page-break-inside","page-policy","pause", 73 | "pause-after","pause-before","perspective","perspective-origin", 74 | "phonemes","pitch","pitch-range","play-during","pointer-events", 75 | "position", 76 | "presentation-level","punctuation-trim","quotes","rendering-intent", 77 | "resize","rest","rest-after","rest-before","richness","right", 78 | "rotation","rotation-point","ruby-align","ruby-overhang", 79 | "ruby-position","ruby-span","src","size","speak","speak-header", 80 | "speak-numeral","speak-punctuation","speech-rate","stress","string-set", 81 | "tab-size","table-layout","target","target-name","target-new", 82 | "target-position","text-align","text-align-last","text-decoration", 83 | "text-decoration-color","text-decoration-line","text-decoration-skip", 84 | "text-decoration-style","text-emphasis","text-emphasis-color", 85 | "text-emphasis-position","text-emphasis-style","text-height", 86 | "text-indent","text-justify","text-outline","text-shadow", 87 | "text-space-collapse","text-transform","text-underline-position", 88 | "text-wrap","top","transform","transform-origin","transform-style", 89 | "transition","transition-delay","transition-duration", 90 | "transition-property","transition-timing-function","unicode-bidi", 91 | "vertical-align","visibility","voice-balance","voice-duration", 92 | "voice-family","voice-pitch","voice-pitch-range","voice-rate", 93 | "voice-stress","voice-volume","volume","white-space","widows","width", 94 | "word-break","word-spacing","word-wrap","z-index", 95 | // flexbox: 96 | "align-content", "align-items", "align-self", "flex", "flex-basis", 97 | "flex-direction", "flex-flow", "flex-grow", "flex-shrink", "flex-wrap", 98 | "justify-content"], 99 | 100 | // This helper verifies that a specific string is a known CSS property. 101 | // We include vendor-prefixed known CSS properties, like `-o-transition`. 
102 | _knownCSSProperty: function(propertyName) { 103 | propertyName = propertyName.replace(/^-.+?-/,''); 104 | return this.cssProperties.indexOf(propertyName) > -1; 105 | }, 106 | // #### The CSS Master Parse Function 107 | // 108 | // Here we process the token stream, assumed to have its pointer inside a 109 | // CSS element, and will try to parse the content inside it as CSS until 110 | // we hit the end of the CSS element. 111 | // 112 | // Any parse errors along the way will result in a `ParseError` 113 | // being thrown. 114 | parse: function(standalone) { 115 | // We'll use some instance variables to keep track of our parse 116 | // state: 117 | 118 | // * A list of the CSS rulesets for the CSS block. 119 | this.rules = []; 120 | 121 | // * A list of comment blocks inside the CSS. 122 | this.comments = []; 123 | 124 | // Parsing is based on finite states, and a call 125 | // to `_parseBlockType()` will run through any number 126 | // of states until it either throws an error, 127 | // or terminates cleanly. 128 | var sliceStart = this.stream.pos; 129 | this.stream.markTokenStartAfterSpace(); 130 | this._parseBlockType(); 131 | var sliceEnd = this.stream.pos; 132 | 133 | // If we get here, the CSS block has no errors, 134 | // and we report the start/end of the CSS block 135 | // in the stream, as well as the rules/comments 136 | // for the calling `HTMLparser` instance to work with. 137 | var cssBlock = { 138 | value: this.stream.text.slice(sliceStart, sliceEnd), 139 | parseInfo: { 140 | start: sliceStart, 141 | end: sliceEnd, 142 | rules: this.rules, 143 | comments: this.comments 144 | } 145 | }; 146 | 147 | this.rules = null; 148 | this.comments = null; 149 | return cssBlock; 150 | }, 151 | // #### CSS Comment Parsing 152 | // 153 | // Here we record the position of comments in *term* in the instance's 154 | // comment list, and return *term* with all its comments stripped. 
155 | stripComments: function(term, startPos) { 156 | var pos, 157 | last = term.length, 158 | commentStart, commentEnd, 159 | prev, next, 160 | stripped = ""; 161 | for (pos=0; pos < last; pos++) { 162 | if (term[pos] === '/' && pos= last-1 && term.substr(pos-1,2) !== "*/") 169 | throw new ParseError("UNTERMINATED_CSS_COMMENT", commentStart); 170 | commentEnd = startPos + pos + 1; 171 | this.comments.push({start: commentStart, end: commentEnd}); 172 | } else { 173 | stripped += term[pos]; 174 | } 175 | } 176 | return stripped.trim(); 177 | }, 178 | // #### CSS Comment Filtering 179 | // 180 | // Here we filter a token so that its start and end positions 181 | // point to the content without leading and trailing comments, 182 | // with comments in the token.value completely removed. 183 | filterComments: function(token) { 184 | var text = token.value, 185 | tsize = text.length, 186 | ntsize, 187 | stripped = this.stripComments(text, token.interval.start); 188 | // strip leading comments 189 | text = text.replace(/^\s+/,""); 190 | text = text.replace(/^\/\*[\w\W]*?\*\/\s*/,''); 191 | ntsize = text.length; 192 | token.interval.start += tsize - ntsize; 193 | // strip trailing comments (=reverse and repeat previous) 194 | tsize = ntsize; 195 | text = text.split('').reverse().join(''); 196 | text = text.replace(/^\s+/,""); 197 | text = text.replace(/^\/\*[\w\W]*?\*\/\s*/,''); 198 | // FIXME: this still fails comments like this: /* ... /* ... */, 199 | // which is a single block. The problems is that in the 200 | // reversed string this looks like /* ... */ ... */ which 201 | // counts as one block plus left-over junk. 202 | ntsize = text.length; 203 | token.interval.end -= tsize - ntsize; 204 | // commit text change 205 | token.value = stripped; 206 | }, 207 | _parseBlockType: function() { 208 | // Depending on our state, we may be coming from having just parsed 209 | // a rule. If that's the case, add it to our list of rules. 
210 | if (this.currentRule) { 211 | this.rules.push(this.currentRule); 212 | this.currentRule = null; 213 | } 214 | 215 | // skip over comments, if there is one at this position 216 | this.stream.stripCommentBlock(); 217 | 218 | if (this.stream.peek() === "{") { 219 | throw new ParseError("MISSING_CSS_SELECTOR", this, this.stream.pos-1, this.stream.pos); 220 | } 221 | 222 | // are we looking at an @block? 223 | if (this.stream.peek() === "@") { 224 | this.stream.eatCSSWhile(/[^\{]/); 225 | var token = this.stream.makeToken(), 226 | name = token.value.trim(); 227 | 228 | // we currently support @keyframes (with prefixes) 229 | if(name.match(/@(-[^-]+-)?keyframes/)) { 230 | this.stream.next(); 231 | this.nested = true; 232 | return this._parseSelector(); 233 | } 234 | 235 | // and media queries 236 | if(name.match(/@media\s*\([^{)]+\)/)) { 237 | this.stream.next(); 238 | this.nested = true; 239 | return this._parseSelector(); 240 | } 241 | 242 | // and @font-face 243 | if(name === "@font-face") { 244 | this.stream.rewind(token.value.length); 245 | this.stream.markTokenStart(); 246 | return this._parseSelector(); 247 | } 248 | 249 | // anything else is completely unknown 250 | throw new ParseError("UNKOWN_CSS_KEYWORD", this, token.interval.start, token.interval.end, name); 251 | } 252 | 253 | this._parseSelector(); 254 | }, 255 | // #### CSS Selector Parsing 256 | // 257 | // A selector is a string, and terminates on `{`, which signals 258 | // the start of a CSS property/value pair (which may be empty). 259 | // 260 | // There are a few characters in selectors that are an immediate error: 261 | // 262 | // * `;` Rule terminator (ERROR: missing block opener) 263 | // * `}` End of css block (ERROR: missing block opener) 264 | // * `<` End of `` 265 | // (ERROR: css declaration has no body) 266 | // 267 | // Note that we cannot flag `:` as an error because pseudo-classes use 268 | // it as their prefix. 
269 | _parseSelector: function() { 270 | // Gobble all characters that could be part of the selector. 271 | this.stream.eatCSSWhile(/[^\{;\}<]/); 272 | var token = this.stream.makeToken(), 273 | peek = this.stream.peek(); 274 | 275 | // if we encounter } we're actually inside a block, like 276 | // @keyframes or the like, and need to try for a new block. 277 | if (peek === "}") { 278 | this.stream.next(); 279 | return this._parseBlockType(); 280 | } 281 | 282 | // If there was nothing to select, we're either done, 283 | // or an error occurred. 284 | if (token === null) { 285 | if (!this.stream.end() && this.stream.peek() === '<') { 286 | // if this is the start of ', true)) { 360 | token = this.stream.makeToken(); 361 | this.domBuilder.comment(token.value.slice(4, -3), token.interval); 362 | return; 363 | } 364 | this.stream.next(); 365 | } 366 | token = this.stream.makeToken(); 367 | throw new ParseError("UNTERMINATED_COMMENT", token); 368 | }, 369 | // This helper parses CDATA content, which should be treated as raw text, 370 | // rather than being parsed for markup. 
It assumes the stream has just 371 | // passed the beginning `', 375 | text, 376 | textInterval = { start: 0, end: 0 }, 377 | openTagEnd = this.domBuilder.currentNode.parseInfo.openTag.end, 378 | closeTagInterval; 379 | 380 | this.stream.makeToken(); 381 | while (!this.stream.end()) { 382 | if (this.stream.match(matchString, true)) { 383 | token = this.stream.makeToken(); 384 | text = token.value.slice(0, -matchString.length); 385 | closeTagInterval = { 386 | start: openTagEnd + text.length, 387 | end: token.interval.end 388 | }; 389 | this.domBuilder.currentNode.parseInfo.closeTag = closeTagInterval; 390 | textInterval.start = token.interval.start; 391 | textInterval.end = token.interval.end - (closeTagInterval.end - closeTagInterval.start); 392 | this.domBuilder.text(text, textInterval); 393 | this.domBuilder.popElement(); 394 | return; 395 | } 396 | this.stream.next(); 397 | } 398 | throw new ParseError("UNCLOSED_TAG", this); 399 | }, 400 | // This helper function checks if the current tag contains an attribute 401 | containsAttribute: function (stream) { 402 | return stream.eat(nameStartChar); 403 | }, 404 | // This helper function parses the end of a closing tag. It expects 405 | // the stream to be right after the end of the closing tag's tag 406 | // name. 407 | _parseEndCloseTag: function() { 408 | this.stream.eatSpace(); 409 | if (this.stream.next() != '>') { 410 | if(this.containsAttribute(this.stream)) { 411 | throw new ParseError("ATTRIBUTE_IN_CLOSING_TAG", this); 412 | } else { 413 | throw new ParseError("UNTERMINATED_CLOSE_TAG", this); 414 | } 415 | } 416 | var end = this.stream.makeToken().interval.end; 417 | this.domBuilder.currentNode.parseInfo.closeTag.end = end; 418 | this.domBuilder.popElement(); 419 | }, 420 | // This helper function parses the rest of an opening tag after 421 | // its tag name, looking for `attribute="value"` data until a 422 | // `>` is encountered. 
423 | _parseEndOpenTag: function(tagName) { 424 | var tagMark = this.stream.pos, 425 | startMark = this.stream.pos; 426 | 427 | while (!this.stream.end()) { 428 | 429 | if (this.containsAttribute(this.stream)) { 430 | if (this.stream.peek !== "=") { 431 | this.stream.eatWhile(nameChar); 432 | } 433 | this._parseAttribute(tagName); 434 | } 435 | 436 | else if (this.stream.eatSpace()) { 437 | this.stream.makeToken(); 438 | startMark = this.stream.pos; 439 | } 440 | 441 | else if (this.stream.peek() == '>' || this.stream.match("/>")) { 442 | var selfClosing = this.stream.match("/>", true); 443 | if (selfClosing) { 444 | if (!this.parsingSVG && !this._knownVoidHTMLElement(tagName)) 445 | throw new ParseError("SELF_CLOSING_NON_VOID_ELEMENT", this, 446 | tagName); 447 | } else 448 | this.stream.next(); 449 | var end = this.stream.makeToken().interval.end; 450 | this.domBuilder.currentNode.parseInfo.openTag.end = end; 451 | 452 | // If the opening tag represents a void element, there will not be 453 | // a closing element, so we tell our DOM builder that we're done. 454 | if (tagName && ((selfClosing && this._knownSVGElement(tagName)) || this._knownVoidHTMLElement(tagName))) 455 | this.domBuilder.popElement(); 456 | 457 | // If the open tag represents a optional-omit-close-tag element, there may be 458 | // an optional closing element, so we save the currentNode into activeTag for next step check. 
459 | activeTagNode = false; 460 | if (tagName && this._knownOmittableCloseTagHtmlElement(tagName)){ 461 | activeTagNode = this.domBuilder.currentNode; 462 | } 463 | 464 | // If the opening tag represents a `'; 169 | var doc = parseWithoutErrors(html); 170 | var styleContents = doc.childNodes[0].childNodes[0]; 171 | equal(styleContents.nodeValue, css); 172 | cb(html, css, styleContents); 173 | }); 174 | }); 175 | } 176 | 177 | return { 178 | ok: ok, 179 | equal: equal, 180 | test: test, 181 | asyncTest: asyncTest, 182 | start: start, 183 | documentFragmentHTML: documentFragmentHTML, 184 | assertParseIntervals: assertParseIntervals, 185 | parseWithoutErrors: parseWithoutErrors, 186 | testManySnippets: testManySnippets, 187 | testStyleSheet: testStyleSheet, 188 | getFailCount: function() { return failCount; } 189 | }; 190 | }; 191 | -------------------------------------------------------------------------------- /test/test-slowparse.js: -------------------------------------------------------------------------------- 1 | module.exports = function(Slowparse, window, document, validators) { 2 | 3 | var Node = require('../src/Node'); 4 | 5 | var ok = validators.ok; 6 | var equal = validators.equal; 7 | var test = validators.test; 8 | var asyncTest = validators.asyncTest; 9 | var start = validators.start; 10 | var documentFragmentHTML = validators.documentFragmentHTML; 11 | var assertParseIntervals = validators.assertParseIntervals; 12 | var parseWithoutErrors = validators.parseWithoutErrors; 13 | var testManySnippets = validators.testManySnippets; 14 | var testStyleSheet = validators.testStyleSheet; 15 | 16 | var parse = function(html) { return Slowparse.HTML(document, html); }; 17 | var parseCSS = function(css) { return Slowparse.CSS(css); }; 18 | 19 | test("Stream.match()", function() { 20 | var stream = new Slowparse.Stream("blArgle"); 21 | ok(stream.match("blArgle")); 22 | equal(stream.pos, 0); 23 | ok(!stream.match("blargle")); 24 | equal(stream.pos, 0); 25 | 
ok(stream.match("blargle", false, true)); 26 | equal(stream.pos, 0); 27 | ok(stream.match("bla", true, true)); 28 | equal(stream.pos, 3); 29 | ok(stream.match("rgle", true)); 30 | equal(stream.pos, 7); 31 | }); 32 | 33 | test("Stream.eat() works at EOF", function() { 34 | var stream = new Slowparse.Stream(""); 35 | ok(!stream.eat(/blah/)); 36 | }); 37 | 38 | test("parsing of valid DOCTYPE", function() { 39 | var html = '

hi

'; 40 | var doc = parseWithoutErrors(html); 41 | assertParseIntervals(html, doc, "document", { 42 | 'parseInfo.doctype': '' 43 | }); 44 | }); 45 | 46 | test("parsing of misplaced DOCTYPE", function() { 47 | var html = '

hi

'; 48 | var result = parse(html); 49 | equal(result.error, { 50 | "openTag": { 51 | "end": 10, 52 | "name": "", 53 | "start": 9 54 | }, 55 | cursor: 9, 56 | "type": "INVALID_TAG_NAME" 57 | }); 58 | }); 59 | 60 | test("parsing of HTML comments", function() { 61 | var html = 'hithere'; 62 | var doc = parseWithoutErrors(html); 63 | assertParseIntervals(html, doc.childNodes[1], "comment", { 64 | 'parseInfo': '' 65 | }); 66 | }); 67 | 68 | test("UNQUOTED_ATTR_VALUE in

", function() { 69 | // https://github.com/mozilla/slowparse/issues/6 70 | var err = Slowparse.HTML(document, '

').error; 71 | equal(err.type, "UNQUOTED_ATTR_VALUE"); 72 | }); 73 | 74 | test("parsing of elements with boolean attributes", function() { 75 | var html = ''; 76 | var doc = parseWithoutErrors(html); 77 | var attr = doc.childNodes[0].attributes[0]; 78 | equal(attr.nodeName, 'href'); 79 | equal(attr.nodeValue, ''); 80 | assertParseIntervals(html, attr, "attr", { 81 | 'parseInfo.name': 'href' 82 | }); 83 | 84 | var result = Slowparse.HTML(document, ''); 85 | var expected = { 86 | type: 'INVALID_ATTR_NAME', 87 | start: 3, 88 | end: 8, 89 | attribute: { name: { value: "+" }}, 90 | cursor: 3 91 | }; 92 | equal(result.error, expected); 93 | 94 | html = ''; 95 | doc = parseWithoutErrors(html); 96 | var attr1 = doc.childNodes[0].attributes[0]; 97 | var attr2 = doc.childNodes[0].attributes[1]; 98 | 99 | // Apparently NamedNodeMap entries are not in any particular order: 100 | // https://developer.mozilla.org/en/DOM/NamedNodeMap 101 | // 102 | // So, we'll swap these values if they're not in the order we expect 103 | // them to be in, which is the case in IE9, at the very least. 
104 | if (attr1.nodeName == 'class') { 105 | var temp = attr1; 106 | attr1 = attr2; 107 | attr2 = temp; 108 | } 109 | 110 | equal(attr1.nodeName, 'href'); 111 | equal(attr1.nodeValue, ''); 112 | equal(attr2.nodeName, 'class'); 113 | equal(attr2.nodeValue, 'foo'); 114 | assertParseIntervals(html, attr1, "attr1", { 115 | 'parseInfo.name': 'href' 116 | }); 117 | ok(attr1.parseInfo.value === undefined); 118 | assertParseIntervals(html, attr2, "attr2", { 119 | 'parseInfo.name': 'class', 120 | 'parseInfo.value': '"foo"' 121 | }); 122 | }); 123 | 124 | test("parsing of '; 126 | var doc = parseWithoutErrors(html); 127 | equal(doc.childNodes.length, 1, "document has one child"); 128 | 129 | var script = doc.childNodes[0]; 130 | equal(script.nodeName, "SCRIPT", 131 | "first child of generated DOM is '; 503 | var doc = parseWithoutErrors(html); 504 | setTimeout(function() { 505 | equal(window.PWNED, undefined); 506 | start(); 507 | }, 100); 508 | }); 509 | }); 510 | 511 | test("parsing of attr content w/ HTML entities", function() { 512 | var html = '

'; 513 | var doc = parseWithoutErrors(html); 514 | var attrNode = doc.childNodes[0].attributes[0]; 515 | equal(attrNode.nodeValue, '1 < 2 < 3'); 516 | assertParseIntervals(html, attrNode, "attr", { 517 | 'parseInfo.value': '"1 < 2 < 3"' 518 | }); 519 | }); 520 | 521 | test("INVALID_TAG_NAME raised by < at EOF", function() { 522 | var error = Slowparse.HTML(document, '<').error; 523 | equal(error.type, "INVALID_TAG_NAME"); 524 | }); 525 | 526 | test("MISSING_CSS_SELECTOR works after comment", function() { 527 | var html = ''; 528 | var error = Slowparse.HTML(document, html).error; 529 | equal(error.type, "MISSING_CSS_SELECTOR"); 530 | }); 531 | 532 | test("UNTERMINATED_ATTR_VALUE works at end of stream", function() { 533 | var html = 'Foo"; 555 | var error = Slowparse.HTML(document, html).error; 556 | equal(error.type, "ATTRIBUTE_IN_CLOSING_TAG", "error.type should be ATTRIBUTE_IN_CLOSING_TAG, but was " + error.type); 557 | }); 558 | 559 | test("Space at the end of a closing tag (with no attributes) produces no error", function() { 560 | var html = "Foo"; 561 | var error = Slowparse.HTML(document, html).error; 562 | equal(error, null, "Valid closing tag with extra space should produce no error."); 563 | }); 564 | 565 | test("Slowparse.HTML_ELEMENT_NAMES", function() { 566 | ok(Slowparse.HTML_ELEMENT_NAMES.indexOf("p") != -1); 567 | }); 568 | 569 | test("Slowparse.CSS_PROPERTY_NAMES", function() { 570 | ok(Slowparse.CSS_PROPERTY_NAMES.indexOf("color") != -1); 571 | }); 572 | 573 | test("parsing elements with optional close tags:

", function() { 574 | var html = '

text\n

more text

'; 575 | var result = parse(html); 576 | ok(!result.error, "no error on omitted

"); 577 | }); 578 | 579 | test("parsing elements with nested optional close tags:
  • ", function() { 580 | var html = '
    '; 581 | var result = parse(html); 582 | ok(!result.error, "no error on omitted

    "); 583 | }); 584 | 585 | test("parsing elements with nested optional close tags:
  • x

  • ", function() { 586 | var html = '
    • x

    '; 587 | var result = parse(html); 588 | ok(!result.error, "no error on omitted

    "); 589 | }); 590 | 591 | test("parsing elements with nested optional close tags:
  • x

  • ", function() { 592 | var html = '
    • x

    '; 593 | var result = parse(html); 594 | ok(!result.error, "no error on omitted

    "); 595 | }); 596 | 597 | test("parsing elements with nested optional close tags:
  • x

    m
  • ", function() { 598 | var html = '
    • x

      m
    '; 599 | var result = parse(html); 600 | ok(!result.error, "no error on omitted

    "); 601 | }); 602 | 603 | test("parsing elements with nested optional close tags:
  • x

    y

  • ", function() { 604 | var html = '
    • x

      y

    '; 605 | var result = parse(html); 606 | ok(!result.error, "no error on omitted

    "); 607 | }); 608 | 609 | test("intentional fail for optional close tag (incorrect use). pass = not accepted", function() { 610 | var html = '

    text\nmore text

    '; 611 | var result = parse(html); 612 | var expected = { 613 | type: 'MISMATCHED_CLOSE_TAG', 614 | openTag: { 615 | name: 'p', 616 | start: 5, 617 | end: 8 618 | }, 619 | closeTag: { 620 | name: 'div', 621 | start: 29, 622 | end: 34 623 | }, 624 | cursor: 29 625 | }; 626 | equal(result.error, expected, "bad omission error for

    "); 627 | }); 628 | 629 | test("testing CSS 'content' property values with semi-colons", function() { 630 | var html = ""; 631 | var result = parse(html); 632 | ok(!result.error, "semi-colons accepted"); 633 | }); 634 | 635 | test("testing CSS 'content' property values with semi-colons inside nested quotes", function() { 636 | var html = ""; 637 | var result = parse(html); 638 | ok(!result.error, "semi-colons accepted"); 639 | }); 640 | 641 | test("@keyframes css block", function() { 642 | var html = ""; 643 | var result = parse(html); 644 | ok(!result.error, "@keyframes accepted"); 645 | }); 646 | 647 | test("@keyframes css block with named frame", function() { 648 | var html = ""; 649 | var result = parse(html); 650 | ok(!result.error, "@keyframes accepted"); 651 | }); 652 | 653 | test("@keyframes css block with typo (@keyfarmes). pass = not accepted", function() { 654 | var html = ""; 655 | var result = parse(html); 656 | var expected = { 657 | type: 'UNKOWN_CSS_KEYWORD', 658 | cssKeyword: { 659 | start: 7, 660 | end: 18, 661 | value: "@keyfarmes" 662 | }, 663 | cursor: 7 664 | }; 665 | equal(result.error, expected, "keyfarmes is not accepted as @keyword"); 666 | }); 667 | 668 | test("@-moz-keyframes css block", function() { 669 | var html = ""; 670 | var result = parse(html); 671 | ok(!result.error, "@-*-keyframes accepted"); 672 | }); 673 | 674 | test("@-webkit-keyframes css block", function() { 675 | var html = ""; 676 | var result = parse(html); 677 | ok(!result.error, "@-*-keyframes accepted"); 678 | }); 679 | 680 | test("@keyframes css block with leading block comment", function() { 681 | var html = ""; 682 | var result = parse(html); 683 | ok(!result.error, "@keyframes accepted"); 684 | }); 685 | 686 | test("@keyframes css block with trailing block comment", function() { 687 | var html = ""; 688 | var result = parse(html); 689 | ok(!result.error, "@keyframes accepted"); 690 | }); 691 | 692 | test("@keyframes css block with leading and trailing block 
comment", function() { 693 | var html = ""; 694 | var result = parse(html); 695 | ok(!result.error, "@keyframes accepted"); 696 | }); 697 | 698 | test("@media rule", function() { 699 | var html = ""; 700 | var result = parse(html); 701 | ok(!result.error, "@media accepted"); 702 | }); 703 | 704 | test("@media rule (complex)", function() { 705 | var html = ""; 706 | var result = parse(html); 707 | ok(!result.error, "@media accepted"); 708 | }); 709 | 710 | test("@font-face rule", function() { 711 | var html = ""; 712 | var result = parse(html); 713 | ok(!result.error, "@font-face accepted"); 714 | }); 715 | 716 | test("@font-face rule with typo (@font-faec). pass = not accepted", function() { 717 | var html = ""; 718 | var result = parse(html); 719 | var expected = { 720 | type: 'UNKOWN_CSS_KEYWORD', 721 | cssKeyword: { 722 | start: 7, 723 | end: 18, 724 | value: "@font-faec" 725 | }, 726 | cursor: 7 727 | }; 728 | equal(result.error, expected, "font-faec is not accepted as @keyword"); 729 | }); 730 | 731 | test("Custom Elements are allowed", function () { 732 | var html = "hello"; 733 | var result = parse(html); 734 | ok(!result.error, "custom element p-q accepted"); 735 | }); 736 | 737 | test("Custom Elements with attributes are allowed", function () { 738 | var html = "hello"; 739 | var result = parse(html); 740 | ok(!result.error, "custom element p-q with attribtues accepted"); 741 | }); 742 | 743 | test("Custom Element tag names must be <[a-z]-[a-z]>", function () { 744 | var html = "<->"; 745 | var result = parse(html); 746 | equal(result.error, { 747 | type: 'INVALID_TAG_NAME', 748 | openTag: { name: '-', start: 0, end: 2 }, 749 | cursor: 0 750 | }); 751 | }); 752 | 753 | test("HTML with SVG that uses xlink:* attributes", function () { 754 | var html = ""; 755 | var result = parse(html); 756 | ok(!result.error, "xlink:* attributes accepted"); 757 | }); 758 | 759 | test("HTML that uses xml:* attributes", function () { 760 | var html = "

    test

    "; 761 | var result = parse(html); 762 | ok(!result.error, "xml:* attributes accepted"); 763 | }); 764 | 765 | test("HTML with nonsense:attribute should not be rejected", function () { 766 | var html = "

    test

    "; 767 | var result = parse(html); 768 | ok(result.error, { 769 | type: 'UNSUPPORTED_ATTR_NAMESPACE', 770 | start: 3, 771 | end: 21, 772 | attribute: { 773 | name: { 774 | value: 'nonsense:attribute' 775 | } 776 | }, 777 | cursor: 3 778 | }); 779 | }); 780 | 781 | test("Attributes with multiple namespaces should be rejected", function () { 782 | var html = ""; 783 | var result = parse(html); 784 | equal(result.error, { 785 | type: 'MULTIPLE_ATTR_NAMESPACES', 786 | start: 11, 787 | end: 25, 788 | attribute: { 789 | name: { 790 | value: 'xml:xlink:href' 791 | } 792 | }, 793 | cursor: 11 794 | }); 795 | }); 796 | 797 | test("Rogue \"value\" entries inside the opening tag should be flagged as bad", function () { 798 | var html = 'test image'; 799 | var result = parse(html); 800 | equal(result.error, { 801 | type: 'UNBOUND_ATTRIBUTE_VALUE', 802 | value: '"207"', 803 | interval: { 804 | start: 69, 805 | end: 74 806 | }, 807 | cursor: 69 808 | }); 809 | }); 810 | 811 | 812 | // specifically CSS testing 813 | 814 | test("parsing empty CSS document", function() { 815 | var css = ''; 816 | var result = parseCSS(css); 817 | equal(result.error, null); 818 | }); 819 | 820 | test("comment-only CSS document", function() { 821 | var css = '/* this is \n a comment */'; 822 | var result = parseCSS(css); 823 | equal(result.error, null); 824 | }); 825 | 826 | 827 | return validators.getFailCount(); 828 | }; 829 | --------------------------------------------------------------------------------