├── .gitignore ├── Godeps ├── Godeps.json ├── Readme └── _workspace │ ├── .gitignore │ └── src │ └── golang.org │ └── x │ └── net │ └── html │ ├── atom │ ├── atom.go │ ├── atom_test.go │ ├── gen.go │ ├── table.go │ └── table_test.go │ ├── charset │ ├── charset.go │ ├── charset_test.go │ ├── gen.go │ ├── table.go │ └── testdata │ │ ├── HTTP-charset.html │ │ ├── HTTP-vs-UTF-8-BOM.html │ │ ├── HTTP-vs-meta-charset.html │ │ ├── HTTP-vs-meta-content.html │ │ ├── No-encoding-declaration.html │ │ ├── README │ │ ├── UTF-16BE-BOM.html │ │ ├── UTF-16LE-BOM.html │ │ ├── UTF-8-BOM-vs-meta-charset.html │ │ ├── UTF-8-BOM-vs-meta-content.html │ │ ├── meta-charset-attribute.html │ │ └── meta-content-attribute.html │ ├── const.go │ ├── doc.go │ ├── doctype.go │ ├── entity.go │ ├── entity_test.go │ ├── escape.go │ ├── escape_test.go │ ├── example_test.go │ ├── foreign.go │ ├── node.go │ ├── node_test.go │ ├── parse.go │ ├── parse_test.go │ ├── render.go │ ├── render_test.go │ ├── testdata │ ├── go1.html │ └── webkit │ │ ├── README │ │ ├── adoption01.dat │ │ ├── adoption02.dat │ │ ├── comments01.dat │ │ ├── doctype01.dat │ │ ├── entities01.dat │ │ ├── entities02.dat │ │ ├── html5test-com.dat │ │ ├── inbody01.dat │ │ ├── isindex.dat │ │ ├── pending-spec-changes-plain-text-unsafe.dat │ │ ├── pending-spec-changes.dat │ │ ├── plain-text-unsafe.dat │ │ ├── scriptdata01.dat │ │ ├── scripted │ │ ├── adoption01.dat │ │ └── webkit01.dat │ │ ├── tables01.dat │ │ ├── tests1.dat │ │ ├── tests10.dat │ │ ├── tests11.dat │ │ ├── tests12.dat │ │ ├── tests14.dat │ │ ├── tests15.dat │ │ ├── tests16.dat │ │ ├── tests17.dat │ │ ├── tests18.dat │ │ ├── tests19.dat │ │ ├── tests2.dat │ │ ├── tests20.dat │ │ ├── tests21.dat │ │ ├── tests22.dat │ │ ├── tests23.dat │ │ ├── tests24.dat │ │ ├── tests25.dat │ │ ├── tests26.dat │ │ ├── tests3.dat │ │ ├── tests4.dat │ │ ├── tests5.dat │ │ ├── tests6.dat │ │ ├── tests7.dat │ │ ├── tests8.dat │ │ ├── tests9.dat │ │ ├── tests_innerHTML_1.dat │ │ ├── tricky01.dat │ │ 
├── webkit01.dat │ │ └── webkit02.dat │ ├── token.go │ └── token_test.go ├── LICENSE ├── Procfile ├── README.md ├── app.json ├── extra.html ├── img ├── mina.png └── sirefox.png ├── spoonerize ├── bench_test.go ├── html.go ├── html_test.go ├── spoonerize.go ├── spoonerize_test.go └── words.go └── web.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | *.swp 27 | -------------------------------------------------------------------------------- /Godeps/Godeps.json: -------------------------------------------------------------------------------- 1 | { 2 | "ImportPath": "github.com/clarkduvall/spoonerizer", 3 | "GoVersion": "go1.4.2", 4 | "Deps": [ 5 | { 6 | "ImportPath": "golang.org/x/net/html", 7 | "Rev": "d9558e5c97f85372afee28cf2b6059d7d3818919" 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /Godeps/Readme: -------------------------------------------------------------------------------- 1 | This directory tree is generated automatically by godep. 2 | 3 | Please do not edit. 4 | 5 | See https://github.com/tools/godep for more information. 
6 | -------------------------------------------------------------------------------- /Godeps/_workspace/.gitignore: -------------------------------------------------------------------------------- 1 | /pkg 2 | /bin 3 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/atom/atom.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package atom provides integer codes (also known as atoms) for a fixed set of 6 | // frequently occurring HTML strings: tag names and attribute keys such as "p" 7 | // and "id". 8 | // 9 | // Sharing an atom's name between all elements with the same tag can result in 10 | // fewer string allocations when tokenizing and parsing HTML. Integer 11 | // comparisons are also generally faster than string comparisons. 12 | // 13 | // The value of an atom's particular code is not guaranteed to stay the same 14 | // between versions of this package. Neither is any ordering guaranteed: 15 | // whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to 16 | // be dense. The only guarantees are that e.g. looking up "div" will yield 17 | // atom.Div, calling atom.Div.String will return "div", and atom.Div != 0. 18 | package atom 19 | 20 | // Atom is an integer code for a string. The zero value maps to "". 21 | type Atom uint32 22 | 23 | // String returns the atom's name. 24 | func (a Atom) String() string { 25 | start := uint32(a >> 8) 26 | n := uint32(a & 0xff) 27 | if start+n > uint32(len(atomText)) { 28 | return "" 29 | } 30 | return atomText[start : start+n] 31 | } 32 | 33 | func (a Atom) string() string { 34 | return atomText[a>>8 : a>>8+a&0xff] 35 | } 36 | 37 | // fnv computes the FNV hash with an arbitrary starting value h. 
38 | func fnv(h uint32, s []byte) uint32 { 39 | for i := range s { 40 | h ^= uint32(s[i]) 41 | h *= 16777619 42 | } 43 | return h 44 | } 45 | 46 | func match(s string, t []byte) bool { 47 | for i, c := range t { 48 | if s[i] != c { 49 | return false 50 | } 51 | } 52 | return true 53 | } 54 | 55 | // Lookup returns the atom whose name is s. It returns zero if there is no 56 | // such atom. The lookup is case sensitive. 57 | func Lookup(s []byte) Atom { 58 | if len(s) == 0 || len(s) > maxAtomLen { 59 | return 0 60 | } 61 | h := fnv(hash0, s) 62 | if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { 63 | return a 64 | } 65 | if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { 66 | return a 67 | } 68 | return 0 69 | } 70 | 71 | // String returns a string whose contents are equal to s. In that sense, it is 72 | // equivalent to string(s) but may be more efficient. 73 | func String(s []byte) string { 74 | if a := Lookup(s); a != 0 { 75 | return a.String() 76 | } 77 | return string(s) 78 | } 79 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/atom/atom_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package atom 6 | 7 | import ( 8 | "sort" 9 | "testing" 10 | ) 11 | 12 | func TestKnown(t *testing.T) { 13 | for _, s := range testAtomList { 14 | if atom := Lookup([]byte(s)); atom.String() != s { 15 | t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String()) 16 | } 17 | } 18 | } 19 | 20 | func TestHits(t *testing.T) { 21 | for _, a := range table { 22 | if a == 0 { 23 | continue 24 | } 25 | got := Lookup([]byte(a.String())) 26 | if got != a { 27 | t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a)) 28 | } 29 | } 30 | } 31 | 32 | func TestMisses(t *testing.T) { 33 | testCases := []string{ 34 | "", 35 | "\x00", 36 | "\xff", 37 | "A", 38 | "DIV", 39 | "Div", 40 | "dIV", 41 | "aa", 42 | "a\x00", 43 | "ab", 44 | "abb", 45 | "abbr0", 46 | "abbr ", 47 | " abbr", 48 | " a", 49 | "acceptcharset", 50 | "acceptCharset", 51 | "accept_charset", 52 | "h0", 53 | "h1h2", 54 | "h7", 55 | "onClick", 56 | "λ", 57 | // The following string has the same hash (0xa1d7fab7) as "onmouseover". 
58 | "\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7", 59 | } 60 | for _, tc := range testCases { 61 | got := Lookup([]byte(tc)) 62 | if got != 0 { 63 | t.Errorf("Lookup(%q): got %d, want 0", tc, got) 64 | } 65 | } 66 | } 67 | 68 | func TestForeignObject(t *testing.T) { 69 | const ( 70 | afo = Foreignobject 71 | afO = ForeignObject 72 | sfo = "foreignobject" 73 | sfO = "foreignObject" 74 | ) 75 | if got := Lookup([]byte(sfo)); got != afo { 76 | t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo) 77 | } 78 | if got := Lookup([]byte(sfO)); got != afO { 79 | t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO) 80 | } 81 | if got := afo.String(); got != sfo { 82 | t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo) 83 | } 84 | if got := afO.String(); got != sfO { 85 | t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO) 86 | } 87 | } 88 | 89 | func BenchmarkLookup(b *testing.B) { 90 | sortedTable := make([]string, 0, len(table)) 91 | for _, a := range table { 92 | if a != 0 { 93 | sortedTable = append(sortedTable, a.String()) 94 | } 95 | } 96 | sort.Strings(sortedTable) 97 | 98 | x := make([][]byte, 1000) 99 | for i := range x { 100 | x[i] = []byte(sortedTable[i%len(sortedTable)]) 101 | } 102 | 103 | b.ResetTimer() 104 | for i := 0; i < b.N; i++ { 105 | for _, s := range x { 106 | Lookup(s) 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/atom/table_test.go: -------------------------------------------------------------------------------- 1 | // generated by go run gen.go -test; DO NOT EDIT 2 | 3 | package atom 4 | 5 | var testAtomList = []string{ 6 | "a", 7 | "abbr", 8 | "abbr", 9 | "accept", 10 | "accept-charset", 11 | "accesskey", 12 | "action", 13 | "address", 14 | "align", 15 | "alt", 16 | "annotation", 17 | "annotation-xml", 18 | "applet", 19 | "area", 20 | "article", 21 | "aside", 22 | "async", 23 | "audio", 24 | "autocomplete", 
25 | "autofocus", 26 | "autoplay", 27 | "b", 28 | "base", 29 | "basefont", 30 | "bdi", 31 | "bdo", 32 | "bgsound", 33 | "big", 34 | "blink", 35 | "blockquote", 36 | "body", 37 | "br", 38 | "button", 39 | "canvas", 40 | "caption", 41 | "center", 42 | "challenge", 43 | "charset", 44 | "checked", 45 | "cite", 46 | "cite", 47 | "class", 48 | "code", 49 | "col", 50 | "colgroup", 51 | "color", 52 | "cols", 53 | "colspan", 54 | "command", 55 | "command", 56 | "content", 57 | "contenteditable", 58 | "contextmenu", 59 | "controls", 60 | "coords", 61 | "crossorigin", 62 | "data", 63 | "data", 64 | "datalist", 65 | "datetime", 66 | "dd", 67 | "default", 68 | "defer", 69 | "del", 70 | "desc", 71 | "details", 72 | "dfn", 73 | "dialog", 74 | "dir", 75 | "dirname", 76 | "disabled", 77 | "div", 78 | "dl", 79 | "download", 80 | "draggable", 81 | "dropzone", 82 | "dt", 83 | "em", 84 | "embed", 85 | "enctype", 86 | "face", 87 | "fieldset", 88 | "figcaption", 89 | "figure", 90 | "font", 91 | "footer", 92 | "for", 93 | "foreignObject", 94 | "foreignobject", 95 | "form", 96 | "form", 97 | "formaction", 98 | "formenctype", 99 | "formmethod", 100 | "formnovalidate", 101 | "formtarget", 102 | "frame", 103 | "frameset", 104 | "h1", 105 | "h2", 106 | "h3", 107 | "h4", 108 | "h5", 109 | "h6", 110 | "head", 111 | "header", 112 | "headers", 113 | "height", 114 | "hgroup", 115 | "hidden", 116 | "high", 117 | "hr", 118 | "href", 119 | "hreflang", 120 | "html", 121 | "http-equiv", 122 | "i", 123 | "icon", 124 | "id", 125 | "iframe", 126 | "image", 127 | "img", 128 | "input", 129 | "inputmode", 130 | "ins", 131 | "isindex", 132 | "ismap", 133 | "itemid", 134 | "itemprop", 135 | "itemref", 136 | "itemscope", 137 | "itemtype", 138 | "kbd", 139 | "keygen", 140 | "keytype", 141 | "kind", 142 | "label", 143 | "label", 144 | "lang", 145 | "legend", 146 | "li", 147 | "link", 148 | "list", 149 | "listing", 150 | "loop", 151 | "low", 152 | "malignmark", 153 | "manifest", 154 | "map", 155 | "mark", 156 | 
"marquee", 157 | "math", 158 | "max", 159 | "maxlength", 160 | "media", 161 | "mediagroup", 162 | "menu", 163 | "menuitem", 164 | "meta", 165 | "meter", 166 | "method", 167 | "mglyph", 168 | "mi", 169 | "min", 170 | "minlength", 171 | "mn", 172 | "mo", 173 | "ms", 174 | "mtext", 175 | "multiple", 176 | "muted", 177 | "name", 178 | "nav", 179 | "nobr", 180 | "noembed", 181 | "noframes", 182 | "noscript", 183 | "novalidate", 184 | "object", 185 | "ol", 186 | "onabort", 187 | "onafterprint", 188 | "onautocomplete", 189 | "onautocompleteerror", 190 | "onbeforeprint", 191 | "onbeforeunload", 192 | "onblur", 193 | "oncancel", 194 | "oncanplay", 195 | "oncanplaythrough", 196 | "onchange", 197 | "onclick", 198 | "onclose", 199 | "oncontextmenu", 200 | "oncuechange", 201 | "ondblclick", 202 | "ondrag", 203 | "ondragend", 204 | "ondragenter", 205 | "ondragleave", 206 | "ondragover", 207 | "ondragstart", 208 | "ondrop", 209 | "ondurationchange", 210 | "onemptied", 211 | "onended", 212 | "onerror", 213 | "onfocus", 214 | "onhashchange", 215 | "oninput", 216 | "oninvalid", 217 | "onkeydown", 218 | "onkeypress", 219 | "onkeyup", 220 | "onlanguagechange", 221 | "onload", 222 | "onloadeddata", 223 | "onloadedmetadata", 224 | "onloadstart", 225 | "onmessage", 226 | "onmousedown", 227 | "onmousemove", 228 | "onmouseout", 229 | "onmouseover", 230 | "onmouseup", 231 | "onmousewheel", 232 | "onoffline", 233 | "ononline", 234 | "onpagehide", 235 | "onpageshow", 236 | "onpause", 237 | "onplay", 238 | "onplaying", 239 | "onpopstate", 240 | "onprogress", 241 | "onratechange", 242 | "onreset", 243 | "onresize", 244 | "onscroll", 245 | "onseeked", 246 | "onseeking", 247 | "onselect", 248 | "onshow", 249 | "onsort", 250 | "onstalled", 251 | "onstorage", 252 | "onsubmit", 253 | "onsuspend", 254 | "ontimeupdate", 255 | "ontoggle", 256 | "onunload", 257 | "onvolumechange", 258 | "onwaiting", 259 | "open", 260 | "optgroup", 261 | "optimum", 262 | "option", 263 | "output", 264 | "p", 265 | 
"param", 266 | "pattern", 267 | "ping", 268 | "placeholder", 269 | "plaintext", 270 | "poster", 271 | "pre", 272 | "preload", 273 | "progress", 274 | "prompt", 275 | "public", 276 | "q", 277 | "radiogroup", 278 | "readonly", 279 | "rel", 280 | "required", 281 | "reversed", 282 | "rows", 283 | "rowspan", 284 | "rp", 285 | "rt", 286 | "ruby", 287 | "s", 288 | "samp", 289 | "sandbox", 290 | "scope", 291 | "scoped", 292 | "script", 293 | "seamless", 294 | "section", 295 | "select", 296 | "selected", 297 | "shape", 298 | "size", 299 | "sizes", 300 | "small", 301 | "sortable", 302 | "sorted", 303 | "source", 304 | "spacer", 305 | "span", 306 | "span", 307 | "spellcheck", 308 | "src", 309 | "srcdoc", 310 | "srclang", 311 | "start", 312 | "step", 313 | "strike", 314 | "strong", 315 | "style", 316 | "style", 317 | "sub", 318 | "summary", 319 | "sup", 320 | "svg", 321 | "system", 322 | "tabindex", 323 | "table", 324 | "target", 325 | "tbody", 326 | "td", 327 | "template", 328 | "textarea", 329 | "tfoot", 330 | "th", 331 | "thead", 332 | "time", 333 | "title", 334 | "title", 335 | "tr", 336 | "track", 337 | "translate", 338 | "tt", 339 | "type", 340 | "typemustmatch", 341 | "u", 342 | "ul", 343 | "usemap", 344 | "value", 345 | "var", 346 | "video", 347 | "wbr", 348 | "width", 349 | "wrap", 350 | "xmp", 351 | } 352 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/charset.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package charset provides common text encodings for HTML documents. 6 | // 7 | // The mapping from encoding labels to encodings is defined at 8 | // https://encoding.spec.whatwg.org/. 
9 | package charset 10 | 11 | import ( 12 | "bytes" 13 | "fmt" 14 | "io" 15 | "mime" 16 | "strings" 17 | "unicode/utf8" 18 | 19 | "golang.org/x/net/html" 20 | "golang.org/x/text/encoding" 21 | "golang.org/x/text/encoding/charmap" 22 | "golang.org/x/text/transform" 23 | ) 24 | 25 | // Lookup returns the encoding with the specified label, and its canonical 26 | // name. It returns nil and the empty string if label is not one of the 27 | // standard encodings for HTML. Matching is case-insensitive and ignores 28 | // leading and trailing whitespace. 29 | func Lookup(label string) (e encoding.Encoding, name string) { 30 | label = strings.ToLower(strings.Trim(label, "\t\n\r\f ")) 31 | enc := encodings[label] 32 | return enc.e, enc.name 33 | } 34 | 35 | // DetermineEncoding determines the encoding of an HTML document by examining 36 | // up to the first 1024 bytes of content and the declared Content-Type. 37 | // 38 | // See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding 39 | func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) { 40 | if len(content) > 1024 { 41 | content = content[:1024] 42 | } 43 | 44 | for _, b := range boms { 45 | if bytes.HasPrefix(content, b.bom) { 46 | e, name = Lookup(b.enc) 47 | return e, name, true 48 | } 49 | } 50 | 51 | if _, params, err := mime.ParseMediaType(contentType); err == nil { 52 | if cs, ok := params["charset"]; ok { 53 | if e, name = Lookup(cs); e != nil { 54 | return e, name, true 55 | } 56 | } 57 | } 58 | 59 | if len(content) > 0 { 60 | e, name = prescan(content) 61 | if e != nil { 62 | return e, name, false 63 | } 64 | } 65 | 66 | // Try to detect UTF-8. 67 | // First eliminate any partial rune at the end. 
68 | for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- { 69 | b := content[i] 70 | if b < 0x80 { 71 | break 72 | } 73 | if utf8.RuneStart(b) { 74 | content = content[:i] 75 | break 76 | } 77 | } 78 | hasHighBit := false 79 | for _, c := range content { 80 | if c >= 0x80 { 81 | hasHighBit = true 82 | break 83 | } 84 | } 85 | if hasHighBit && utf8.Valid(content) { 86 | return encoding.Nop, "utf-8", false 87 | } 88 | 89 | // TODO: change default depending on user's locale? 90 | return charmap.Windows1252, "windows-1252", false 91 | } 92 | 93 | // NewReader returns an io.Reader that converts the content of r to UTF-8. 94 | // It calls DetermineEncoding to find out what r's encoding is. 95 | func NewReader(r io.Reader, contentType string) (io.Reader, error) { 96 | preview := make([]byte, 1024) 97 | n, err := io.ReadFull(r, preview) 98 | switch { 99 | case err == io.ErrUnexpectedEOF: 100 | preview = preview[:n] 101 | r = bytes.NewReader(preview) 102 | case err != nil: 103 | return nil, err 104 | default: 105 | r = io.MultiReader(bytes.NewReader(preview), r) 106 | } 107 | 108 | if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop { 109 | r = transform.NewReader(r, e.NewDecoder()) 110 | } 111 | return r, nil 112 | } 113 | 114 | // NewReaderLabel returns a reader that converts from the specified charset to 115 | // UTF-8. It uses Lookup to find the encoding that corresponds to label, and 116 | // returns an error if Lookup returns nil. It is suitable for use as 117 | // encoding/xml.Decoder's CharsetReader function. 
118 | func NewReaderLabel(label string, input io.Reader) (io.Reader, error) { 119 | e, _ := Lookup(label) 120 | if e == nil { 121 | return nil, fmt.Errorf("unsupported charset: %q", label) 122 | } 123 | return transform.NewReader(input, e.NewDecoder()), nil 124 | } 125 | 126 | func prescan(content []byte) (e encoding.Encoding, name string) { 127 | z := html.NewTokenizer(bytes.NewReader(content)) 128 | for { 129 | switch z.Next() { 130 | case html.ErrorToken: 131 | return nil, "" 132 | 133 | case html.StartTagToken, html.SelfClosingTagToken: 134 | tagName, hasAttr := z.TagName() 135 | if !bytes.Equal(tagName, []byte("meta")) { 136 | continue 137 | } 138 | attrList := make(map[string]bool) 139 | gotPragma := false 140 | 141 | const ( 142 | dontKnow = iota 143 | doNeedPragma 144 | doNotNeedPragma 145 | ) 146 | needPragma := dontKnow 147 | 148 | name = "" 149 | e = nil 150 | for hasAttr { 151 | var key, val []byte 152 | key, val, hasAttr = z.TagAttr() 153 | ks := string(key) 154 | if attrList[ks] { 155 | continue 156 | } 157 | attrList[ks] = true 158 | for i, c := range val { 159 | if 'A' <= c && c <= 'Z' { 160 | val[i] = c + 0x20 161 | } 162 | } 163 | 164 | switch ks { 165 | case "http-equiv": 166 | if bytes.Equal(val, []byte("content-type")) { 167 | gotPragma = true 168 | } 169 | 170 | case "content": 171 | if e == nil { 172 | name = fromMetaElement(string(val)) 173 | if name != "" { 174 | e, name = Lookup(name) 175 | if e != nil { 176 | needPragma = doNeedPragma 177 | } 178 | } 179 | } 180 | 181 | case "charset": 182 | e, name = Lookup(string(val)) 183 | needPragma = doNotNeedPragma 184 | } 185 | } 186 | 187 | if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma { 188 | continue 189 | } 190 | 191 | if strings.HasPrefix(name, "utf-16") { 192 | name = "utf-8" 193 | e = encoding.Nop 194 | } 195 | 196 | if e != nil { 197 | return e, name 198 | } 199 | } 200 | } 201 | } 202 | 203 | func fromMetaElement(s string) string { 204 | for s != "" { 205 | csLoc 
:= strings.Index(s, "charset") 206 | if csLoc == -1 { 207 | return "" 208 | } 209 | s = s[csLoc+len("charset"):] 210 | s = strings.TrimLeft(s, " \t\n\f\r") 211 | if !strings.HasPrefix(s, "=") { 212 | continue 213 | } 214 | s = s[1:] 215 | s = strings.TrimLeft(s, " \t\n\f\r") 216 | if s == "" { 217 | return "" 218 | } 219 | if q := s[0]; q == '"' || q == '\'' { 220 | s = s[1:] 221 | closeQuote := strings.IndexRune(s, rune(q)) 222 | if closeQuote == -1 { 223 | return "" 224 | } 225 | return s[:closeQuote] 226 | } 227 | 228 | end := strings.IndexAny(s, "; \t\n\f\r") 229 | if end == -1 { 230 | end = len(s) 231 | } 232 | return s[:end] 233 | } 234 | return "" 235 | } 236 | 237 | var boms = []struct { 238 | bom []byte 239 | enc string 240 | }{ 241 | {[]byte{0xfe, 0xff}, "utf-16be"}, 242 | {[]byte{0xff, 0xfe}, "utf-16le"}, 243 | {[]byte{0xef, 0xbb, 0xbf}, "utf-8"}, 244 | } 245 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/gen.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build ignore 6 | 7 | package main 8 | 9 | // Download https://encoding.spec.whatwg.org/encodings.json and use it to 10 | // generate table.go. 
11 | 12 | import ( 13 | "encoding/json" 14 | "fmt" 15 | "log" 16 | "net/http" 17 | "strings" 18 | ) 19 | 20 | type enc struct { 21 | Name string 22 | Labels []string 23 | } 24 | 25 | type group struct { 26 | Encodings []enc 27 | Heading string 28 | } 29 | 30 | const specURL = "https://encoding.spec.whatwg.org/encodings.json" 31 | 32 | func main() { 33 | resp, err := http.Get(specURL) 34 | if err != nil { 35 | log.Fatalf("error fetching %s: %s", specURL, err) 36 | } 37 | if resp.StatusCode != 200 { 38 | log.Fatalf("error fetching %s: HTTP status %s", specURL, resp.Status) 39 | } 40 | defer resp.Body.Close() 41 | 42 | var groups []group 43 | d := json.NewDecoder(resp.Body) 44 | err = d.Decode(&groups) 45 | if err != nil { 46 | log.Fatalf("error reading encodings.json: %s", err) 47 | } 48 | 49 | fmt.Println("// generated by go run gen.go; DO NOT EDIT") 50 | fmt.Println() 51 | fmt.Println("package charset") 52 | fmt.Println() 53 | 54 | fmt.Println("import (") 55 | fmt.Println(`"golang.org/x/text/encoding"`) 56 | for _, pkg := range []string{"charmap", "japanese", "korean", "simplifiedchinese", "traditionalchinese", "unicode"} { 57 | fmt.Printf("\"golang.org/x/text/encoding/%s\"\n", pkg) 58 | } 59 | fmt.Println(")") 60 | fmt.Println() 61 | 62 | fmt.Println("var encodings = map[string]struct{e encoding.Encoding; name string} {") 63 | for _, g := range groups { 64 | for _, e := range g.Encodings { 65 | goName, ok := miscNames[e.Name] 66 | if !ok { 67 | for k, v := range prefixes { 68 | if strings.HasPrefix(e.Name, k) { 69 | goName = v + e.Name[len(k):] 70 | break 71 | } 72 | } 73 | if goName == "" { 74 | log.Fatalf("unrecognized encoding name: %s", e.Name) 75 | } 76 | } 77 | 78 | for _, label := range e.Labels { 79 | fmt.Printf("%q: {%s, %q},\n", label, goName, e.Name) 80 | } 81 | } 82 | } 83 | fmt.Println("}") 84 | } 85 | 86 | var prefixes = map[string]string{ 87 | "iso-8859-": "charmap.ISO8859_", 88 | "windows-": "charmap.Windows", 89 | } 90 | 91 | var miscNames = 
map[string]string{ 92 | "utf-8": "encoding.Nop", 93 | "ibm866": "charmap.CodePage866", 94 | "iso-8859-8-i": "charmap.ISO8859_8", 95 | "koi8-r": "charmap.KOI8R", 96 | "koi8-u": "charmap.KOI8U", 97 | "macintosh": "charmap.Macintosh", 98 | "x-mac-cyrillic": "charmap.MacintoshCyrillic", 99 | "gbk": "simplifiedchinese.GBK", 100 | "gb18030": "simplifiedchinese.GB18030", 101 | "hz-gb-2312": "simplifiedchinese.HZGB2312", 102 | "big5": "traditionalchinese.Big5", 103 | "euc-jp": "japanese.EUCJP", 104 | "iso-2022-jp": "japanese.ISO2022JP", 105 | "shift_jis": "japanese.ShiftJIS", 106 | "euc-kr": "korean.EUCKR", 107 | "replacement": "encoding.Replacement", 108 | "utf-16be": "unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)", 109 | "utf-16le": "unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)", 110 | "x-user-defined": "charmap.XUserDefined", 111 | } 112 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-charset.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HTTP charset 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

HTTP charset

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

The character encoding of a page can be set using the HTTP header charset declaration.

31 |

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÃœÃ€Ãš. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-001
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
38 |
39 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | HTTP vs UTF-8 BOM 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

HTTP vs UTF-8 BOM

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.

31 |

The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

If the test is unsuccessful, the characters ï»¿ should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-034
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
38 |
39 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HTTP vs meta charset 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 |

HTTP vs meta charset

19 | 20 | 21 |
22 | 23 | 24 |
 
25 | 26 | 27 | 28 | 29 | 30 |
31 |

The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.

32 |

The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÃœÃ€Ãš. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

33 |
34 |
35 |
Next test
HTML5
36 |

the-input-byte-stream-018
Result summary & related tests
Detailed results for this test
Link to spec

37 |
Assumptions:
39 |
40 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HTTP vs meta content 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 |

HTTP vs meta content

19 | 20 | 21 |
22 | 23 | 24 |
 
25 | 26 | 27 | 28 | 29 | 30 |
31 |

The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.

32 |

The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÃœÃ€Ãš. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

33 |
34 |
35 |
Next test
HTML5
36 |

the-input-byte-stream-016
Result summary & related tests
Detailed results for this test
Link to spec

37 |
Assumptions:
39 |
40 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | No encoding declaration 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

No encoding declaration

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.

31 |

The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-015
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
37 |
38 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/README: -------------------------------------------------------------------------------- 1 | These test cases come from 2 | http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics 3 | 4 | Distributed under both the W3C Test Suite License 5 | (http://www.w3.org/Consortium/Legal/2008/04-testsuite-license) 6 | and the W3C 3-clause BSD License 7 | (http://www.w3.org/Consortium/Legal/2008/03-bsd-license). 8 | To contribute to a W3C Test Suite, see the policies and contribution 9 | forms (http://www.w3.org/2004/10/27-testcases). 10 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarkduvall/spoonerizer/420f218109903b1af4f13a2ffa5545aa2ccd74ad/Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clarkduvall/spoonerizer/420f218109903b1af4f13a2ffa5545aa2ccd74ad/Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | UTF-8 BOM vs meta charset 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 |

UTF-8 BOM vs meta charset

19 | 20 | 21 |
22 | 23 | 24 |
 
25 | 26 | 27 | 28 | 29 | 30 |
31 |

A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.

32 |

The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

33 |
34 |
35 |
Next test
HTML5
36 |

the-input-byte-stream-038
Result summary & related tests
Detailed results for this test
Link to spec

37 |
Assumptions:
39 |
40 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | UTF-8 BOM vs meta content 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

UTF-8 BOM vs meta content

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.

31 |

The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-037
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
38 |
39 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | meta charset attribute 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

meta charset attribute

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

The character encoding of the page can be set by a meta element with charset attribute.

31 |

The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-009
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
38 |
39 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/meta-content-attribute.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | meta content attribute 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 16 | 17 |

meta content attribute

18 | 19 | 20 |
21 | 22 | 23 |
 
24 | 25 | 26 | 27 | 28 | 29 |
30 |

The character encoding of the page can be set by a meta element with http-equiv and content attributes.

31 |

The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

32 |
33 |
34 |
Next test
HTML5
35 |

the-input-byte-stream-007
Result summary & related tests
Detailed results for this test
Link to spec

36 |
Assumptions:
38 |
39 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/const.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | // Section 12.2.3.2 of the HTML5 specification says "The following elements 8 | // have varying levels of special parsing rules". 9 | // https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements 10 | var isSpecialElementMap = map[string]bool{ 11 | "address": true, 12 | "applet": true, 13 | "area": true, 14 | "article": true, 15 | "aside": true, 16 | "base": true, 17 | "basefont": true, 18 | "bgsound": true, 19 | "blockquote": true, 20 | "body": true, 21 | "br": true, 22 | "button": true, 23 | "caption": true, 24 | "center": true, 25 | "col": true, 26 | "colgroup": true, 27 | "dd": true, 28 | "details": true, 29 | "dir": true, 30 | "div": true, 31 | "dl": true, 32 | "dt": true, 33 | "embed": true, 34 | "fieldset": true, 35 | "figcaption": true, 36 | "figure": true, 37 | "footer": true, 38 | "form": true, 39 | "frame": true, 40 | "frameset": true, 41 | "h1": true, 42 | "h2": true, 43 | "h3": true, 44 | "h4": true, 45 | "h5": true, 46 | "h6": true, 47 | "head": true, 48 | "header": true, 49 | "hgroup": true, 50 | "hr": true, 51 | "html": true, 52 | "iframe": true, 53 | "img": true, 54 | "input": true, 55 | "isindex": true, 56 | "li": true, 57 | "link": true, 58 | "listing": true, 59 | "marquee": true, 60 | "menu": true, 61 | "meta": true, 62 | "nav": true, 63 | "noembed": true, 64 | "noframes": true, 65 | "noscript": true, 66 | "object": true, 67 | "ol": true, 68 | "p": true, 69 | "param": true, 70 | "plaintext": true, 71 | "pre": true, 72 | "script": true, 73 | "section": true, 74 | "select": true, 75 | 
"source": true, 76 | "style": true, 77 | "summary": true, 78 | "table": true, 79 | "tbody": true, 80 | "td": true, 81 | "template": true, 82 | "textarea": true, 83 | "tfoot": true, 84 | "th": true, 85 | "thead": true, 86 | "title": true, 87 | "tr": true, 88 | "track": true, 89 | "ul": true, 90 | "wbr": true, 91 | "xmp": true, 92 | } 93 | 94 | func isSpecialElement(element *Node) bool { 95 | switch element.Namespace { 96 | case "", "html": 97 | return isSpecialElementMap[element.Data] 98 | case "svg": 99 | return element.Data == "foreignObject" 100 | } 101 | return false 102 | } 103 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package html implements an HTML5-compliant tokenizer and parser. 7 | 8 | Tokenization is done by creating a Tokenizer for an io.Reader r. It is the 9 | caller's responsibility to ensure that r provides UTF-8 encoded HTML. 10 | 11 | z := html.NewTokenizer(r) 12 | 13 | Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(), 14 | which parses the next token and returns its type, or an error: 15 | 16 | for { 17 | tt := z.Next() 18 | if tt == html.ErrorToken { 19 | // ... 20 | return ... 21 | } 22 | // Process the current token. 23 | } 24 | 25 | There are two APIs for retrieving the current token. The high-level API is to 26 | call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs 27 | allow optionally calling Raw after Next but before Token, Text, TagName, or 28 | TagAttr. 
In EBNF notation, the valid call sequence per token is: 29 | 30 | Next {Raw} [ Token | Text | TagName {TagAttr} ] 31 | 32 | Token returns an independent data structure that completely describes a token. 33 | Entities (such as "<") are unescaped, tag names and attribute keys are 34 | lower-cased, and attributes are collected into a []Attribute. For example: 35 | 36 | for { 37 | if z.Next() == html.ErrorToken { 38 | // Returning io.EOF indicates success. 39 | return z.Err() 40 | } 41 | emitToken(z.Token()) 42 | } 43 | 44 | The low-level API performs fewer allocations and copies, but the contents of 45 | the []byte values returned by Text, TagName and TagAttr may change on the next 46 | call to Next. For example, to extract an HTML page's anchor text: 47 | 48 | depth := 0 49 | for { 50 | tt := z.Next() 51 | switch tt { 52 | case ErrorToken: 53 | return z.Err() 54 | case TextToken: 55 | if depth > 0 { 56 | // emitBytes should copy the []byte it receives, 57 | // if it doesn't process it immediately. 58 | emitBytes(z.Text()) 59 | } 60 | case StartTagToken, EndTagToken: 61 | tn, _ := z.TagName() 62 | if len(tn) == 1 && tn[0] == 'a' { 63 | if tt == StartTagToken { 64 | depth++ 65 | } else { 66 | depth-- 67 | } 68 | } 69 | } 70 | } 71 | 72 | Parsing is done by calling Parse with an io.Reader, which returns the root of 73 | the parse tree (the document element) as a *Node. It is the caller's 74 | responsibility to ensure that the Reader provides UTF-8 encoded HTML. For 75 | example, to process each anchor node in depth-first order: 76 | 77 | doc, err := html.Parse(r) 78 | if err != nil { 79 | // ... 80 | } 81 | var f func(*html.Node) 82 | f = func(n *html.Node) { 83 | if n.Type == html.ElementNode && n.Data == "a" { 84 | // Do something with n... 
85 | } 86 | for c := n.FirstChild; c != nil; c = c.NextSibling { 87 | f(c) 88 | } 89 | } 90 | f(doc) 91 | 92 | The relevant specifications include: 93 | https://html.spec.whatwg.org/multipage/syntax.html and 94 | https://html.spec.whatwg.org/multipage/syntax.html#tokenization 95 | */ 96 | package html 97 | 98 | // The tokenization algorithm implemented by this package is not a line-by-line 99 | // transliteration of the relatively verbose state-machine in the WHATWG 100 | // specification. A more direct approach is used instead, where the program 101 | // counter implies the state, such as whether it is tokenizing a tag or a text 102 | // node. Specification compliance is verified by checking expected and actual 103 | // outputs over a test suite rather than aiming for algorithmic fidelity. 104 | 105 | // TODO(nigeltao): Does a DOM API belong in this package or a separate one? 106 | // TODO(nigeltao): How does parsing interact with a JavaScript engine? 107 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/doctype.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "strings" 9 | ) 10 | 11 | // parseDoctype parses the data from a DoctypeToken into a name, 12 | // public identifier, and system identifier. It returns a Node whose Type 13 | // is DoctypeNode, whose Data is the name, and which has attributes 14 | // named "system" and "public" for the two identifiers if they were present. 15 | // quirks is whether the document should be parsed in "quirks mode". 16 | func parseDoctype(s string) (n *Node, quirks bool) { 17 | n = &Node{Type: DoctypeNode} 18 | 19 | // Find the name. 
20 | space := strings.IndexAny(s, whitespace) 21 | if space == -1 { 22 | space = len(s) 23 | } 24 | n.Data = s[:space] 25 | // The comparison to "html" is case-sensitive. 26 | if n.Data != "html" { 27 | quirks = true 28 | } 29 | n.Data = strings.ToLower(n.Data) 30 | s = strings.TrimLeft(s[space:], whitespace) 31 | 32 | if len(s) < 6 { 33 | // It can't start with "PUBLIC" or "SYSTEM". 34 | // Ignore the rest of the string. 35 | return n, quirks || s != "" 36 | } 37 | 38 | key := strings.ToLower(s[:6]) 39 | s = s[6:] 40 | for key == "public" || key == "system" { 41 | s = strings.TrimLeft(s, whitespace) 42 | if s == "" { 43 | break 44 | } 45 | quote := s[0] 46 | if quote != '"' && quote != '\'' { 47 | break 48 | } 49 | s = s[1:] 50 | q := strings.IndexRune(s, rune(quote)) 51 | var id string 52 | if q == -1 { 53 | id = s 54 | s = "" 55 | } else { 56 | id = s[:q] 57 | s = s[q+1:] 58 | } 59 | n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) 60 | if key == "public" { 61 | key = "system" 62 | } else { 63 | key = "" 64 | } 65 | } 66 | 67 | if key != "" || s != "" { 68 | quirks = true 69 | } else if len(n.Attr) > 0 { 70 | if n.Attr[0].Key == "public" { 71 | public := strings.ToLower(n.Attr[0].Val) 72 | switch public { 73 | case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": 74 | quirks = true 75 | default: 76 | for _, q := range quirkyIDs { 77 | if strings.HasPrefix(public, q) { 78 | quirks = true 79 | break 80 | } 81 | } 82 | } 83 | // The following two public IDs only cause quirks mode if there is no system ID. 
84 | if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || 85 | strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { 86 | quirks = true 87 | } 88 | } 89 | if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && 90 | strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { 91 | quirks = true 92 | } 93 | } 94 | 95 | return n, quirks 96 | } 97 | 98 | // quirkyIDs is a list of public doctype identifiers that cause a document 99 | // to be interpreted in quirks mode. The identifiers should be in lower case. 100 | var quirkyIDs = []string{ 101 | "+//silmaril//dtd html pro v0r11 19970101//", 102 | "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", 103 | "-//as//dtd html 3.0 aswedit + extensions//", 104 | "-//ietf//dtd html 2.0 level 1//", 105 | "-//ietf//dtd html 2.0 level 2//", 106 | "-//ietf//dtd html 2.0 strict level 1//", 107 | "-//ietf//dtd html 2.0 strict level 2//", 108 | "-//ietf//dtd html 2.0 strict//", 109 | "-//ietf//dtd html 2.0//", 110 | "-//ietf//dtd html 2.1e//", 111 | "-//ietf//dtd html 3.0//", 112 | "-//ietf//dtd html 3.2 final//", 113 | "-//ietf//dtd html 3.2//", 114 | "-//ietf//dtd html 3//", 115 | "-//ietf//dtd html level 0//", 116 | "-//ietf//dtd html level 1//", 117 | "-//ietf//dtd html level 2//", 118 | "-//ietf//dtd html level 3//", 119 | "-//ietf//dtd html strict level 0//", 120 | "-//ietf//dtd html strict level 1//", 121 | "-//ietf//dtd html strict level 2//", 122 | "-//ietf//dtd html strict level 3//", 123 | "-//ietf//dtd html strict//", 124 | "-//ietf//dtd html//", 125 | "-//metrius//dtd metrius presentational//", 126 | "-//microsoft//dtd internet explorer 2.0 html strict//", 127 | "-//microsoft//dtd internet explorer 2.0 html//", 128 | "-//microsoft//dtd internet explorer 2.0 tables//", 129 | "-//microsoft//dtd internet explorer 3.0 html strict//", 130 | "-//microsoft//dtd internet explorer 3.0 html//", 131 | "-//microsoft//dtd internet 
explorer 3.0 tables//", 132 | "-//netscape comm. corp.//dtd html//", 133 | "-//netscape comm. corp.//dtd strict html//", 134 | "-//o'reilly and associates//dtd html 2.0//", 135 | "-//o'reilly and associates//dtd html extended 1.0//", 136 | "-//o'reilly and associates//dtd html extended relaxed 1.0//", 137 | "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", 138 | "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", 139 | "-//spyglass//dtd html 2.0 extended//", 140 | "-//sq//dtd html 2.0 hotmetal + extensions//", 141 | "-//sun microsystems corp.//dtd hotjava html//", 142 | "-//sun microsystems corp.//dtd hotjava strict html//", 143 | "-//w3c//dtd html 3 1995-03-24//", 144 | "-//w3c//dtd html 3.2 draft//", 145 | "-//w3c//dtd html 3.2 final//", 146 | "-//w3c//dtd html 3.2//", 147 | "-//w3c//dtd html 3.2s draft//", 148 | "-//w3c//dtd html 4.0 frameset//", 149 | "-//w3c//dtd html 4.0 transitional//", 150 | "-//w3c//dtd html experimental 19960712//", 151 | "-//w3c//dtd html experimental 970421//", 152 | "-//w3c//dtd w3 html//", 153 | "-//w3o//dtd w3 html 3.0//", 154 | "-//webtechs//dtd mozilla html 2.0//", 155 | "-//webtechs//dtd mozilla html//", 156 | } 157 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/entity_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "testing" 9 | "unicode/utf8" 10 | ) 11 | 12 | func TestEntityLength(t *testing.T) { 13 | // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key). 14 | // The +1 comes from the leading "&". This property implies that the length of 15 | // unescaped text is <= the length of escaped text. 
16 | for k, v := range entity { 17 | if 1+len(k) < utf8.RuneLen(v) { 18 | t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v)) 19 | } 20 | if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' { 21 | t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon) 22 | } 23 | } 24 | for k, v := range entity2 { 25 | if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) { 26 | t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1])) 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/escape.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "bytes" 9 | "strings" 10 | "unicode/utf8" 11 | ) 12 | 13 | // These replacements permit compatibility with old numeric entities that 14 | // assumed Windows-1252 encoding. 15 | // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference 16 | var replacementTable = [...]rune{ 17 | '\u20AC', // First entry is what 0x80 should be replaced with. 18 | '\u0081', 19 | '\u201A', 20 | '\u0192', 21 | '\u201E', 22 | '\u2026', 23 | '\u2020', 24 | '\u2021', 25 | '\u02C6', 26 | '\u2030', 27 | '\u0160', 28 | '\u2039', 29 | '\u0152', 30 | '\u008D', 31 | '\u017D', 32 | '\u008F', 33 | '\u0090', 34 | '\u2018', 35 | '\u2019', 36 | '\u201C', 37 | '\u201D', 38 | '\u2022', 39 | '\u2013', 40 | '\u2014', 41 | '\u02DC', 42 | '\u2122', 43 | '\u0161', 44 | '\u203A', 45 | '\u0153', 46 | '\u009D', 47 | '\u017E', 48 | '\u0178', // Last entry is 0x9F. 49 | // 0x00->'\uFFFD' is handled programmatically. 50 | // 0x0D->'\u000D' is a no-op. 
51 | } 52 | 53 | // unescapeEntity reads an entity like "<" from b[src:] and writes the 54 | // corresponding "<" to b[dst:], returning the incremented dst and src cursors. 55 | // Precondition: b[src] == '&' && dst <= src. 56 | // attribute should be true if parsing an attribute value. 57 | func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { 58 | // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference 59 | 60 | // i starts at 1 because we already know that s[0] == '&'. 61 | i, s := 1, b[src:] 62 | 63 | if len(s) <= 1 { 64 | b[dst] = b[src] 65 | return dst + 1, src + 1 66 | } 67 | 68 | if s[i] == '#' { 69 | if len(s) <= 3 { // We need to have at least "&#.". 70 | b[dst] = b[src] 71 | return dst + 1, src + 1 72 | } 73 | i++ 74 | c := s[i] 75 | hex := false 76 | if c == 'x' || c == 'X' { 77 | hex = true 78 | i++ 79 | } 80 | 81 | x := '\x00' 82 | for i < len(s) { 83 | c = s[i] 84 | i++ 85 | if hex { 86 | if '0' <= c && c <= '9' { 87 | x = 16*x + rune(c) - '0' 88 | continue 89 | } else if 'a' <= c && c <= 'f' { 90 | x = 16*x + rune(c) - 'a' + 10 91 | continue 92 | } else if 'A' <= c && c <= 'F' { 93 | x = 16*x + rune(c) - 'A' + 10 94 | continue 95 | } 96 | } else if '0' <= c && c <= '9' { 97 | x = 10*x + rune(c) - '0' 98 | continue 99 | } 100 | if c != ';' { 101 | i-- 102 | } 103 | break 104 | } 105 | 106 | if i <= 3 { // No characters matched. 107 | b[dst] = b[src] 108 | return dst + 1, src + 1 109 | } 110 | 111 | if 0x80 <= x && x <= 0x9F { 112 | // Replace characters from Windows-1252 with UTF-8 equivalents. 113 | x = replacementTable[x-0x80] 114 | } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { 115 | // Replace invalid characters with the replacement character. 
116 | x = '\uFFFD' 117 | } 118 | 119 | return dst + utf8.EncodeRune(b[dst:], x), src + i 120 | } 121 | 122 | // Consume the maximum number of characters possible, with the 123 | // consumed characters matching one of the named references. 124 | 125 | for i < len(s) { 126 | c := s[i] 127 | i++ 128 | // Lower-cased characters are more common in entities, so we check for them first. 129 | if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { 130 | continue 131 | } 132 | if c != ';' { 133 | i-- 134 | } 135 | break 136 | } 137 | 138 | entityName := string(s[1:i]) 139 | if entityName == "" { 140 | // No-op. 141 | } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { 142 | // No-op. 143 | } else if x := entity[entityName]; x != 0 { 144 | return dst + utf8.EncodeRune(b[dst:], x), src + i 145 | } else if x := entity2[entityName]; x[0] != 0 { 146 | dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) 147 | return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i 148 | } else if !attribute { 149 | maxLen := len(entityName) - 1 150 | if maxLen > longestEntityWithoutSemicolon { 151 | maxLen = longestEntityWithoutSemicolon 152 | } 153 | for j := maxLen; j > 1; j-- { 154 | if x := entity[entityName[:j]]; x != 0 { 155 | return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 156 | } 157 | } 158 | } 159 | 160 | dst1, src1 = dst+i, src+i 161 | copy(b[dst:dst1], b[src:src1]) 162 | return dst1, src1 163 | } 164 | 165 | // unescape unescapes b's entities in-place, so that "a<b" becomes "a': 214 | esc = ">" 215 | case '"': 216 | // """ is shorter than """. 
217 | esc = """ 218 | case '\r': 219 | esc = " " 220 | default: 221 | panic("unrecognized escape character") 222 | } 223 | s = s[i+1:] 224 | if _, err := w.WriteString(esc); err != nil { 225 | return err 226 | } 227 | i = strings.IndexAny(s, escapedChars) 228 | } 229 | _, err := w.WriteString(s) 230 | return err 231 | } 232 | 233 | // EscapeString escapes special characters like "<" to become "<". It 234 | // escapes only five such characters: <, >, &, ' and ". 235 | // UnescapeString(EscapeString(s)) == s always holds, but the converse isn't 236 | // always true. 237 | func EscapeString(s string) string { 238 | if strings.IndexAny(s, escapedChars) == -1 { 239 | return s 240 | } 241 | var buf bytes.Buffer 242 | escape(&buf, s) 243 | return buf.String() 244 | } 245 | 246 | // UnescapeString unescapes entities like "<" to become "<". It unescapes a 247 | // larger range of entities than EscapeString escapes. For example, "á" 248 | // unescapes to "á", as does "á" and "&xE1;". 249 | // UnescapeString(EscapeString(s)) == s always holds, but the converse isn't 250 | // always true. 251 | func UnescapeString(s string) string { 252 | for _, c := range s { 253 | if c == '&' { 254 | return string(unescape([]byte(s), false)) 255 | } 256 | } 257 | return s 258 | } 259 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/escape_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import "testing" 8 | 9 | type unescapeTest struct { 10 | // A short description of the test case. 11 | desc string 12 | // The HTML text. 13 | html string 14 | // The unescaped text. 15 | unescaped string 16 | } 17 | 18 | var unescapeTests = []unescapeTest{ 19 | // Handle no entities. 
20 | { 21 | "copy", 22 | "A\ttext\nstring", 23 | "A\ttext\nstring", 24 | }, 25 | // Handle simple named entities. 26 | { 27 | "simple", 28 | "& > <", 29 | "& > <", 30 | }, 31 | // Handle hitting the end of the string. 32 | { 33 | "stringEnd", 34 | "& &", 35 | "& &", 36 | }, 37 | // Handle entities with two codepoints. 38 | { 39 | "multiCodepoint", 40 | "text ⋛︀ blah", 41 | "text \u22db\ufe00 blah", 42 | }, 43 | // Handle decimal numeric entities. 44 | { 45 | "decimalEntity", 46 | "Delta = Δ ", 47 | "Delta = Δ ", 48 | }, 49 | // Handle hexadecimal numeric entities. 50 | { 51 | "hexadecimalEntity", 52 | "Lambda = λ = λ ", 53 | "Lambda = λ = λ ", 54 | }, 55 | // Handle numeric early termination. 56 | { 57 | "numericEnds", 58 | "&# &#x €43 © = ©f = ©", 59 | "&# &#x €43 © = ©f = ©", 60 | }, 61 | // Handle numeric ISO-8859-1 entity replacements. 62 | { 63 | "numericReplacements", 64 | "Footnote‡", 65 | "Footnote‡", 66 | }, 67 | } 68 | 69 | func TestUnescape(t *testing.T) { 70 | for _, tt := range unescapeTests { 71 | unescaped := UnescapeString(tt.html) 72 | if unescaped != tt.unescaped { 73 | t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped) 74 | } 75 | } 76 | } 77 | 78 | func TestUnescapeEscape(t *testing.T) { 79 | ss := []string{ 80 | ``, 81 | `abc def`, 82 | `a & b`, 83 | `a&b`, 84 | `a & b`, 85 | `"`, 86 | `"`, 87 | `"<&>"`, 88 | `"<&>"`, 89 | `3&5==1 && 0<1, "0<1", a+acute=á`, 90 | `The special characters are: <, >, &, ' and "`, 91 | } 92 | for _, s := range ss { 93 | if got := UnescapeString(EscapeString(s)); got != s { 94 | t.Errorf("got %q want %q", got, s) 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/example_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The Go Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // This example demonstrates parsing HTML data and walking the resulting tree. 6 | package html_test 7 | 8 | import ( 9 | "fmt" 10 | "log" 11 | "strings" 12 | 13 | "golang.org/x/net/html" 14 | ) 15 | 16 | func ExampleParse() { 17 | s := `

Links:

` 18 | doc, err := html.Parse(strings.NewReader(s)) 19 | if err != nil { 20 | log.Fatal(err) 21 | } 22 | var f func(*html.Node) 23 | f = func(n *html.Node) { 24 | if n.Type == html.ElementNode && n.Data == "a" { 25 | for _, a := range n.Attr { 26 | if a.Key == "href" { 27 | fmt.Println(a.Val) 28 | break 29 | } 30 | } 31 | } 32 | for c := n.FirstChild; c != nil; c = c.NextSibling { 33 | f(c) 34 | } 35 | } 36 | f(doc) 37 | // Output: 38 | // foo 39 | // /bar/baz 40 | } 41 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/foreign.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "strings" 9 | ) 10 | 11 | func adjustAttributeNames(aa []Attribute, nameMap map[string]string) { 12 | for i := range aa { 13 | if newName, ok := nameMap[aa[i].Key]; ok { 14 | aa[i].Key = newName 15 | } 16 | } 17 | } 18 | 19 | func adjustForeignAttributes(aa []Attribute) { 20 | for i, a := range aa { 21 | if a.Key == "" || a.Key[0] != 'x' { 22 | continue 23 | } 24 | switch a.Key { 25 | case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show", 26 | "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink": 27 | j := strings.Index(a.Key, ":") 28 | aa[i].Namespace = a.Key[:j] 29 | aa[i].Key = a.Key[j+1:] 30 | } 31 | } 32 | } 33 | 34 | func htmlIntegrationPoint(n *Node) bool { 35 | if n.Type != ElementNode { 36 | return false 37 | } 38 | switch n.Namespace { 39 | case "math": 40 | if n.Data == "annotation-xml" { 41 | for _, a := range n.Attr { 42 | if a.Key == "encoding" { 43 | val := strings.ToLower(a.Val) 44 | if val == "text/html" || val == "application/xhtml+xml" { 45 | return true 46 | } 47 | } 48 | } 49 | } 50 | case "svg": 51 | 
switch n.Data { 52 | case "desc", "foreignObject", "title": 53 | return true 54 | } 55 | } 56 | return false 57 | } 58 | 59 | func mathMLTextIntegrationPoint(n *Node) bool { 60 | if n.Namespace != "math" { 61 | return false 62 | } 63 | switch n.Data { 64 | case "mi", "mo", "mn", "ms", "mtext": 65 | return true 66 | } 67 | return false 68 | } 69 | 70 | // Section 12.2.5.5. 71 | var breakout = map[string]bool{ 72 | "b": true, 73 | "big": true, 74 | "blockquote": true, 75 | "body": true, 76 | "br": true, 77 | "center": true, 78 | "code": true, 79 | "dd": true, 80 | "div": true, 81 | "dl": true, 82 | "dt": true, 83 | "em": true, 84 | "embed": true, 85 | "h1": true, 86 | "h2": true, 87 | "h3": true, 88 | "h4": true, 89 | "h5": true, 90 | "h6": true, 91 | "head": true, 92 | "hr": true, 93 | "i": true, 94 | "img": true, 95 | "li": true, 96 | "listing": true, 97 | "menu": true, 98 | "meta": true, 99 | "nobr": true, 100 | "ol": true, 101 | "p": true, 102 | "pre": true, 103 | "ruby": true, 104 | "s": true, 105 | "small": true, 106 | "span": true, 107 | "strong": true, 108 | "strike": true, 109 | "sub": true, 110 | "sup": true, 111 | "table": true, 112 | "tt": true, 113 | "u": true, 114 | "ul": true, 115 | "var": true, 116 | } 117 | 118 | // Section 12.2.5.5. 
// Name-adjustment tables for foreign (SVG and MathML) content. Every key is
// the all-lowercase spelling of a name and every value is the mixed-case
// spelling that replaces it. All three tables have the map[string]string
// shape that adjustAttributeNames accepts as its nameMap argument.
var (
	// svgTagNameAdjustments lists the SVG element names whose canonical
	// spelling contains upper-case letters (Section 12.2.5.5).
	svgTagNameAdjustments = map[string]string{
		"altglyph":            "altGlyph",
		"altglyphdef":         "altGlyphDef",
		"altglyphitem":        "altGlyphItem",
		"animatecolor":        "animateColor",
		"animatemotion":       "animateMotion",
		"animatetransform":    "animateTransform",
		"clippath":            "clipPath",
		"feblend":             "feBlend",
		"fecolormatrix":       "feColorMatrix",
		"fecomponenttransfer": "feComponentTransfer",
		"fecomposite":         "feComposite",
		"feconvolvematrix":    "feConvolveMatrix",
		"fediffuselighting":   "feDiffuseLighting",
		"fedisplacementmap":   "feDisplacementMap",
		"fedistantlight":      "feDistantLight",
		"feflood":             "feFlood",
		"fefunca":             "feFuncA",
		"fefuncb":             "feFuncB",
		"fefuncg":             "feFuncG",
		"fefuncr":             "feFuncR",
		"fegaussianblur":      "feGaussianBlur",
		"feimage":             "feImage",
		"femerge":             "feMerge",
		"femergenode":         "feMergeNode",
		"femorphology":        "feMorphology",
		"feoffset":            "feOffset",
		"fepointlight":        "fePointLight",
		"fespecularlighting":  "feSpecularLighting",
		"fespotlight":         "feSpotLight",
		"fetile":              "feTile",
		"feturbulence":        "feTurbulence",
		"foreignobject":       "foreignObject",
		"glyphref":            "glyphRef",
		"lineargradient":      "linearGradient",
		"radialgradient":      "radialGradient",
		"textpath":            "textPath",
	}

	// mathMLAttributeAdjustments holds the single MathML attribute name
	// that needs its case restored (Section 12.2.5.1).
	mathMLAttributeAdjustments = map[string]string{
		"definitionurl": "definitionURL",
	}

	// svgAttributeAdjustments lists the SVG attribute names whose
	// canonical spelling contains upper-case letters.
	svgAttributeAdjustments = map[string]string{
		"attributename":             "attributeName",
		"attributetype":             "attributeType",
		"basefrequency":             "baseFrequency",
		"baseprofile":               "baseProfile",
		"calcmode":                  "calcMode",
		"clippathunits":             "clipPathUnits",
		"contentscripttype":         "contentScriptType",
		"contentstyletype":          "contentStyleType",
		"diffuseconstant":           "diffuseConstant",
		"edgemode":                  "edgeMode",
		"externalresourcesrequired": "externalResourcesRequired",
		"filterres":                 "filterRes",
		"filterunits":               "filterUnits",
		"glyphref":                  "glyphRef",
		"gradienttransform":         "gradientTransform",
		"gradientunits":             "gradientUnits",
		"kernelmatrix":              "kernelMatrix",
		"kernelunitlength":          "kernelUnitLength",
		"keypoints":                 "keyPoints",
		"keysplines":                "keySplines",
		"keytimes":                  "keyTimes",
		"lengthadjust":              "lengthAdjust",
		"limitingconeangle":         "limitingConeAngle",
		"markerheight":              "markerHeight",
		"markerunits":               "markerUnits",
		"markerwidth":               "markerWidth",
		"maskcontentunits":          "maskContentUnits",
		"maskunits":                 "maskUnits",
		"numoctaves":                "numOctaves",
		"pathlength":                "pathLength",
		"patterncontentunits":       "patternContentUnits",
		"patterntransform":          "patternTransform",
		"patternunits":              "patternUnits",
		"pointsatx":                 "pointsAtX",
		"pointsaty":                 "pointsAtY",
		"pointsatz":                 "pointsAtZ",
		"preservealpha":             "preserveAlpha",
		"preserveaspectratio":       "preserveAspectRatio",
		"primitiveunits":            "primitiveUnits",
		"refx":                      "refX",
		"refy":                      "refY",
		"repeatcount":               "repeatCount",
		"repeatdur":                 "repeatDur",
		"requiredextensions":        "requiredExtensions",
		"requiredfeatures":          "requiredFeatures",
		"specularconstant":          "specularConstant",
		"specularexponent":          "specularExponent",
		"spreadmethod":              "spreadMethod",
		"startoffset":               "startOffset",
		"stddeviation":              "stdDeviation",
		"stitchtiles":               "stitchTiles",
		"surfacescale":              "surfaceScale",
		"systemlanguage":            "systemLanguage",
		"tablevalues":               "tableValues",
		"targetx":                   "targetX",
		"targety":                   "targetY",
		"textlength":                "textLength",
		"viewbox":                   "viewBox",
		"viewtarget":                "viewTarget",
		"xchannelselector":          "xChannelSelector",
		"ychannelselector":          "yChannelSelector",
		"zoomandpan":                "zoomAndPan",
	}
)
-------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/node.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "golang.org/x/net/html/atom" 9 | ) 10 | 11 | // A NodeType is the type of a Node. 12 | type NodeType uint32 13 | 14 | const ( 15 | ErrorNode NodeType = iota 16 | TextNode 17 | DocumentNode 18 | ElementNode 19 | CommentNode 20 | DoctypeNode 21 | scopeMarkerNode 22 | ) 23 | 24 | // Section 12.2.3.3 says "scope markers are inserted when entering applet 25 | // elements, buttons, object elements, marquees, table cells, and table 26 | // captions, and are used to prevent formatting from 'leaking'". 27 | var scopeMarker = Node{Type: scopeMarkerNode} 28 | 29 | // A Node consists of a NodeType and some Data (tag name for element nodes, 30 | // content for text) and are part of a tree of Nodes. Element nodes may also 31 | // have a Namespace and contain a slice of Attributes. Data is unescaped, so 32 | // that it looks like "a 0 { 160 | return (*s)[i-1] 161 | } 162 | return nil 163 | } 164 | 165 | // index returns the index of the top-most occurrence of n in the stack, or -1 166 | // if n is not present. 167 | func (s *nodeStack) index(n *Node) int { 168 | for i := len(*s) - 1; i >= 0; i-- { 169 | if (*s)[i] == n { 170 | return i 171 | } 172 | } 173 | return -1 174 | } 175 | 176 | // insert inserts a node at the given index. 177 | func (s *nodeStack) insert(i int, n *Node) { 178 | (*s) = append(*s, nil) 179 | copy((*s)[i+1:], (*s)[i:]) 180 | (*s)[i] = n 181 | } 182 | 183 | // remove removes a node from the stack. It is a no-op if n is not present. 
184 | func (s *nodeStack) remove(n *Node) { 185 | i := s.index(n) 186 | if i == -1 { 187 | return 188 | } 189 | copy((*s)[i:], (*s)[i+1:]) 190 | j := len(*s) - 1 191 | (*s)[j] = nil 192 | *s = (*s)[:j] 193 | } 194 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/node_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package html 6 | 7 | import ( 8 | "fmt" 9 | ) 10 | 11 | // checkTreeConsistency checks that a node and its descendants are all 12 | // consistent in their parent/child/sibling relationships. 13 | func checkTreeConsistency(n *Node) error { 14 | return checkTreeConsistency1(n, 0) 15 | } 16 | 17 | func checkTreeConsistency1(n *Node, depth int) error { 18 | if depth == 1e4 { 19 | return fmt.Errorf("html: tree looks like it contains a cycle") 20 | } 21 | if err := checkNodeConsistency(n); err != nil { 22 | return err 23 | } 24 | for c := n.FirstChild; c != nil; c = c.NextSibling { 25 | if err := checkTreeConsistency1(c, depth+1); err != nil { 26 | return err 27 | } 28 | } 29 | return nil 30 | } 31 | 32 | // checkNodeConsistency checks that a node's parent/child/sibling relationships 33 | // are consistent. 
34 | func checkNodeConsistency(n *Node) error { 35 | if n == nil { 36 | return nil 37 | } 38 | 39 | nParent := 0 40 | for p := n.Parent; p != nil; p = p.Parent { 41 | nParent++ 42 | if nParent == 1e4 { 43 | return fmt.Errorf("html: parent list looks like an infinite loop") 44 | } 45 | } 46 | 47 | nForward := 0 48 | for c := n.FirstChild; c != nil; c = c.NextSibling { 49 | nForward++ 50 | if nForward == 1e6 { 51 | return fmt.Errorf("html: forward list of children looks like an infinite loop") 52 | } 53 | if c.Parent != n { 54 | return fmt.Errorf("html: inconsistent child/parent relationship") 55 | } 56 | } 57 | 58 | nBackward := 0 59 | for c := n.LastChild; c != nil; c = c.PrevSibling { 60 | nBackward++ 61 | if nBackward == 1e6 { 62 | return fmt.Errorf("html: backward list of children looks like an infinite loop") 63 | } 64 | if c.Parent != n { 65 | return fmt.Errorf("html: inconsistent child/parent relationship") 66 | } 67 | } 68 | 69 | if n.Parent != nil { 70 | if n.Parent == n { 71 | return fmt.Errorf("html: inconsistent parent relationship") 72 | } 73 | if n.Parent == n.FirstChild { 74 | return fmt.Errorf("html: inconsistent parent/first relationship") 75 | } 76 | if n.Parent == n.LastChild { 77 | return fmt.Errorf("html: inconsistent parent/last relationship") 78 | } 79 | if n.Parent == n.PrevSibling { 80 | return fmt.Errorf("html: inconsistent parent/prev relationship") 81 | } 82 | if n.Parent == n.NextSibling { 83 | return fmt.Errorf("html: inconsistent parent/next relationship") 84 | } 85 | 86 | parentHasNAsAChild := false 87 | for c := n.Parent.FirstChild; c != nil; c = c.NextSibling { 88 | if c == n { 89 | parentHasNAsAChild = true 90 | break 91 | } 92 | } 93 | if !parentHasNAsAChild { 94 | return fmt.Errorf("html: inconsistent parent/child relationship") 95 | } 96 | } 97 | 98 | if n.PrevSibling != nil && n.PrevSibling.NextSibling != n { 99 | return fmt.Errorf("html: inconsistent prev/next relationship") 100 | } 101 | if n.NextSibling != nil && 
n.NextSibling.PrevSibling != n { 102 | return fmt.Errorf("html: inconsistent next/prev relationship") 103 | } 104 | 105 | if (n.FirstChild == nil) != (n.LastChild == nil) { 106 | return fmt.Errorf("html: inconsistent first/last relationship") 107 | } 108 | if n.FirstChild != nil && n.FirstChild == n.LastChild { 109 | // We have a sole child. 110 | if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil { 111 | return fmt.Errorf("html: inconsistent sole child's sibling relationship") 112 | } 113 | } 114 | 115 | seen := map[*Node]bool{} 116 | 117 | var last *Node 118 | for c := n.FirstChild; c != nil; c = c.NextSibling { 119 | if seen[c] { 120 | return fmt.Errorf("html: inconsistent repeated child") 121 | } 122 | seen[c] = true 123 | last = c 124 | } 125 | if last != n.LastChild { 126 | return fmt.Errorf("html: inconsistent last relationship") 127 | } 128 | 129 | var first *Node 130 | for c := n.LastChild; c != nil; c = c.PrevSibling { 131 | if !seen[c] { 132 | return fmt.Errorf("html: inconsistent missing child") 133 | } 134 | delete(seen, c) 135 | first = c 136 | } 137 | if first != n.FirstChild { 138 | return fmt.Errorf("html: inconsistent first relationship") 139 | } 140 | 141 | if len(seen) != 0 { 142 | return fmt.Errorf("html: inconsistent forwards/backwards child list") 143 | } 144 | 145 | return nil 146 | } 147 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/render_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package html 6 | 7 | import ( 8 | "bytes" 9 | "testing" 10 | ) 11 | 12 | func TestRenderer(t *testing.T) { 13 | nodes := [...]*Node{ 14 | 0: { 15 | Type: ElementNode, 16 | Data: "html", 17 | }, 18 | 1: { 19 | Type: ElementNode, 20 | Data: "head", 21 | }, 22 | 2: { 23 | Type: ElementNode, 24 | Data: "body", 25 | }, 26 | 3: { 27 | Type: TextNode, 28 | Data: "0<1", 29 | }, 30 | 4: { 31 | Type: ElementNode, 32 | Data: "p", 33 | Attr: []Attribute{ 34 | { 35 | Key: "id", 36 | Val: "A", 37 | }, 38 | { 39 | Key: "foo", 40 | Val: `abc"def`, 41 | }, 42 | }, 43 | }, 44 | 5: { 45 | Type: TextNode, 46 | Data: "2", 47 | }, 48 | 6: { 49 | Type: ElementNode, 50 | Data: "b", 51 | Attr: []Attribute{ 52 | { 53 | Key: "empty", 54 | Val: "", 55 | }, 56 | }, 57 | }, 58 | 7: { 59 | Type: TextNode, 60 | Data: "3", 61 | }, 62 | 8: { 63 | Type: ElementNode, 64 | Data: "i", 65 | Attr: []Attribute{ 66 | { 67 | Key: "backslash", 68 | Val: `\`, 69 | }, 70 | }, 71 | }, 72 | 9: { 73 | Type: TextNode, 74 | Data: "&4", 75 | }, 76 | 10: { 77 | Type: TextNode, 78 | Data: "5", 79 | }, 80 | 11: { 81 | Type: ElementNode, 82 | Data: "blockquote", 83 | }, 84 | 12: { 85 | Type: ElementNode, 86 | Data: "br", 87 | }, 88 | 13: { 89 | Type: TextNode, 90 | Data: "6", 91 | }, 92 | } 93 | 94 | // Build a tree out of those nodes, based on a textual representation. 95 | // Only the ".\t"s are significant. The trailing HTML-like text is 96 | // just commentary. The "0:" prefixes are for easy cross-reference with 97 | // the nodes array. 98 | treeAsText := [...]string{ 99 | 0: ``, 100 | 1: `. `, 101 | 2: `. `, 102 | 3: `. . "0<1"`, 103 | 4: `. .

`, 104 | 5: `. . . "2"`, 105 | 6: `. . . `, 106 | 7: `. . . . "3"`, 107 | 8: `. . . `, 108 | 9: `. . . . "&4"`, 109 | 10: `. . "5"`, 110 | 11: `. .

`, 111 | 12: `. .
`, 112 | 13: `. . "6"`, 113 | } 114 | if len(nodes) != len(treeAsText) { 115 | t.Fatal("len(nodes) != len(treeAsText)") 116 | } 117 | var stack [8]*Node 118 | for i, line := range treeAsText { 119 | level := 0 120 | for line[0] == '.' { 121 | // Strip a leading ".\t". 122 | line = line[2:] 123 | level++ 124 | } 125 | n := nodes[i] 126 | if level == 0 { 127 | if stack[0] != nil { 128 | t.Fatal("multiple root nodes") 129 | } 130 | stack[0] = n 131 | } else { 132 | stack[level-1].AppendChild(n) 133 | stack[level] = n 134 | for i := level + 1; i < len(stack); i++ { 135 | stack[i] = nil 136 | } 137 | } 138 | // At each stage of tree construction, we check all nodes for consistency. 139 | for j, m := range nodes { 140 | if err := checkNodeConsistency(m); err != nil { 141 | t.Fatalf("i=%d, j=%d: %v", i, j, err) 142 | } 143 | } 144 | } 145 | 146 | want := `0<1

` + 147 | `23&4

` + 148 | `5

6` 149 | b := new(bytes.Buffer) 150 | if err := Render(b, nodes[0]); err != nil { 151 | t.Fatal(err) 152 | } 153 | if got := b.String(); got != want { 154 | t.Errorf("got vs want:\n%s\n%s\n", got, want) 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/README: -------------------------------------------------------------------------------- 1 | The *.dat files in this directory are copied from The WebKit Open Source 2 | Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources. 3 | WebKit is licensed under a BSD style license. 4 | http://webkit.org/coding/bsd-license.html says: 5 | 6 | Copyright (C) 2009 Apple Inc. All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, 12 | this list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY 19 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR ITS CONTRIBUTORS BE LIABLE FOR ANY 22 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/adoption01.dat: -------------------------------------------------------------------------------- 1 | #data 2 |

3 | #errors 4 | #document 5 | | 6 | | 7 | | 8 | | 9 | |

10 | | 11 | 12 | #data 13 | 1

23

14 | #errors 15 | #document 16 | | 17 | | 18 | | 19 | | 20 | | "1" 21 | |

22 | | 23 | | "2" 24 | | "3" 25 | 26 | #data 27 | 1 28 | #errors 29 | #document 30 | | 31 | | 32 | | 33 | | 34 | | "1" 35 | |