├── LICENSE ├── README.md ├── auto_chain.go ├── chain.go ├── doc.go ├── examples └── main.go ├── expr ├── auto_expr.go ├── bfs.go ├── checker.go ├── dfs.go ├── getter.go └── util.go ├── gen ├── .gitignore ├── gen.go ├── main.go ├── spec.go └── util.go ├── node.go ├── plain.go ├── pretty.go └── util.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Hǎiliàng Wáng. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this 7 | * list of conditions and the following disclaimer. 8 | 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 20 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | html-query: A fluent and functional approach to querying HTML DOM 2 | ================================================================= 3 | 4 | [![GoDoc](https://godoc.org/h12.io/html-query?status.svg)](https://godoc.org/h12.io/html-query) 5 | 6 | html-query is a Go package that provides a fluent and functional interface for 7 | querying HTML DOM. It is based on [golang.org/x/net/html](https://godoc.org/golang.org/x/net/html). 8 | 9 | Examples 10 | ======== 11 | 12 | 1. A simple example (under the "examples" directory) 13 | ``` 14 | r := get(`http://blog.golang.org/index`) 15 | defer r.Close() 16 | root, err := query.Parse(r) 17 | checkError(err) 18 | root.Div(Id("content")).Children(Class("blogtitle")).For(func(item *query.Node) { 19 | href := item.Ahref().Href() 20 | date := item.Span(Class("date")).Text() 21 | tags := item.Span(Class("tags")).Text() 22 | // ...... 23 | }) 24 | ``` 25 | 26 | 2. Generator of html-query (under the "gen" directory) 27 | 28 | A large part of html-query is automatically generated from the HTML spec. The 29 | spec is in HTML format, so the generator parses it using html-query itself. 30 | 31 | Design 32 | ====== 33 | 34 | Here is a simple explanation of the design of html-query. 35 | 36 | ### Functional query expressions 37 | 38 | All functional definitions are defined in the html-query/expr package. 39 | 40 | 1. Checker and checker composition 41 | 42 | A checker is a function that accepts and conditionally returns a *html.Node. 
43 | ``` 44 | type Checker func(*html.Node) *html.Node 45 | ``` 46 | Here are some checker examples: 47 | ``` 48 | Id("id1") 49 | Class("c1") 50 | Div 51 | Abbr 52 | H1 53 | H2 54 | ``` 55 | Checkers can be combined as boolean expressions: 56 | ``` 57 | And(Id("id1"), Class("c1")) 58 | Or(Class("c1"), Class("c2")) 59 | And(Class("c1"), Not(Class("c2"))) 60 | ``` 61 | 2. Checker builder 62 | 63 | A checker builder is a function that returns a checker. "Id", "Class", "And", 64 | "Or", "Not" shown above are all checker builders. There are also some checker 65 | builder builders (functions that return a checker builder) defined in 66 | html-query where needed. 67 | 68 | ### Fluent interface 69 | 70 | The fluent interface (http://en.wikipedia.org/wiki/Fluent_interface) is defined in 71 | the html-query package. 72 | 73 | 1. Root node 74 | 75 | Function Parse returns the root node of an HTML document. 76 | 77 | 2. Node finder 78 | 79 | Method Node.Find implements a breadth-first search (BFS) for a node, e.g. 80 | ``` 81 | node.Find(Div, Class("id1")) 82 | ``` 83 | But usually you can write the short form: 84 | ``` 85 | node.Div(Class("id1")) 86 | ``` 87 | 3. Attribute getter 88 | 89 | Method Node.Attr can be used to get the value (or a regular expression 90 | submatch of the value) of an attribute of a node, e.g. 91 | ``` 92 | node.Attr("Id") 93 | node.Attr("href", "\(.*)") 94 | ``` 95 | But usually you can write the short form: 96 | ``` 97 | node.Id() 98 | node.Href("\(.*)") 99 | ``` 100 | 4. Node iterator 101 | 102 | Methods Node.Children and Node.Descendants each return a node iterator 103 | (NodeIter). Method NodeIter.For can be used to loop through these nodes. 104 | 105 | Alternative 106 | =========== 107 | If you prefer a jQuery-like DSL rather than a functional approach, you might want to 108 | try goquery: https://github.com/PuerkitoBio/goquery. 
109 | -------------------------------------------------------------------------------- /auto_chain.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package query 6 | 7 | import ( 8 | . "h12.io/html-query/expr" 9 | ) 10 | 11 | func (n *Node) A(cs ...Checker) *Node { 12 | return n.find(A, cs) 13 | } 14 | 15 | func (n *Node) Abbr(cs ...Checker) *Node { 16 | return n.find(Abbr, cs) 17 | } 18 | 19 | func (n *Node) Address(cs ...Checker) *Node { 20 | return n.find(Address, cs) 21 | } 22 | 23 | func (n *Node) Area(cs ...Checker) *Node { 24 | return n.find(Area, cs) 25 | } 26 | 27 | func (n *Node) Article(cs ...Checker) *Node { 28 | return n.find(Article, cs) 29 | } 30 | 31 | func (n *Node) Aside(cs ...Checker) *Node { 32 | return n.find(Aside, cs) 33 | } 34 | 35 | func (n *Node) Audio(cs ...Checker) *Node { 36 | return n.find(Audio, cs) 37 | } 38 | 39 | func (n *Node) B(cs ...Checker) *Node { 40 | return n.find(B, cs) 41 | } 42 | 43 | func (n *Node) Base(cs ...Checker) *Node { 44 | return n.find(Base, cs) 45 | } 46 | 47 | func (n *Node) Bdi(cs ...Checker) *Node { 48 | return n.find(Bdi, cs) 49 | } 50 | 51 | func (n *Node) Bdo(cs ...Checker) *Node { 52 | return n.find(Bdo, cs) 53 | } 54 | 55 | func (n *Node) Blockquote(cs ...Checker) *Node { 56 | return n.find(Blockquote, cs) 57 | } 58 | 59 | func (n *Node) Body(cs ...Checker) *Node { 60 | return n.find(Body, cs) 61 | } 62 | 63 | func (n *Node) Br(cs ...Checker) *Node { 64 | return n.find(Br, cs) 65 | } 66 | 67 | func (n *Node) Button(cs ...Checker) *Node { 68 | return n.find(Button, cs) 69 | } 70 | 71 | func (n *Node) Canvas(cs ...Checker) *Node { 72 | return n.find(Canvas, cs) 73 | } 74 | 75 | func (n *Node) Caption(cs ...Checker) *Node { 76 | return n.find(Caption, cs) 77 | } 78 | 79 | func (n *Node) Cite(cs 
...Checker) *Node { 80 | return n.find(Cite, cs) 81 | } 82 | 83 | func (n *Node) Code(cs ...Checker) *Node { 84 | return n.find(Code, cs) 85 | } 86 | 87 | func (n *Node) Col(cs ...Checker) *Node { 88 | return n.find(Col, cs) 89 | } 90 | 91 | func (n *Node) Colgroup(cs ...Checker) *Node { 92 | return n.find(Colgroup, cs) 93 | } 94 | 95 | func (n *Node) Data(cs ...Checker) *Node { 96 | return n.find(Data, cs) 97 | } 98 | 99 | func (n *Node) Datalist(cs ...Checker) *Node { 100 | return n.find(Datalist, cs) 101 | } 102 | 103 | func (n *Node) Dd(cs ...Checker) *Node { 104 | return n.find(Dd, cs) 105 | } 106 | 107 | func (n *Node) Del(cs ...Checker) *Node { 108 | return n.find(Del, cs) 109 | } 110 | 111 | func (n *Node) Details(cs ...Checker) *Node { 112 | return n.find(Details, cs) 113 | } 114 | 115 | func (n *Node) Dfn(cs ...Checker) *Node { 116 | return n.find(Dfn, cs) 117 | } 118 | 119 | func (n *Node) Dialog(cs ...Checker) *Node { 120 | return n.find(Dialog, cs) 121 | } 122 | 123 | func (n *Node) Div(cs ...Checker) *Node { 124 | return n.find(Div, cs) 125 | } 126 | 127 | func (n *Node) Dl(cs ...Checker) *Node { 128 | return n.find(Dl, cs) 129 | } 130 | 131 | func (n *Node) Dt(cs ...Checker) *Node { 132 | return n.find(Dt, cs) 133 | } 134 | 135 | func (n *Node) Em(cs ...Checker) *Node { 136 | return n.find(Em, cs) 137 | } 138 | 139 | func (n *Node) Embed(cs ...Checker) *Node { 140 | return n.find(Embed, cs) 141 | } 142 | 143 | func (n *Node) Fieldset(cs ...Checker) *Node { 144 | return n.find(Fieldset, cs) 145 | } 146 | 147 | func (n *Node) Figcaption(cs ...Checker) *Node { 148 | return n.find(Figcaption, cs) 149 | } 150 | 151 | func (n *Node) Figure(cs ...Checker) *Node { 152 | return n.find(Figure, cs) 153 | } 154 | 155 | func (n *Node) Footer(cs ...Checker) *Node { 156 | return n.find(Footer, cs) 157 | } 158 | 159 | func (n *Node) Form(cs ...Checker) *Node { 160 | return n.find(Form, cs) 161 | } 162 | 163 | func (n *Node) H1(cs ...Checker) *Node { 164 | return 
n.find(H1, cs) 165 | } 166 | 167 | func (n *Node) H2(cs ...Checker) *Node { 168 | return n.find(H2, cs) 169 | } 170 | 171 | func (n *Node) H3(cs ...Checker) *Node { 172 | return n.find(H3, cs) 173 | } 174 | 175 | func (n *Node) H4(cs ...Checker) *Node { 176 | return n.find(H4, cs) 177 | } 178 | 179 | func (n *Node) H5(cs ...Checker) *Node { 180 | return n.find(H5, cs) 181 | } 182 | 183 | func (n *Node) H6(cs ...Checker) *Node { 184 | return n.find(H6, cs) 185 | } 186 | 187 | func (n *Node) Head(cs ...Checker) *Node { 188 | return n.find(Head, cs) 189 | } 190 | 191 | func (n *Node) Header(cs ...Checker) *Node { 192 | return n.find(Header, cs) 193 | } 194 | 195 | func (n *Node) Hgroup(cs ...Checker) *Node { 196 | return n.find(Hgroup, cs) 197 | } 198 | 199 | func (n *Node) Hr(cs ...Checker) *Node { 200 | return n.find(Hr, cs) 201 | } 202 | 203 | func (n *Node) Html(cs ...Checker) *Node { 204 | return n.find(Html, cs) 205 | } 206 | 207 | func (n *Node) I(cs ...Checker) *Node { 208 | return n.find(I, cs) 209 | } 210 | 211 | func (n *Node) Iframe(cs ...Checker) *Node { 212 | return n.find(Iframe, cs) 213 | } 214 | 215 | func (n *Node) Img(cs ...Checker) *Node { 216 | return n.find(Img, cs) 217 | } 218 | 219 | func (n *Node) Input(cs ...Checker) *Node { 220 | return n.find(Input, cs) 221 | } 222 | 223 | func (n *Node) Ins(cs ...Checker) *Node { 224 | return n.find(Ins, cs) 225 | } 226 | 227 | func (n *Node) Kbd(cs ...Checker) *Node { 228 | return n.find(Kbd, cs) 229 | } 230 | 231 | func (n *Node) Label(cs ...Checker) *Node { 232 | return n.find(Label, cs) 233 | } 234 | 235 | func (n *Node) Legend(cs ...Checker) *Node { 236 | return n.find(Legend, cs) 237 | } 238 | 239 | func (n *Node) Li(cs ...Checker) *Node { 240 | return n.find(Li, cs) 241 | } 242 | 243 | func (n *Node) Link(cs ...Checker) *Node { 244 | return n.find(Link, cs) 245 | } 246 | 247 | func (n *Node) Map(cs ...Checker) *Node { 248 | return n.find(Map, cs) 249 | } 250 | 251 | func (n *Node) Mark(cs 
...Checker) *Node { 252 | return n.find(Mark, cs) 253 | } 254 | 255 | func (n *Node) Menu(cs ...Checker) *Node { 256 | return n.find(Menu, cs) 257 | } 258 | 259 | func (n *Node) Meta(cs ...Checker) *Node { 260 | return n.find(Meta, cs) 261 | } 262 | 263 | func (n *Node) Meter(cs ...Checker) *Node { 264 | return n.find(Meter, cs) 265 | } 266 | 267 | func (n *Node) Nav(cs ...Checker) *Node { 268 | return n.find(Nav, cs) 269 | } 270 | 271 | func (n *Node) Noscript(cs ...Checker) *Node { 272 | return n.find(Noscript, cs) 273 | } 274 | 275 | func (n *Node) Object(cs ...Checker) *Node { 276 | return n.find(Object, cs) 277 | } 278 | 279 | func (n *Node) Ol(cs ...Checker) *Node { 280 | return n.find(Ol, cs) 281 | } 282 | 283 | func (n *Node) Optgroup(cs ...Checker) *Node { 284 | return n.find(Optgroup, cs) 285 | } 286 | 287 | func (n *Node) Option(cs ...Checker) *Node { 288 | return n.find(Option, cs) 289 | } 290 | 291 | func (n *Node) Output(cs ...Checker) *Node { 292 | return n.find(Output, cs) 293 | } 294 | 295 | func (n *Node) P(cs ...Checker) *Node { 296 | return n.find(P, cs) 297 | } 298 | 299 | func (n *Node) Param(cs ...Checker) *Node { 300 | return n.find(Param, cs) 301 | } 302 | 303 | func (n *Node) Pre(cs ...Checker) *Node { 304 | return n.find(Pre, cs) 305 | } 306 | 307 | func (n *Node) Progress(cs ...Checker) *Node { 308 | return n.find(Progress, cs) 309 | } 310 | 311 | func (n *Node) Q(cs ...Checker) *Node { 312 | return n.find(Q, cs) 313 | } 314 | 315 | func (n *Node) Rp(cs ...Checker) *Node { 316 | return n.find(Rp, cs) 317 | } 318 | 319 | func (n *Node) Rt(cs ...Checker) *Node { 320 | return n.find(Rt, cs) 321 | } 322 | 323 | func (n *Node) Ruby(cs ...Checker) *Node { 324 | return n.find(Ruby, cs) 325 | } 326 | 327 | func (n *Node) S(cs ...Checker) *Node { 328 | return n.find(S, cs) 329 | } 330 | 331 | func (n *Node) Samp(cs ...Checker) *Node { 332 | return n.find(Samp, cs) 333 | } 334 | 335 | func (n *Node) Script(cs ...Checker) *Node { 336 | return 
n.find(Script, cs) 337 | } 338 | 339 | func (n *Node) Section(cs ...Checker) *Node { 340 | return n.find(Section, cs) 341 | } 342 | 343 | func (n *Node) Select(cs ...Checker) *Node { 344 | return n.find(Select, cs) 345 | } 346 | 347 | func (n *Node) Small(cs ...Checker) *Node { 348 | return n.find(Small, cs) 349 | } 350 | 351 | func (n *Node) Source(cs ...Checker) *Node { 352 | return n.find(Source, cs) 353 | } 354 | 355 | func (n *Node) Span(cs ...Checker) *Node { 356 | return n.find(Span, cs) 357 | } 358 | 359 | func (n *Node) Strong(cs ...Checker) *Node { 360 | return n.find(Strong, cs) 361 | } 362 | 363 | func (n *Node) Style(cs ...Checker) *Node { 364 | return n.find(Style, cs) 365 | } 366 | 367 | func (n *Node) Sub(cs ...Checker) *Node { 368 | return n.find(Sub, cs) 369 | } 370 | 371 | func (n *Node) Summary(cs ...Checker) *Node { 372 | return n.find(Summary, cs) 373 | } 374 | 375 | func (n *Node) Sup(cs ...Checker) *Node { 376 | return n.find(Sup, cs) 377 | } 378 | 379 | func (n *Node) Table(cs ...Checker) *Node { 380 | return n.find(Table, cs) 381 | } 382 | 383 | func (n *Node) Tbody(cs ...Checker) *Node { 384 | return n.find(Tbody, cs) 385 | } 386 | 387 | func (n *Node) Td(cs ...Checker) *Node { 388 | return n.find(Td, cs) 389 | } 390 | 391 | func (n *Node) Textarea(cs ...Checker) *Node { 392 | return n.find(Textarea, cs) 393 | } 394 | 395 | func (n *Node) Tfoot(cs ...Checker) *Node { 396 | return n.find(Tfoot, cs) 397 | } 398 | 399 | func (n *Node) Th(cs ...Checker) *Node { 400 | return n.find(Th, cs) 401 | } 402 | 403 | func (n *Node) Thead(cs ...Checker) *Node { 404 | return n.find(Thead, cs) 405 | } 406 | 407 | func (n *Node) Time(cs ...Checker) *Node { 408 | return n.find(Time, cs) 409 | } 410 | 411 | func (n *Node) Title(cs ...Checker) *Node { 412 | return n.find(Title, cs) 413 | } 414 | 415 | func (n *Node) Tr(cs ...Checker) *Node { 416 | return n.find(Tr, cs) 417 | } 418 | 419 | func (n *Node) Track(cs ...Checker) *Node { 420 | return n.find(Track, 
cs) 421 | } 422 | 423 | func (n *Node) U(cs ...Checker) *Node { 424 | return n.find(U, cs) 425 | } 426 | 427 | func (n *Node) Ul(cs ...Checker) *Node { 428 | return n.find(Ul, cs) 429 | } 430 | 431 | func (n *Node) Var(cs ...Checker) *Node { 432 | return n.find(Var, cs) 433 | } 434 | 435 | func (n *Node) Video(cs ...Checker) *Node { 436 | return n.find(Video, cs) 437 | } 438 | 439 | func (n *Node) Wbr(cs ...Checker) *Node { 440 | return n.find(Wbr, cs) 441 | } 442 | 443 | func (n *Node) Abbr_(pat ...string) *string { 444 | return n.Attr("abbr", pat...) 445 | } 446 | 447 | func (n *Node) Accept(pat ...string) *string { 448 | return n.Attr("accept", pat...) 449 | } 450 | 451 | func (n *Node) AcceptCharset(pat ...string) *string { 452 | return n.Attr("accept-charset", pat...) 453 | } 454 | 455 | func (n *Node) Accesskey(pat ...string) *string { 456 | return n.Attr("accesskey", pat...) 457 | } 458 | 459 | func (n *Node) Action(pat ...string) *string { 460 | return n.Attr("action", pat...) 461 | } 462 | 463 | func (n *Node) Allowfullscreen(pat ...string) *string { 464 | return n.Attr("allowfullscreen", pat...) 465 | } 466 | 467 | func (n *Node) Allowpaymentrequest(pat ...string) *string { 468 | return n.Attr("allowpaymentrequest", pat...) 469 | } 470 | 471 | func (n *Node) Allowusermedia(pat ...string) *string { 472 | return n.Attr("allowusermedia", pat...) 473 | } 474 | 475 | func (n *Node) Alt(pat ...string) *string { 476 | return n.Attr("alt", pat...) 477 | } 478 | 479 | func (n *Node) As(pat ...string) *string { 480 | return n.Attr("as", pat...) 481 | } 482 | 483 | func (n *Node) Async(pat ...string) *string { 484 | return n.Attr("async", pat...) 485 | } 486 | 487 | func (n *Node) Autocomplete(pat ...string) *string { 488 | return n.Attr("autocomplete", pat...) 489 | } 490 | 491 | func (n *Node) Autofocus(pat ...string) *string { 492 | return n.Attr("autofocus", pat...) 
493 | } 494 | 495 | func (n *Node) Autoplay(pat ...string) *string { 496 | return n.Attr("autoplay", pat...) 497 | } 498 | 499 | func (n *Node) Charset(pat ...string) *string { 500 | return n.Attr("charset", pat...) 501 | } 502 | 503 | func (n *Node) Checked(pat ...string) *string { 504 | return n.Attr("checked", pat...) 505 | } 506 | 507 | func (n *Node) Cite_(pat ...string) *string { 508 | return n.Attr("cite", pat...) 509 | } 510 | 511 | func (n *Node) Class(pat ...string) *string { 512 | return n.Attr("class", pat...) 513 | } 514 | 515 | func (n *Node) Color(pat ...string) *string { 516 | return n.Attr("color", pat...) 517 | } 518 | 519 | func (n *Node) Cols(pat ...string) *string { 520 | return n.Attr("cols", pat...) 521 | } 522 | 523 | func (n *Node) Colspan(pat ...string) *string { 524 | return n.Attr("colspan", pat...) 525 | } 526 | 527 | func (n *Node) Content(pat ...string) *string { 528 | return n.Attr("content", pat...) 529 | } 530 | 531 | func (n *Node) Contenteditable(pat ...string) *string { 532 | return n.Attr("contenteditable", pat...) 533 | } 534 | 535 | func (n *Node) Controls(pat ...string) *string { 536 | return n.Attr("controls", pat...) 537 | } 538 | 539 | func (n *Node) Coords(pat ...string) *string { 540 | return n.Attr("coords", pat...) 541 | } 542 | 543 | func (n *Node) Crossorigin(pat ...string) *string { 544 | return n.Attr("crossorigin", pat...) 545 | } 546 | 547 | func (n *Node) Data_(pat ...string) *string { 548 | return n.Attr("data", pat...) 549 | } 550 | 551 | func (n *Node) Datetime(pat ...string) *string { 552 | return n.Attr("datetime", pat...) 553 | } 554 | 555 | func (n *Node) Default(pat ...string) *string { 556 | return n.Attr("default", pat...) 557 | } 558 | 559 | func (n *Node) Defer(pat ...string) *string { 560 | return n.Attr("defer", pat...) 561 | } 562 | 563 | func (n *Node) Dir(pat ...string) *string { 564 | return n.Attr("dir", pat...) 
565 | } 566 | 567 | func (n *Node) Dirname(pat ...string) *string { 568 | return n.Attr("dirname", pat...) 569 | } 570 | 571 | func (n *Node) Disabled(pat ...string) *string { 572 | return n.Attr("disabled", pat...) 573 | } 574 | 575 | func (n *Node) Download(pat ...string) *string { 576 | return n.Attr("download", pat...) 577 | } 578 | 579 | func (n *Node) Draggable(pat ...string) *string { 580 | return n.Attr("draggable", pat...) 581 | } 582 | 583 | func (n *Node) Enctype(pat ...string) *string { 584 | return n.Attr("enctype", pat...) 585 | } 586 | 587 | func (n *Node) For(pat ...string) *string { 588 | return n.Attr("for", pat...) 589 | } 590 | 591 | func (n *Node) Form_(pat ...string) *string { 592 | return n.Attr("form", pat...) 593 | } 594 | 595 | func (n *Node) Formaction(pat ...string) *string { 596 | return n.Attr("formaction", pat...) 597 | } 598 | 599 | func (n *Node) Formenctype(pat ...string) *string { 600 | return n.Attr("formenctype", pat...) 601 | } 602 | 603 | func (n *Node) Formmethod(pat ...string) *string { 604 | return n.Attr("formmethod", pat...) 605 | } 606 | 607 | func (n *Node) Formnovalidate(pat ...string) *string { 608 | return n.Attr("formnovalidate", pat...) 609 | } 610 | 611 | func (n *Node) Formtarget(pat ...string) *string { 612 | return n.Attr("formtarget", pat...) 613 | } 614 | 615 | func (n *Node) Headers(pat ...string) *string { 616 | return n.Attr("headers", pat...) 617 | } 618 | 619 | func (n *Node) Height(pat ...string) *string { 620 | return n.Attr("height", pat...) 621 | } 622 | 623 | func (n *Node) Hidden(pat ...string) *string { 624 | return n.Attr("hidden", pat...) 625 | } 626 | 627 | func (n *Node) High(pat ...string) *string { 628 | return n.Attr("high", pat...) 629 | } 630 | 631 | func (n *Node) Href(pat ...string) *string { 632 | return n.Attr("href", pat...) 633 | } 634 | 635 | func (n *Node) Hreflang(pat ...string) *string { 636 | return n.Attr("hreflang", pat...) 
637 | } 638 | 639 | func (n *Node) HttpEquiv(pat ...string) *string { 640 | return n.Attr("http-equiv", pat...) 641 | } 642 | 643 | func (n *Node) Id(pat ...string) *string { 644 | return n.Attr("id", pat...) 645 | } 646 | 647 | func (n *Node) Inputmode(pat ...string) *string { 648 | return n.Attr("inputmode", pat...) 649 | } 650 | 651 | func (n *Node) Integrity(pat ...string) *string { 652 | return n.Attr("integrity", pat...) 653 | } 654 | 655 | func (n *Node) Is(pat ...string) *string { 656 | return n.Attr("is", pat...) 657 | } 658 | 659 | func (n *Node) Ismap(pat ...string) *string { 660 | return n.Attr("ismap", pat...) 661 | } 662 | 663 | func (n *Node) Itemid(pat ...string) *string { 664 | return n.Attr("itemid", pat...) 665 | } 666 | 667 | func (n *Node) Itemprop(pat ...string) *string { 668 | return n.Attr("itemprop", pat...) 669 | } 670 | 671 | func (n *Node) Itemref(pat ...string) *string { 672 | return n.Attr("itemref", pat...) 673 | } 674 | 675 | func (n *Node) Itemscope(pat ...string) *string { 676 | return n.Attr("itemscope", pat...) 677 | } 678 | 679 | func (n *Node) Itemtype(pat ...string) *string { 680 | return n.Attr("itemtype", pat...) 681 | } 682 | 683 | func (n *Node) Kind(pat ...string) *string { 684 | return n.Attr("kind", pat...) 685 | } 686 | 687 | func (n *Node) Label_(pat ...string) *string { 688 | return n.Attr("label", pat...) 689 | } 690 | 691 | func (n *Node) Lang(pat ...string) *string { 692 | return n.Attr("lang", pat...) 693 | } 694 | 695 | func (n *Node) List(pat ...string) *string { 696 | return n.Attr("list", pat...) 697 | } 698 | 699 | func (n *Node) Loop(pat ...string) *string { 700 | return n.Attr("loop", pat...) 701 | } 702 | 703 | func (n *Node) Low(pat ...string) *string { 704 | return n.Attr("low", pat...) 705 | } 706 | 707 | func (n *Node) Manifest(pat ...string) *string { 708 | return n.Attr("manifest", pat...) 709 | } 710 | 711 | func (n *Node) Max(pat ...string) *string { 712 | return n.Attr("max", pat...) 
713 | } 714 | 715 | func (n *Node) Maxlength(pat ...string) *string { 716 | return n.Attr("maxlength", pat...) 717 | } 718 | 719 | func (n *Node) Media(pat ...string) *string { 720 | return n.Attr("media", pat...) 721 | } 722 | 723 | func (n *Node) Method(pat ...string) *string { 724 | return n.Attr("method", pat...) 725 | } 726 | 727 | func (n *Node) Min(pat ...string) *string { 728 | return n.Attr("min", pat...) 729 | } 730 | 731 | func (n *Node) Minlength(pat ...string) *string { 732 | return n.Attr("minlength", pat...) 733 | } 734 | 735 | func (n *Node) Multiple(pat ...string) *string { 736 | return n.Attr("multiple", pat...) 737 | } 738 | 739 | func (n *Node) Muted(pat ...string) *string { 740 | return n.Attr("muted", pat...) 741 | } 742 | 743 | func (n *Node) Name(pat ...string) *string { 744 | return n.Attr("name", pat...) 745 | } 746 | 747 | func (n *Node) Nomodule(pat ...string) *string { 748 | return n.Attr("nomodule", pat...) 749 | } 750 | 751 | func (n *Node) Nonce(pat ...string) *string { 752 | return n.Attr("nonce", pat...) 753 | } 754 | 755 | func (n *Node) Novalidate(pat ...string) *string { 756 | return n.Attr("novalidate", pat...) 757 | } 758 | 759 | func (n *Node) Open(pat ...string) *string { 760 | return n.Attr("open", pat...) 761 | } 762 | 763 | func (n *Node) Optimum(pat ...string) *string { 764 | return n.Attr("optimum", pat...) 765 | } 766 | 767 | func (n *Node) Pattern(pat ...string) *string { 768 | return n.Attr("pattern", pat...) 769 | } 770 | 771 | func (n *Node) Ping(pat ...string) *string { 772 | return n.Attr("ping", pat...) 773 | } 774 | 775 | func (n *Node) Placeholder(pat ...string) *string { 776 | return n.Attr("placeholder", pat...) 777 | } 778 | 779 | func (n *Node) Playsinline(pat ...string) *string { 780 | return n.Attr("playsinline", pat...) 781 | } 782 | 783 | func (n *Node) Poster(pat ...string) *string { 784 | return n.Attr("poster", pat...) 
785 | } 786 | 787 | func (n *Node) Preload(pat ...string) *string { 788 | return n.Attr("preload", pat...) 789 | } 790 | 791 | func (n *Node) Readonly(pat ...string) *string { 792 | return n.Attr("readonly", pat...) 793 | } 794 | 795 | func (n *Node) Referrerpolicy(pat ...string) *string { 796 | return n.Attr("referrerpolicy", pat...) 797 | } 798 | 799 | func (n *Node) Rel(pat ...string) *string { 800 | return n.Attr("rel", pat...) 801 | } 802 | 803 | func (n *Node) Required(pat ...string) *string { 804 | return n.Attr("required", pat...) 805 | } 806 | 807 | func (n *Node) Reversed(pat ...string) *string { 808 | return n.Attr("reversed", pat...) 809 | } 810 | 811 | func (n *Node) Rows(pat ...string) *string { 812 | return n.Attr("rows", pat...) 813 | } 814 | 815 | func (n *Node) Rowspan(pat ...string) *string { 816 | return n.Attr("rowspan", pat...) 817 | } 818 | 819 | func (n *Node) Sandbox(pat ...string) *string { 820 | return n.Attr("sandbox", pat...) 821 | } 822 | 823 | func (n *Node) Scope(pat ...string) *string { 824 | return n.Attr("scope", pat...) 825 | } 826 | 827 | func (n *Node) Selected(pat ...string) *string { 828 | return n.Attr("selected", pat...) 829 | } 830 | 831 | func (n *Node) Shape(pat ...string) *string { 832 | return n.Attr("shape", pat...) 833 | } 834 | 835 | func (n *Node) Size(pat ...string) *string { 836 | return n.Attr("size", pat...) 837 | } 838 | 839 | func (n *Node) Sizes(pat ...string) *string { 840 | return n.Attr("sizes", pat...) 841 | } 842 | 843 | func (n *Node) Slot_(pat ...string) *string { 844 | return n.Attr("slot", pat...) 845 | } 846 | 847 | func (n *Node) Span_(pat ...string) *string { 848 | return n.Attr("span", pat...) 849 | } 850 | 851 | func (n *Node) Spellcheck(pat ...string) *string { 852 | return n.Attr("spellcheck", pat...) 853 | } 854 | 855 | func (n *Node) Src(pat ...string) *string { 856 | return n.Attr("src", pat...) 
857 | } 858 | 859 | func (n *Node) Srcdoc(pat ...string) *string { 860 | return n.Attr("srcdoc", pat...) 861 | } 862 | 863 | func (n *Node) Srclang(pat ...string) *string { 864 | return n.Attr("srclang", pat...) 865 | } 866 | 867 | func (n *Node) Srcset(pat ...string) *string { 868 | return n.Attr("srcset", pat...) 869 | } 870 | 871 | func (n *Node) Start(pat ...string) *string { 872 | return n.Attr("start", pat...) 873 | } 874 | 875 | func (n *Node) Step(pat ...string) *string { 876 | return n.Attr("step", pat...) 877 | } 878 | 879 | func (n *Node) Style_(pat ...string) *string { 880 | return n.Attr("style", pat...) 881 | } 882 | 883 | func (n *Node) Tabindex(pat ...string) *string { 884 | return n.Attr("tabindex", pat...) 885 | } 886 | 887 | func (n *Node) Target(pat ...string) *string { 888 | return n.Attr("target", pat...) 889 | } 890 | 891 | func (n *Node) Title_(pat ...string) *string { 892 | return n.Attr("title", pat...) 893 | } 894 | 895 | func (n *Node) Translate(pat ...string) *string { 896 | return n.Attr("translate", pat...) 897 | } 898 | 899 | func (n *Node) Type(pat ...string) *string { 900 | return n.Attr("type", pat...) 901 | } 902 | 903 | func (n *Node) Typemustmatch(pat ...string) *string { 904 | return n.Attr("typemustmatch", pat...) 905 | } 906 | 907 | func (n *Node) Updateviacache(pat ...string) *string { 908 | return n.Attr("updateviacache", pat...) 909 | } 910 | 911 | func (n *Node) Usemap(pat ...string) *string { 912 | return n.Attr("usemap", pat...) 913 | } 914 | 915 | func (n *Node) Value(pat ...string) *string { 916 | return n.Attr("value", pat...) 917 | } 918 | 919 | func (n *Node) Width(pat ...string) *string { 920 | return n.Attr("width", pat...) 921 | } 922 | 923 | func (n *Node) Workertype(pat ...string) *string { 924 | return n.Attr("workertype", pat...) 925 | } 926 | 927 | func (n *Node) Wrap(pat ...string) *string { 928 | return n.Attr("wrap", pat...) 
929 | } 930 | -------------------------------------------------------------------------------- /chain.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package query 6 | 7 | import ( 8 | "regexp" 9 | 10 | . "h12.io/html-query/expr" 11 | ) 12 |

// Satisfy reports whether the node passes every checker in cs, i.e. whether
// And(cs...) applied to the underlying node yields a non-nil result.
func (n *Node) Satisfy(cs ...Checker) bool {
	return And(cs...)(n.InternalNode()) != nil
}

// Find runs expr.Find with the checkers cs starting at n and wraps the
// result in a *Node. A nil receiver yields nil, so lookups can be chained
// without intermediate nil checks.
func (n *Node) Find(cs ...Checker) *Node {
	if n == nil {
		return nil
	}
	return NewNode(Find(cs...)(&n.n))
}

// FindNext runs expr.FindSibling with the checkers cs starting at n and
// wraps the result in a *Node. A nil receiver yields nil.
func (n *Node) FindNext(cs ...Checker) *Node {
	if n == nil {
		return nil
	}
	return NewNode(FindSibling(cs...)(&n.n))
}

// FindChild runs expr.FindChild with the checkers cs starting at n and
// wraps the result in a *Node. A nil receiver yields nil.
func (n *Node) FindChild(cs ...Checker) *Node {
	// Guard the receiver like every other chaining method does: taking
	// &n.n on a nil *Node panics, which would break a chain whose previous
	// lookup found nothing.
	if n == nil {
		return nil
	}
	return NewNode(FindChild(cs...)(&n.n))
}

// find is the shared implementation of the generated element shortcuts
// (Div, Span, ...): it searches with the element checker c followed by the
// caller-supplied checkers cs.
func (n *Node) find(c Checker, cs []Checker) *Node {
	if n == nil {
		return nil
	}
	return n.Find(also(c, cs)...)
}

// NextSibling wraps the result of expr.NextSibling for n.
// A nil receiver yields nil.
func (n *Node) NextSibling() *Node {
	if n == nil {
		return nil
	}
	return NewNode(NextSibling(&n.n))
}

// PrevSibling wraps the result of expr.PrevSibling for n.
// A nil receiver yields nil.
func (n *Node) PrevSibling() *Node {
	if n == nil {
		return nil
	}
	return NewNode(PrevSibling(&n.n))
}

// Parent wraps the result of expr.Parent for n.
// A nil receiver yields nil.
func (n *Node) Parent() *Node {
	if n == nil {
		return nil
	}
	return NewNode(Parent(&n.n))
}

// Children returns a NodeIter built from expr.Children(&n.n, cs...).
// A nil receiver yields a NodeIter wrapping nil.
func (n *Node) Children(cs ...Checker) NodeIter {
	if n == nil {
		return NodeIter{nil}
	}
	return NodeIter{Children(&n.n, cs...)}
}

// Descendants returns a NodeIter built from expr.Descendants(&n.n, cs...).
// A nil receiver yields a NodeIter wrapping nil.
func (n *Node) Descendants(cs ...Checker) NodeIter {
	if n == nil {
		return NodeIter{nil}
	}
	return NodeIter{Descendants(&n.n, cs...)}
}

// Ahref searches with the expr.Ahref checker followed by cs.
// A nil receiver yields nil (find performs the nil check).
func (n *Node) Ahref(cs ...Checker) *Node {
	return n.find(Ahref, cs)
}

// TextNode searches for a text node accepted by Text_ with the compiled
// pattern pat and wraps the result, together with the compiled expression,
// in a TextNodeNode. A nil receiver yields nil.
//
// pat is compiled with regexp.MustCompile on every call, so an invalid
// pattern panics.
func (n *Node) TextNode(pat string) *TextNodeNode {
	if n == nil {
		return nil
	}
	rx := regexp.MustCompile(pat)
	cs := []Checker{Text_(rx)}
	return NewTextNodeNode(n.find(TextNode, cs), rx)
}

// also returns a new slice holding c followed by the elements of cs.
// The input slice cs is never modified.
func also(c Checker, cs []Checker) []Checker {
	return append([]Checker{c}, cs...)
}
96 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package query provides a fluent and functional interface 7 | for querying HTML DOM using Go. It is based on golang.org/x/net/html. 8 | */ 9 | package query 10 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "io" 10 | "net/http" 11 | 12 | "h12.io/html-query" 13 | . "h12.io/html-query/expr" 14 | ) 15 |

// main downloads the Go blog index, parses it, and for every child of the
// "content" div that has class "blogtitle" prints the link target, the date
// and the tags that were found.
func main() {
	r := get(`http://blog.golang.org/index`)
	defer r.Close()
	root, err := query.Parse(r)
	checkError(err)
	root.Div(Id("content")).Children(Class("blogtitle")).For(func(item *query.Node) {
		href := item.Ahref().Href()
		date := item.Span(Class("date")).Text()
		tags := item.Span(Class("tags")).Text()
		// Each getter returns nil when the node or value is missing, so
		// only print what was actually found.
		if href != nil {
			pn(*href)
		}
		if date != nil {
			pn(*date)
		}
		if tags != nil {
			p(*tags)
		}
	})
}

// get fetches url and returns the response body; the caller must close it.
// It panics if the request fails or the server answers with a status other
// than 200 OK, so that an error page is never parsed as if it were the
// requested document.
func get(url string) io.ReadCloser {
	resp, err := http.Get(url)
	checkError(err)
	if resp.StatusCode != http.StatusOK {
		// The body is not handed to the caller on this path, so close it
		// here before bailing out.
		resp.Body.Close()
		checkError(fmt.Errorf("GET %s: unexpected status %s", url, resp.Status))
	}
	return resp.Body
}

// checkError panics with err when err is non-nil.
func checkError(err error) {
	if err != nil {
		panic(err)
	}
}

// pn writes its arguments with fmt.Print, i.e. without a trailing newline.
func pn(v ...interface{}) {
	fmt.Print(v...)
}

// p writes its arguments with fmt.Println, i.e. followed by a newline.
func p(v ...interface{}) {
	fmt.Println(v...)
}
56 | -------------------------------------------------------------------------------- /expr/auto_expr.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 

package expr

import (
	"golang.org/x/net/html/atom"
)

// Element checkers, one per HTML element name. Each one is built with
// ElementChecker and therefore accepts a node only when it is an element node
// whose DataAtom equals the given atom.
//
// This table is written by the generator in gen/gen.go (GenerateExpr); change
// the generator rather than editing the list by hand.
var (
	A          = ElementChecker(atom.A)
	Abbr       = ElementChecker(atom.Abbr)
	Address    = ElementChecker(atom.Address)
	Area       = ElementChecker(atom.Area)
	Article    = ElementChecker(atom.Article)
	Aside      = ElementChecker(atom.Aside)
	Audio      = ElementChecker(atom.Audio)
	B          = ElementChecker(atom.B)
	Base       = ElementChecker(atom.Base)
	Bdi        = ElementChecker(atom.Bdi)
	Bdo        = ElementChecker(atom.Bdo)
	Blockquote = ElementChecker(atom.Blockquote)
	Body       = ElementChecker(atom.Body)
	Br         = ElementChecker(atom.Br)
	Button     = ElementChecker(atom.Button)
	Canvas     = ElementChecker(atom.Canvas)
	Caption    = ElementChecker(atom.Caption)
	Cite       = ElementChecker(atom.Cite)
	Code       = ElementChecker(atom.Code)
	Col        = ElementChecker(atom.Col)
	Colgroup   = ElementChecker(atom.Colgroup)
	Data       = ElementChecker(atom.Data)
	Datalist   = ElementChecker(atom.Datalist)
	Dd         = ElementChecker(atom.Dd)
	Del        = ElementChecker(atom.Del)
	Details    = ElementChecker(atom.Details)
	Dfn        = ElementChecker(atom.Dfn)
	Dialog     = ElementChecker(atom.Dialog)
	Div        = ElementChecker(atom.Div)
	Dl         = ElementChecker(atom.Dl)
	Dt         = ElementChecker(atom.Dt)
	Em         = ElementChecker(atom.Em)
	Embed      = ElementChecker(atom.Embed)
	Fieldset   = ElementChecker(atom.Fieldset)
	Figcaption = ElementChecker(atom.Figcaption)
	Figure     = ElementChecker(atom.Figure)
	Footer     = ElementChecker(atom.Footer)
	Form       = ElementChecker(atom.Form)
	H1         = ElementChecker(atom.H1)
	H2         = ElementChecker(atom.H2)
	H3         = ElementChecker(atom.H3)
	H4         = ElementChecker(atom.H4)
	H5         = ElementChecker(atom.H5)
	H6         = ElementChecker(atom.H6)
	Head       = ElementChecker(atom.Head)
	Header     = ElementChecker(atom.Header)
	Hgroup     = ElementChecker(atom.Hgroup)
	Hr         = ElementChecker(atom.Hr)
	Html       = ElementChecker(atom.Html)
	I          = ElementChecker(atom.I)
	Iframe     = ElementChecker(atom.Iframe)
	Img        = ElementChecker(atom.Img)
	Input      = ElementChecker(atom.Input)
	Ins        = ElementChecker(atom.Ins)
	Kbd        = ElementChecker(atom.Kbd)
	Label      = ElementChecker(atom.Label)
	Legend     = ElementChecker(atom.Legend)
	Li         = ElementChecker(atom.Li)
	Link       = ElementChecker(atom.Link)
	Map        = ElementChecker(atom.Map)
	Mark       = ElementChecker(atom.Mark)
	Menu       = ElementChecker(atom.Menu)
	Meta       = ElementChecker(atom.Meta)
	Meter      = ElementChecker(atom.Meter)
	Nav        = ElementChecker(atom.Nav)
	Noscript   = ElementChecker(atom.Noscript)
	Object     = ElementChecker(atom.Object)
	Ol         = ElementChecker(atom.Ol)
	Optgroup   = ElementChecker(atom.Optgroup)
	Option     = ElementChecker(atom.Option)
	Output     = ElementChecker(atom.Output)
	P          = ElementChecker(atom.P)
	Param      = ElementChecker(atom.Param)
	Pre        = ElementChecker(atom.Pre)
	Progress   = ElementChecker(atom.Progress)
	Q          = ElementChecker(atom.Q)
	Rp         = ElementChecker(atom.Rp)
	Rt         = ElementChecker(atom.Rt)
	Ruby       = ElementChecker(atom.Ruby)
	S          = ElementChecker(atom.S)
	Samp       = ElementChecker(atom.Samp)
	Script     = ElementChecker(atom.Script)
	Section    = ElementChecker(atom.Section)
	Select     = ElementChecker(atom.Select)
	Small      = ElementChecker(atom.Small)
	Source     = ElementChecker(atom.Source)
	Span       = ElementChecker(atom.Span)
	Strong     = ElementChecker(atom.Strong)
	Style      = ElementChecker(atom.Style)
	Sub        = ElementChecker(atom.Sub)
	Summary    = ElementChecker(atom.Summary)
	Sup        = ElementChecker(atom.Sup)
	Table      = ElementChecker(atom.Table)
	Tbody      = ElementChecker(atom.Tbody)
	Td         = ElementChecker(atom.Td)
	Textarea   = ElementChecker(atom.Textarea)
	Tfoot      = ElementChecker(atom.Tfoot)
	Th         = ElementChecker(atom.Th)
	Thead      = ElementChecker(atom.Thead)
	Time       = ElementChecker(atom.Time)
	Title      = ElementChecker(atom.Title)
	Tr         = ElementChecker(atom.Tr)
	Track      = ElementChecker(atom.Track)
	U          = ElementChecker(atom.U)
	Ul         = ElementChecker(atom.Ul)
	Var        = ElementChecker(atom.Var)
	Video      = ElementChecker(atom.Video)
	Wbr        = ElementChecker(atom.Wbr)
)

// Attribute checker constructors, one per HTML attribute name.
//
// Those built with AttrChecker take a regular-expression pattern and yield a
// Checker that matches the attribute value against it. Those built with
// SeperatedAttrChecker take any number of tokens and yield a Checker that
// requires every token to be present in the space-separated attribute value.
//
// The generator appends "_" to an identifier when an element of the same name
// exists (e.g. Abbr_, Cite_). This table is also written by gen/gen.go.
var (
	Abbr_               = AttrChecker("abbr")
	Accept              = AttrChecker("accept")
	AcceptCharset       = SeperatedAttrChecker("accept-charset", ' ')
	Accesskey           = SeperatedAttrChecker("accesskey", ' ')
	Action              = AttrChecker("action")
	Allowfullscreen     = AttrChecker("allowfullscreen")
	Allowpaymentrequest = AttrChecker("allowpaymentrequest")
	Allowusermedia      = AttrChecker("allowusermedia")
	Alt                 = AttrChecker("alt")
	As                  = AttrChecker("as")
	Async               = AttrChecker("async")
	Autocomplete        = AttrChecker("autocomplete")
	Autofocus           = AttrChecker("autofocus")
	Autoplay            = AttrChecker("autoplay")
	Charset             = AttrChecker("charset")
	Checked             = AttrChecker("checked")
	Cite_               = AttrChecker("cite")
	Class               = SeperatedAttrChecker("class", ' ')
	Color               = AttrChecker("color")
	Cols                = AttrChecker("cols")
	Colspan             = AttrChecker("colspan")
	Content             = AttrChecker("content")
	Contenteditable     = AttrChecker("contenteditable")
	Controls            = AttrChecker("controls")
	Coords              = AttrChecker("coords")
	Crossorigin         = AttrChecker("crossorigin")
	Data_               = AttrChecker("data")
	Datetime            = AttrChecker("datetime")
	Default             = AttrChecker("default")
	Defer               = AttrChecker("defer")
	Dir                 = AttrChecker("dir")
	Dirname             = AttrChecker("dirname")
	Disabled            = AttrChecker("disabled")
	Download            = AttrChecker("download")
	Draggable           = AttrChecker("draggable")
	Enctype             = AttrChecker("enctype")
	For                 = AttrChecker("for")
	Form_               = AttrChecker("form")
	Formaction          = AttrChecker("formaction")
	Formenctype         = AttrChecker("formenctype")
	Formmethod          = AttrChecker("formmethod")
	Formnovalidate      = AttrChecker("formnovalidate")
	Formtarget          = AttrChecker("formtarget")
	Headers             = SeperatedAttrChecker("headers", ' ')
	Height              = AttrChecker("height")
	Hidden              = AttrChecker("hidden")
	High                = AttrChecker("high")
	Href                = AttrChecker("href")
	Hreflang            = AttrChecker("hreflang")
	HttpEquiv           = AttrChecker("http-equiv")
	Id                  = AttrChecker("id")
	Inputmode           = AttrChecker("inputmode")
	Integrity           = AttrChecker("integrity")
	Is                  = AttrChecker("is")
	Ismap               = AttrChecker("ismap")
	Itemid              = AttrChecker("itemid")
	Itemprop            = SeperatedAttrChecker("itemprop", ' ')
	Itemref             = SeperatedAttrChecker("itemref", ' ')
	Itemscope           = AttrChecker("itemscope")
	Itemtype            = SeperatedAttrChecker("itemtype", ' ')
	Kind                = AttrChecker("kind")
	Label_              = AttrChecker("label")
	Lang                = AttrChecker("lang")
	List                = AttrChecker("list")
	Loop                = AttrChecker("loop")
	Low                 = AttrChecker("low")
	Manifest            = AttrChecker("manifest")
	Max                 = AttrChecker("max")
	Maxlength           = AttrChecker("maxlength")
	Media               = AttrChecker("media")
	Method              = AttrChecker("method")
	Min                 = AttrChecker("min")
	Minlength           = AttrChecker("minlength")
	Multiple            = AttrChecker("multiple")
	Muted               = AttrChecker("muted")
	Name                = AttrChecker("name")
	Nomodule            = AttrChecker("nomodule")
	Nonce               = AttrChecker("nonce")
	Novalidate          = AttrChecker("novalidate")
	Open                = AttrChecker("open")
	Optimum             = AttrChecker("optimum")
	Pattern             = AttrChecker("pattern")
	Ping                = SeperatedAttrChecker("ping", ' ')
	Placeholder         = AttrChecker("placeholder")
	Playsinline         = AttrChecker("playsinline")
	Poster              = AttrChecker("poster")
	Preload             = AttrChecker("preload")
	Readonly            = AttrChecker("readonly")
	Referrerpolicy      = AttrChecker("referrerpolicy")
	Rel                 = SeperatedAttrChecker("rel", ' ')
	Required            = AttrChecker("required")
	Reversed            = AttrChecker("reversed")
	Rows                = AttrChecker("rows")
	Rowspan             = AttrChecker("rowspan")
	Sandbox             = SeperatedAttrChecker("sandbox", ' ')
	Scope               = AttrChecker("scope")
	Selected            = AttrChecker("selected")
	Shape               = AttrChecker("shape")
	Size                = AttrChecker("size")
	Sizes               = SeperatedAttrChecker("sizes", ' ')
	Slot_               = AttrChecker("slot")
	Span_               = AttrChecker("span")
	Spellcheck          = AttrChecker("spellcheck")
	Src                 = AttrChecker("src")
	Srcdoc              = AttrChecker("srcdoc")
	Srclang             = AttrChecker("srclang")
	Srcset              = AttrChecker("srcset")
	Start               = AttrChecker("start")
	Step                = AttrChecker("step")
	Style_              = AttrChecker("style")
	Tabindex            = AttrChecker("tabindex")
	Target              = AttrChecker("target")
	Title_              = AttrChecker("title")
	Translate           = AttrChecker("translate")
	Type                = AttrChecker("type")
	Typemustmatch       = AttrChecker("typemustmatch")
	Updateviacache      = AttrChecker("updateviacache")
	Usemap              = AttrChecker("usemap")
	Value               = AttrChecker("value")
	Width               = AttrChecker("width")
	Workertype          = AttrChecker("workertype")
	Wrap                = AttrChecker("wrap")
)
--------------------------------------------------------------------------------
/expr/bfs.go:
--------------------------------------------------------------------------------
// Copyright 2014, Hǎiliàng Wáng. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package expr

import (
	"container/list"

	"golang.org/x/net/html"
)

// Find returns a Checker that performs a breadth-first search over the
// descendants of the node it is given (the node itself is not tested) and
// returns the first descendant accepted by every checker in cs, or nil when
// no descendant qualifies.
func Find(cs ...Checker) Checker {
	c := And(cs...)
	return func(n *html.Node) *html.Node {
		q := NewQueue()
		q.PushNodes(Children(n))
		for q.Len() > 0 {
			t := q.Pop()
			if c(t) != nil {
				return t
			} else {
				// Not a match: its children join the back of the queue, so
				// shallower nodes are always examined before deeper ones.
				q.PushNodes(Children(t))
			}
		}
		return nil
	}
}

// FindChild returns a Checker that looks only at the direct children of the
// node it is given and returns the first child accepted by every checker in
// cs, or nil when there is none.
func FindChild(cs ...Checker) Checker {
	c := And(cs...)
	return func(n *html.Node) *html.Node {
		for child := FirstChild(n); child != nil; child = NextSibling(child) {
			if c(child) != nil {
				return child
			}
		}
		return nil
	}
}

// FindSibling returns a Checker that walks the siblings following the node it
// is given (preceding siblings are not visited) and returns the first one
// accepted by every checker in cs, or nil when there is none.
func FindSibling(cs ...Checker) Checker {
	c := And(cs...)
	return func(n *html.Node) *html.Node {
		for sibling := NextSibling(n); sibling != nil; sibling = NextSibling(sibling) {
			if c(sibling) != nil {
				return sibling
			}
		}
		return nil
	}
}

// Queue is a FIFO queue of *html.Node backed by container/list.
type Queue struct {
	l *list.List
}

// NewQueue returns an empty Queue.
func NewQueue() *Queue {
	return &Queue{list.New()}
}

// Len returns the number of queued nodes.
func (q *Queue) Len() int {
	return q.l.Len()
}

// Push appends n to the back of the queue.
func (q *Queue) Push(n *html.Node) {
	q.l.PushBack(n)
}

// PushNodes drains next, appending every node it yields until it returns nil.
func (q *Queue) PushNodes(next Iter) {
	for node := next(); node != nil; node = next() {
		q.Push(node)
	}
}

// Pop removes and returns the node at the front of the queue, or nil when the
// queue is empty.
func (q *Queue) Pop() *html.Node {
	if q.l.Front() == nil {
		return nil
	}
	return q.l.Remove(q.l.Front()).(*html.Node)
}
--------------------------------------------------------------------------------
/expr/checker.go:
--------------------------------------------------------------------------------
// Copyright 2014, Hǎiliàng Wáng. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 | 5 | package expr // import "h12.io/html-query/expr" 6 | 7 | import ( 8 | "regexp" 9 | "strings" 10 | 11 | "golang.org/x/net/html" 12 | "golang.org/x/net/html/atom" 13 | ) 14 | 15 | type Checker func(*html.Node) *html.Node 16 | 17 | func Not(c Checker) Checker { 18 | return func(n *html.Node) *html.Node { 19 | if c(n) == nil { 20 | return n 21 | } 22 | return nil 23 | } 24 | } 25 | 26 | func And(cs ...Checker) Checker { 27 | return func(n *html.Node) *html.Node { 28 | for _, c := range cs { 29 | if c(n) == nil { 30 | return nil 31 | } 32 | } 33 | return n 34 | } 35 | } 36 | 37 | func Pipe(cs ...Checker) Checker { 38 | return func(n *html.Node) *html.Node { 39 | for _, c := range cs { 40 | r := c(n) 41 | if r == nil { 42 | return nil 43 | } else { 44 | n = r 45 | } 46 | } 47 | return n 48 | } 49 | } 50 | 51 | func Or(cs ...Checker) Checker { 52 | return func(n *html.Node) *html.Node { 53 | for _, c := range cs { 54 | if c(n) != nil { 55 | return n 56 | } 57 | } 58 | return nil 59 | } 60 | } 61 | 62 | func FirstChild(n *html.Node) *html.Node { 63 | if n == nil { 64 | return nil 65 | } 66 | return n.FirstChild 67 | } 68 | 69 | func Parent(n *html.Node) *html.Node { 70 | if n == nil { 71 | return nil 72 | } 73 | return n.Parent 74 | } 75 | 76 | func NextSibling(n *html.Node) *html.Node { 77 | if n == nil { 78 | return nil 79 | } 80 | return n.NextSibling 81 | } 82 | 83 | func PrevSibling(n *html.Node) *html.Node { 84 | if n == nil { 85 | return nil 86 | } 87 | return n.PrevSibling 88 | } 89 | 90 | // Node Checkers 91 | // ============= 92 | 93 | func TypeChecker(t html.NodeType) Checker { 94 | return func(n *html.Node) *html.Node { 95 | if n != nil && n.Type == t { 96 | return n 97 | } 98 | return nil 99 | } 100 | } 101 | 102 | var ( 103 | ErrorNode = TypeChecker(html.ErrorNode) 104 | TextNode = TypeChecker(html.TextNode) 105 | DocumentNode = TypeChecker(html.DocumentNode) 106 | ElementNode = TypeChecker(html.ElementNode) 107 | CommentNode = 
TypeChecker(html.CommentNode) 108 | DoctypeNode = TypeChecker(html.DoctypeNode) 109 | ) 110 | 111 | func NonemptyTextNode(n *html.Node) *html.Node { 112 | if n == nil { 113 | return nil 114 | } 115 | if TextNode(n) != nil && strings.TrimSpace(n.Data) != "" { 116 | return n 117 | } 118 | return nil 119 | } 120 | 121 | func AtomChecker(a atom.Atom) Checker { 122 | return func(n *html.Node) *html.Node { 123 | if n.DataAtom == a { 124 | return n 125 | } 126 | return nil 127 | } 128 | } 129 | 130 | func ElementChecker(a atom.Atom) Checker { 131 | return And(ElementNode, AtomChecker(a)) 132 | } 133 | 134 | // Attribute Checkers 135 | // ================== 136 | 137 | func AttributeCmpChecker(key string, cmp func(string) bool) Checker { 138 | return func(n *html.Node) *html.Node { 139 | attr := GetAttr(n, key) 140 | if attr != nil && cmp(*attr) { 141 | return n 142 | } 143 | return nil 144 | } 145 | } 146 | 147 | func Attr(key, pat string) Checker { 148 | rx := regexp.MustCompile(pat) 149 | return AttributeCmpChecker(key, func(val string) bool { 150 | return rx.MatchString(val) 151 | }) 152 | } 153 | 154 | func AttrChecker(key string) func(string) Checker { 155 | return func(pat string) Checker { 156 | return Attr(key, pat) 157 | } 158 | } 159 | 160 | func HasAttr(key string) Checker { 161 | return func(n *html.Node) *html.Node { 162 | if GetAttr(n, key) != nil { 163 | return n 164 | } 165 | return nil 166 | } 167 | } 168 | 169 | func NoAttr(key string) Checker { 170 | return func(n *html.Node) *html.Node { 171 | if GetAttr(n, key) != nil { 172 | return nil 173 | } 174 | return n 175 | } 176 | } 177 | 178 | func fieldsToSet(val string, sep rune) map[string]bool { 179 | m := make(map[string]bool) 180 | fields := strings.FieldsFunc(val, func(r rune) bool { return r == sep }) 181 | for _, field := range fields { 182 | m[field] = true 183 | } 184 | return m 185 | } 186 | 187 | func SeperatedAttrChecker(name string, sep rune) func(...string) Checker { 188 | return func(classes 
...string) Checker { 189 | return AttributeCmpChecker(name, func(val string) bool { 190 | s := fieldsToSet(val, sep) 191 | for _, class := range classes { 192 | if !s[class] { 193 | return false 194 | } 195 | } 196 | return true 197 | }) 198 | } 199 | } 200 | 201 | func Text_(rx *regexp.Regexp) Checker { 202 | return func(n *html.Node) *html.Node { 203 | if s := GetText(n); s != nil { 204 | if rx.MatchString(*s) { 205 | return n 206 | } 207 | } 208 | return nil 209 | } 210 | } 211 | 212 | func Text(pat string) Checker { 213 | return Text_(regexp.MustCompile(pat)) 214 | } 215 | 216 | func CaptionText(pat string) Checker { 217 | return Find(Caption, Text(pat)) 218 | } 219 | 220 | var ( 221 | Ahref = And(A, HasAttr("href")) 222 | ) 223 | -------------------------------------------------------------------------------- /expr/dfs.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package expr 6 | 7 | import ( 8 | "strconv" 9 | 10 | "golang.org/x/net/html" 11 | ) 12 | 13 | type Iter func() *html.Node 14 | 15 | // Pre-order depth first traversal in all descendants 16 | func Descendants(n *html.Node, cs ...Checker) Iter { 17 | c := And(cs...) 18 | s := NewStack() 19 | node := FirstChild(n) 20 | return func() *html.Node { 21 | for node != nil || s.Len() > 0 { 22 | if node != nil { 23 | s.Push(node) 24 | r := node 25 | node = FirstChild(node) 26 | if c(r) != nil { 27 | return r 28 | } 29 | } else { 30 | node = s.Pop() 31 | node = NextSibling(node) 32 | } 33 | } 34 | return nil 35 | } 36 | } 37 | 38 | func IterIter(next Iter, cs ...Checker) Iter { 39 | find := Find(cs...) 
40 | return func() *html.Node { 41 | node := next() 42 | for node != nil { 43 | if n := find(node); n != nil { 44 | return n 45 | } 46 | } 47 | return nil 48 | } 49 | } 50 | 51 | func Children(n *html.Node, cs ...Checker) Iter { 52 | c := And(cs...) 53 | node := FirstChild(n) 54 | return func() *html.Node { 55 | for node != nil { 56 | r := node 57 | node = NextSibling(node) 58 | if c(r) != nil { 59 | return r 60 | } 61 | } 62 | return nil 63 | } 64 | } 65 | 66 | func Strings(next Iter, f StringGetter, pat ...string) []string { 67 | ss := []string{} 68 | p := GetPat(pat) 69 | 70 | // TODO: I have met a bug here once that the program hangs at the next() 71 | // function call, but I cannot find the data to reproduce it. So Just wait 72 | // and see it happens again. 73 | for node := next(); node != nil; node = next() { 74 | if s := f(node); s != nil { 75 | ss = append(ss, *GetSubmatch(s, p)) 76 | } 77 | } 78 | return ss 79 | } 80 | 81 | func Integers(next Iter, f StringGetter) []int { 82 | ss := []int{} 83 | for node := next(); node != nil; node = next() { 84 | s := f(node) 85 | if s != nil { 86 | if i, err := strconv.Atoi(*s); err == nil { 87 | ss = append(ss, i) 88 | } 89 | } 90 | } 91 | return ss 92 | } 93 | 94 | // FILO stack. 95 | type Stack struct { 96 | s []*html.Node 97 | } 98 | 99 | func NewStack() *Stack { 100 | return &Stack{} 101 | } 102 | 103 | func (s *Stack) Len() int { 104 | return len(s.s) 105 | } 106 | 107 | func (s *Stack) Push(n *html.Node) { 108 | s.s = append(s.s, n) 109 | } 110 | 111 | func (s *Stack) Pop() (n *html.Node) { 112 | if s.Len() == 0 { 113 | return nil 114 | } 115 | n, s.s = s.s[len(s.s)-1], s.s[:len(s.s)-1] 116 | return n 117 | } 118 | -------------------------------------------------------------------------------- /expr/getter.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package expr 6 | 7 | import ( 8 | "regexp" 9 | 10 | "golang.org/x/net/html" 11 | ) 12 | 13 | type StringGetter func(*html.Node) *string 14 | 15 | func GetAttr(n *html.Node, key string) *string { 16 | if n == nil { 17 | return nil 18 | } 19 | for _, a := range n.Attr { 20 | if a.Key == key { 21 | return &a.Val 22 | } 23 | } 24 | return nil 25 | } 26 | 27 | func GetAttrSubmatch(n *html.Node, key, pat string) *string { 28 | return GetSubmatch(GetAttr(n, key), pat) 29 | } 30 | 31 | func GetSubmatch_(s *string, rx *regexp.Regexp) *string { 32 | if s == nil { 33 | return nil 34 | } 35 | m := rx.FindStringSubmatch(*s) 36 | if m == nil || len(m) < 2 { 37 | return nil 38 | } 39 | return &m[1] 40 | } 41 | 42 | func GetSubmatch(s *string, pat string) *string { 43 | if pat == "" { 44 | return s 45 | } 46 | return GetSubmatch_(s, regexp.MustCompile(pat)) 47 | } 48 | 49 | func GetTextNodeText(n *html.Node) *string { 50 | if NonemptyTextNode(n) != nil { 51 | return &n.Data 52 | } 53 | return nil 54 | } 55 | 56 | func GetText(n *html.Node) *string { 57 | if s := GetTextNodeText(n); s != nil { 58 | return s 59 | } 60 | 61 | for c := FirstChild(n); c != nil; c = NextSibling(c) { 62 | if s := GetTextNodeText(c); s != nil { 63 | return s 64 | } 65 | } 66 | return nil 67 | } 68 | 69 | func GetSrc(n *html.Node) *string { 70 | if n == nil { 71 | return nil 72 | } 73 | return GetAttr(n, "src") 74 | } 75 | 76 | func GetHref(n *html.Node) *string { 77 | if n == nil { 78 | return nil 79 | } 80 | return GetAttr(n, "href") 81 | } 82 | 83 | func GetPat(pat []string) string { 84 | if len(pat) > 1 { 85 | panic("pat should be either ommited or only one string.") 86 | } else if len(pat) == 0 { 87 | return "" // empty string indicates that the whole string should be got. 
88 | } 89 | return pat[0] 90 | } 91 | 92 | /* 93 | func AttrValueGetter(key string) StringGetter { 94 | return func(n *html.Node) *string { 95 | return GetAttrValue(n, key) 96 | } 97 | } 98 | */ 99 | -------------------------------------------------------------------------------- /expr/util.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package expr 6 | 7 | import ( 8 | "strconv" 9 | "strings" 10 | "time" 11 | ) 12 | 13 | func ToInt(ps *string) *int { 14 | if ps == nil { 15 | return nil 16 | } 17 | i, err := strconv.Atoi(strings.TrimSpace(*ps)) 18 | if err != nil { 19 | return nil 20 | } 21 | return &i 22 | } 23 | 24 | func ToFloat(ps *string) *float64 { 25 | if ps == nil { 26 | return nil 27 | } 28 | f, err := strconv.ParseFloat(strings.TrimSpace(*ps), 64) 29 | if err != nil { 30 | return nil 31 | } 32 | return &f 33 | } 34 | 35 | func ToHex(ps *string) *int { 36 | if ps == nil { 37 | return nil 38 | } 39 | i64, err := strconv.ParseInt(strings.TrimSpace(*ps), 16, 64) 40 | if err != nil { 41 | return nil 42 | } 43 | i := int(i64) 44 | return &i 45 | } 46 | 47 | func ToTime(ps *string, layout string) *time.Time { 48 | if ps == nil { 49 | return nil 50 | } 51 | t, err := time.Parse(layout, *ps) 52 | if err != nil { 53 | return nil 54 | } 55 | return &t 56 | } 57 | -------------------------------------------------------------------------------- /gen/.gitignore: -------------------------------------------------------------------------------- 1 | gen 2 | *.htm 3 | *.html 4 | -------------------------------------------------------------------------------- /gen/gen.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "io" 10 | "log" 11 | "os" 12 | "os/exec" 13 | "strings" 14 | ) 15 | 16 | func (spec *Spec) GenerateExpr() { 17 | elemTable, attrTable := spec.ElemTable, spec.AttrTable 18 | file := "output/auto_expr.go" 19 | f, err := os.Create(file) 20 | c(err) 21 | fp(f, `// Copyright 2017, Hǎiliàng Wáng. All rights reserved.`) 22 | fp(f, `// Use of this source code is governed by a BSD-style`) 23 | fp(f, `// license that can be found in the LICENSE file.`) 24 | fp(f, ``) 25 | fp(f, `package expr`) 26 | fp(f, "import (") 27 | fp(f, `"golang.org/x/net/html/atom"`) 28 | fp(f, ")") 29 | 30 | fp(f, "var (") 31 | for _, elem := range elemTable.Elements() { 32 | if elemTable.Skip[elem.Name] { 33 | continue 34 | } 35 | nodeId := toid(elem.Name) 36 | fp(f, nodeId, "=", "ElementChecker(atom.", toid(elem.Name), ")") 37 | } 38 | fp(f, ")") 39 | fp(f, "") 40 | 41 | fp(f, "var (") 42 | for _, attr := range attrTable.Attributes() { 43 | attrId := toid(attr.Name) 44 | if elemTable.Set[attr.Name] != nil { 45 | attrId += "_" 46 | } 47 | if strings.Contains(attr.Type, "space-separated tokens") { 48 | fp(f, attrId, ` = SeperatedAttrChecker("`, attr.Name, `", ' ')`) 49 | } else { 50 | fp(f, attrId, ` = AttrChecker("`, attr.Name, `")`) 51 | } 52 | } 53 | fp(f, ")") 54 | 55 | f.Close() 56 | format(file) 57 | } 58 | 59 | func (spec *Spec) GenerateChain() { 60 | file := "output/auto_chain.go" 61 | f, err := os.Create(file) 62 | c(err) 63 | fp(f, `// Copyright 2017, Hǎiliàng Wáng. All rights reserved.`) 64 | fp(f, `// Use of this source code is governed by a BSD-style`) 65 | fp(f, `// license that can be found in the LICENSE file.`) 66 | fp(f, ``) 67 | fp(f, "package query") 68 | fp(f, "import (") 69 | fp(f, `. 
"h12.io/html-query/expr"`) 70 | fp(f, ")") 71 | 72 | spec.generateChainSmall(f) 73 | 74 | //spec.generateChainBloaded(f) 75 | 76 | fp(f, "") 77 | f.Close() 78 | format(file) 79 | } 80 | 81 | func (spec *Spec) generateChainSmall(f io.Writer) { 82 | elemTable, attrTable := spec.ElemTable, spec.AttrTable 83 | for _, elem := range elemTable.Elements() { 84 | if elemTable.Skip[elem.Name] { 85 | continue 86 | } 87 | nodeId := toid(elem.Name) 88 | nodeFinderSmall(f, nodeId) 89 | } 90 | 91 | for _, attr := range attrTable.Attributes() { 92 | attrId := toid(attr.Name) 93 | if elemTable.Set[attr.Name] != nil { 94 | attrId += "_" 95 | } 96 | nodeAttribute(f, attr.Name, attrId, "Node") 97 | } 98 | } 99 | 100 | func (spec *Spec) generateChainBloaded(f io.Writer) { 101 | elemTable, attrTable := spec.ElemTable, spec.AttrTable 102 | for _, elem := range elemTable.Elements() { 103 | if elemTable.Skip[elem.Name] { 104 | continue 105 | } 106 | nodeId := toid(elem.Name) 107 | nodeType := nodeId + "Node" 108 | fp(f, "") 109 | fp(f, "// ", nodeId) 110 | fp(f, "") 111 | nodeDefinition(f, nodeType) 112 | nodeConstructor(f, nodeType) 113 | finderName := nodeId 114 | if a, ok := attrTable.Set[elem.Name]; ok && a.IsGlobal { 115 | finderName += "Node" 116 | } 117 | nodeFinder(f, finderName, nodeId, nodeType) 118 | for _, attr := range elem.Attributes { 119 | if !attr.IsGlobal { 120 | attrId := toid(attr.Name) 121 | nodeAttribute(f, attr.Name, attrId, nodeType) 122 | } 123 | } 124 | fp(f, "") 125 | } 126 | 127 | for _, attr := range attrTable.Attributes() { 128 | if attr.IsGlobal { 129 | attrId := toid(attr.Name) 130 | nodeAttribute(f, attr.Name, attrId, "Node") 131 | } 132 | } 133 | } 134 | 135 | func nodeDefinition(f io.Writer, name string) { 136 | fp(f, "type ", name, " struct {") 137 | fp(f, "Node") 138 | fp(f, "}") 139 | fp(f, "") 140 | } 141 | 142 | func nodeConstructor(f io.Writer, name string) { 143 | fp(f, "func New", name, "(n *Node) *", name, "{") 144 | fp(f, "if n == nil {") 145 | 
fp(f, "return nil") 146 | fp(f, "}") 147 | fp(f, "return &", name, "{*n}") 148 | fp(f, "}") 149 | fp(f, "") 150 | } 151 | 152 | func nodeFinder(f io.Writer, finderName, nodeId, nodeType string) { 153 | fp(f, "func (n *Node) ", finderName, "(cs ...Checker) *", nodeType, " {") 154 | fp(f, "return New", nodeType, "(n.find(", nodeId, ", cs))") 155 | fp(f, "}") 156 | fp(f, "") 157 | } 158 | 159 | func nodeFinderSmall(f io.Writer, nodeId string) { 160 | fp(f, "func (n *Node) ", nodeId, "(cs ...Checker) *Node {") 161 | fp(f, "return n.find(", nodeId, ", cs)") 162 | fp(f, "}") 163 | fp(f, "") 164 | } 165 | 166 | func nodeAttribute(f io.Writer, attrName, attrId, nodeType string) { 167 | fp(f, "func (n *", nodeType, ") ", attrId, "(pat ...string) *string {") 168 | fp(f, `return n.Attr("`, attrName, `", pat...)`) 169 | fp(f, "}") 170 | fp(f, "") 171 | 172 | } 173 | 174 | func toid(s string) string { 175 | s = strings.Title(s) 176 | s = strings.Replace(s, "-", "", -1) 177 | s = strings.Replace(s, " ", "", -1) 178 | return s 179 | } 180 | 181 | func format(file string) { 182 | cmd := exec.Command("go", "fmt", file) 183 | c(cmd.Start()) 184 | if err := cmd.Wait(); err != nil { 185 | log.Fatal("go fmt " + file + ": " + err.Error()) 186 | } 187 | } 188 | 189 | func fp(w io.Writer, v ...interface{}) { 190 | fmt.Fprint(w, v...) 191 | fmt.Fprintln(w) 192 | } 193 | -------------------------------------------------------------------------------- /gen/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package main 6 | 7 | import ( 8 | "os" 9 | ) 10 | 11 | func main() { 12 | htmlSpec := "html_spec.htm" 13 | f, err := os.Open(htmlSpec) 14 | if err != nil { 15 | DownloadSpec(htmlSpec) 16 | f, err = os.Open(htmlSpec) 17 | c(err) 18 | } 19 | defer f.Close() 20 | 21 | spec := parseSpec(f) 22 | 23 | os.Mkdir("output", 0755) 24 | spec.GenerateExpr() 25 | spec.GenerateChain() 26 | } 27 | -------------------------------------------------------------------------------- /gen/spec.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "io" 9 | "net/http" 10 | "os" 11 | "sort" 12 | "strings" 13 | 14 | "h12.io/html-query" 15 | . "h12.io/html-query/expr" 16 | ) 17 | 18 | const ( 19 | SpecUrl = `https://html.spec.whatwg.org/multipage/indices.html` 20 | 21 | // SPEC_URL = `http://www.w3.org/html/wg/drafts/html/master/single-page.html` 22 | ) 23 | 24 | // skip the two because they does not appear in exp/html 25 | var ( 26 | ignoredElements = map[string]bool{ 27 | "MathML": true, 28 | "SVG": true, 29 | "main": true, 30 | "menuitem": true, 31 | "template": true, 32 | "picture": true, 33 | "slot": true, 34 | 35 | "autonomous custom elements": true, 36 | } 37 | ) 38 | 39 | type Element struct { 40 | Name string 41 | Attributes []*Attribute 42 | } 43 | 44 | type Attribute struct { 45 | Name string 46 | Type string 47 | IsGlobal bool 48 | } 49 | 50 | type AttributeTable struct { 51 | Set map[string]*Attribute 52 | } 53 | 54 | type ElementTable struct { 55 | Set map[string]*Element 56 | Skip map[string]bool 57 | } 58 | 59 | type Spec struct { 60 | ElemTable *ElementTable 61 | AttrTable *AttributeTable 62 | } 63 | 64 | func DownloadSpec(file string) { 65 | resp, err := http.Get(SpecUrl) 66 | c(err) 67 | defer resp.Body.Close() 68 | f, err := 
os.Create(file) 69 | c(err) 70 | defer f.Close() 71 | io.Copy(f, resp.Body) 72 | } 73 | 74 | func parseSpec(file io.Reader) *Spec { 75 | root, err := query.Parse(file) 76 | c(err) 77 | attrTable := parseAttributeTable(root) 78 | elemTable := parseElementTable(root, attrTable) 79 | return &Spec{elemTable, attrTable} 80 | } 81 | 82 | func (t *ElementTable) Elements() []*Element { 83 | names := make([]string, len(t.Set)) 84 | i := 0 85 | for k, _ := range t.Set { 86 | names[i] = k 87 | i++ 88 | } 89 | sort.Sort(sort.StringSlice(names)) 90 | elements := make([]*Element, len(names)) 91 | for i := range elements { 92 | elements[i] = t.Set[names[i]] 93 | } 94 | return elements 95 | } 96 | 97 | func (t *AttributeTable) Attributes() []*Attribute { 98 | names := make([]string, len(t.Set)) 99 | i := 0 100 | for k, _ := range t.Set { 101 | names[i] = k 102 | i++ 103 | } 104 | sort.Sort(sort.StringSlice(names)) 105 | attrs := make([]*Attribute, len(names)) 106 | for i := range attrs { 107 | attrs[i] = t.Set[names[i]] 108 | } 109 | return attrs 110 | } 111 | 112 | func parseAttributeTable(root *query.Node) *AttributeTable { 113 | attrSet := make(map[string]*Attribute) 114 | attrTable := root.Table(CaptionText("List of attributes")) 115 | if attrTable == nil { 116 | panic("Cannot find List of attributes") 117 | } 118 | for _, tr := range attrTable.Tbody().Children(Tr).All() { 119 | name := *tr.Th().Code().Text() 120 | attr := &Attribute{Name: name} 121 | td := tr.Children(Td).All() 122 | if elemName := td[0].A().Text(); elemName != nil { 123 | if *elemName == "HTML elements" { 124 | attr.IsGlobal = true 125 | } 126 | } 127 | attr.Type = strings.Replace(*td[2].PlainText(), "\n", "", -1) 128 | // Attention: attribute may be duplicated, just choose the first one 129 | // but set isglobal if one of it is global 130 | if attrSet[name] == nil { 131 | attrSet[name] = attr 132 | } else if attr.IsGlobal { 133 | attrSet[name].IsGlobal = true 134 | } 135 | } 136 | return 
// c aborts the generator by panicking with err when err is non-nil; it does
// nothing for a nil error.
//
// The original followed the panic with log.Fatal(err), which could never
// execute; the dead call is dropped and the observable behavior is unchanged.
func c(err error) {
	if err != nil {
		panic(err)
	}
}
"h12.io/html-query/expr" 15 | ) 16 | 17 | // Node represents a HTML node. 18 | // Wrap html.Node so that chainable interface is possible 19 | // Use pointer of it because we want to test with nil. 20 | type Node struct { 21 | n html.Node 22 | } 23 | 24 | func NewNode(n *html.Node) *Node { 25 | if n == nil { 26 | return nil 27 | } 28 | return &Node{*n} 29 | } 30 | 31 | func Parse(r io.Reader) (*Node, error) { 32 | n, err := html.Parse(r) 33 | if err != nil { 34 | return nil, err 35 | } 36 | return NewNode(n), nil 37 | } 38 | 39 | func (n *Node) InternalNode() *html.Node { 40 | return &n.n 41 | } 42 | 43 | func (n *Node) Attr(key string, pat ...string) *string { 44 | if n == nil { 45 | return nil 46 | } 47 | return GetAttrSubmatch(n.InternalNode(), key, GetPat(pat)) 48 | } 49 | 50 | /* 51 | func (n *Node) AttrSubmatch(key, pat string) *string { 52 | if n == nil { 53 | return nil 54 | } 55 | return GetAttrSubmatch(n.InternalNode(), key, pat) 56 | } 57 | */ 58 | 59 | func (n *Node) getAttr(key string, pat ...string) *string { 60 | return n.Attr(key, pat...) 
61 | } 62 | 63 | func (n *Node) Text(pat ...string) *string { 64 | if n == nil { 65 | return nil 66 | } 67 | return GetSubmatch(GetText(&n.n), GetPat(pat)) 68 | } 69 | 70 | func (n *Node) AllText(pat ...string) *string { 71 | ss := []string{} 72 | for _, n := range n.Descendants(TextNode).All() { 73 | if text := n.Text(pat...); text != nil && *text != "" { 74 | ss = append(ss, *text) 75 | } 76 | } 77 | s := html.UnescapeString(strings.Join(ss, " ")) 78 | if s != "" { 79 | return &s 80 | } 81 | return nil 82 | } 83 | 84 | func (n *Node) Render() *string { 85 | if n == nil { 86 | return nil 87 | } 88 | var b bytes.Buffer 89 | err := html.Render(&b, &n.n) 90 | if err != nil { 91 | return nil 92 | } 93 | s := b.String() 94 | return &s 95 | } 96 | 97 | func (n *Node) RenderTagOnly() *string { 98 | if n == nil { 99 | return nil 100 | } 101 | var b bytes.Buffer 102 | 103 | if n.n.Type == html.ElementNode { 104 | err := renderOpeningTag(&b, &n.n) 105 | if err != nil { 106 | return nil 107 | } 108 | } else { 109 | err := renderSimpleNode(&b, &n.n) 110 | if err != nil { 111 | return nil 112 | } 113 | } 114 | s := b.String() 115 | return &s 116 | 117 | } 118 | 119 | func (n *Node) RenderChildren() *string { 120 | if n == nil { 121 | return nil 122 | } 123 | var b bytes.Buffer 124 | node := FirstChild(&n.n) 125 | for node != nil { 126 | err := html.Render(&b, node) 127 | if err != nil { 128 | return nil 129 | } 130 | node = node.NextSibling 131 | } 132 | s := b.String() 133 | return &s 134 | } 135 | 136 | type TextNodeNode struct { 137 | Node 138 | rx *regexp.Regexp 139 | } 140 | 141 | func NewTextNodeNode(n *Node, rx *regexp.Regexp) *TextNodeNode { 142 | if n == nil { 143 | return nil 144 | } 145 | return &TextNodeNode{*n, rx} 146 | } 147 | 148 | func (n *TextNodeNode) Submatch() *string { 149 | val := n.Text() 150 | if val == nil { 151 | return nil 152 | } 153 | m := n.rx.FindStringSubmatch(*val) 154 | if m == nil || len(m) < 2 { 155 | return nil 156 | } 157 | return &m[1] 
158 | } 159 | 160 | type NodeIter struct { 161 | Iter 162 | } 163 | 164 | func (i NodeIter) find(c Checker, cs []Checker) NodeIter { 165 | return NodeIter{IterIter(i.Iter, also(c, cs)...)} 166 | } 167 | 168 | func (i NodeIter) Find(cs ...Checker) NodeIter { 169 | return NodeIter{IterIter(i.Iter, cs...)} 170 | } 171 | 172 | func (i NodeIter) For(visit func(n *Node)) { 173 | for n := i.Next(); n != nil; n = i.Next() { 174 | visit(n) 175 | } 176 | } 177 | 178 | func (i NodeIter) Next() *Node { 179 | next := i.Iter 180 | if next == nil { 181 | return nil 182 | } 183 | if node := next(); node != nil { 184 | return NewNode(node) 185 | } 186 | return nil 187 | } 188 | 189 | func (i NodeIter) All() (nodes []*Node) { 190 | if i.Iter == nil { 191 | return 192 | } 193 | next := i.Iter 194 | for node := next(); node != nil; node = next() { 195 | nodes = append(nodes, NewNode(node)) 196 | } 197 | return 198 | } 199 | 200 | func (i NodeIter) Strings(f StringGetter, pat ...string) []string { 201 | if i.Iter == nil { 202 | return nil 203 | } 204 | return Strings(i.Iter, f, pat...) 
205 | } 206 | 207 | func (i NodeIter) Integers(f StringGetter) []int { 208 | if i.Iter == nil { 209 | return nil 210 | } 211 | return Integers(i.Iter, f) 212 | } 213 | 214 | type NodeStack struct { 215 | *Stack 216 | } 217 | 218 | /* 219 | func (s NodeStack) All() (nodes []*Node) { 220 | for _, node := range s.Stack.s { 221 | nodes = append(nodes, NewNode(node)) 222 | } 223 | return 224 | } 225 | */ 226 | 227 | // --------------------------------- 228 | // If needed, autogenerate these routines 229 | 230 | // node methods 231 | 232 | func (i NodeIter) A(cs ...Checker) NodeIter { 233 | return i.find(A, cs) 234 | } 235 | 236 | func (i NodeIter) H2(cs ...Checker) NodeIter { 237 | return i.find(H2, cs) 238 | } 239 | 240 | func (i NodeIter) H3(cs ...Checker) NodeIter { 241 | return i.find(H3, cs) 242 | } 243 | func (i NodeIter) H4(cs ...Checker) NodeIter { 244 | return i.find(H4, cs) 245 | } 246 | 247 | func (i NodeIter) Div(cs ...Checker) NodeIter { 248 | return i.find(Div, cs) 249 | } 250 | 251 | func (i NodeIter) Td(cs ...Checker) NodeIter { 252 | return i.find(Td, cs) 253 | } 254 | 255 | // attr methods 256 | 257 | func (i NodeIter) Href(pat ...string) []string { 258 | if i.Iter == nil { 259 | return nil 260 | } 261 | return Strings(i.Iter, GetHref, pat...) 262 | } 263 | -------------------------------------------------------------------------------- /plain.go: -------------------------------------------------------------------------------- 1 | // Copyright 2005, Hǎiliàng Wáng. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package query 6 | 7 | import ( 8 | "bytes" 9 | "strings" 10 | 11 | "golang.org/x/net/html" 12 | "h12.io/html-query/expr" 13 | ) 14 | 15 | func (n *Node) PlainText() *string { 16 | if n == nil { 17 | return nil 18 | } 19 | var w bytes.Buffer 20 | if err := renderPlain(&w, &n.n); err != nil { 21 | return nil 22 | } 23 | s := strings.TrimSpace(w.String()) 24 | return &s 25 | } 26 | 27 | func renderPlain(w writer, n *html.Node) error { 28 | switch n.Type { 29 | case html.TextNode: 30 | w.WriteString(n.Data) 31 | case html.DocumentNode: 32 | for c := n.FirstChild; c != nil; c = c.NextSibling { 33 | if err := renderPlain(w, c); err != nil { 34 | return err 35 | } 36 | } 37 | return nil 38 | case html.ElementNode: 39 | return renderPlainElementNode(w, n) 40 | } 41 | return nil 42 | } 43 | 44 | func renderPlainElementNode(w writer, n *html.Node) error { 45 | if c := n.FirstChild; c != nil && c.Type == html.TextNode && strings.HasPrefix(c.Data, "\n") { 46 | switch n.Data { 47 | case "pre", "listing", "textarea": 48 | if err := w.WriteByte('\n'); err != nil { 49 | return err 50 | } 51 | } 52 | } 53 | 54 | switch n.Data { 55 | case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": 56 | for c := n.FirstChild; c != nil; c = c.NextSibling { 57 | if c.Type == html.TextNode { 58 | if _, err := w.WriteString(c.Data); err != nil { 59 | return err 60 | } 61 | } else { 62 | if err := renderPlain(w, c); err != nil { 63 | return err 64 | } 65 | } 66 | } 67 | if n.Data == "plaintext" { 68 | return plaintextAbort 69 | } 70 | return nil 71 | case "a": 72 | if n.FirstChild != nil && isURL(n.FirstChild.Data) { 73 | renderPlainChild(w, n) 74 | } else if url := expr.GetAttr(n, "href"); url != nil && *url != "" { 75 | w.WriteString("[") 76 | renderPlainChild(w, n) 77 | w.WriteString("](") 78 | w.WriteString(*url) 79 | w.WriteString(")") 80 | } else { 81 | renderPlainChild(w, n) 82 | } 83 | return nil 84 | } 85 | 86 | renderPlainChild(w, n) 87 | // write 
// isURL reports whether s looks like a web or e-mail address: it contains an
// "http://" or "https://" scheme, or an "@".
//
// renderPlainElementNode uses it to decide whether the text of a link already
// is the address, in which case the text is written alone instead of as
// "[text](href)". The original only recognised "http://", so link text that
// was an https URL was written as "[url](url)".
func isURL(s string) bool {
	return strings.Contains(s, "http://") ||
		strings.Contains(s, "https://") ||
		strings.Contains(s, "@")
}
No more end tags should be rendered after that. 50 | var plaintextAbort = errors.New("html: internal error (plaintext abort)") 51 | 52 | func render(w writer, n *html.Node, size int) error { 53 | err := render1(w, n, -1, size) 54 | if err == plaintextAbort { 55 | err = nil 56 | } 57 | return err 58 | } 59 | 60 | func render1(w writer, n *html.Node, level, size int) error { 61 | if !isSpace(n) && n.Type != html.DocumentNode && n.Type != html.DoctypeNode { 62 | if err := writeBreak(w); err != nil { 63 | return err 64 | } 65 | if err := writeIndent(w, level, size); err != nil { 66 | return err 67 | } 68 | } 69 | 70 | if err := renderSimpleNode(w, n); err != nil { 71 | return err 72 | } 73 | 74 | switch n.Type { 75 | case html.DocumentNode: 76 | for c := n.FirstChild; c != nil; c = c.NextSibling { 77 | if err := render1(w, c, level+1, size); err != nil { 78 | return err 79 | } 80 | } 81 | return nil 82 | case html.ElementNode: 83 | return renderElementNode(w, n, level, size) 84 | } 85 | return nil 86 | } 87 | 88 | func renderSimpleNode(w writer, n *html.Node) error { 89 | // Render non-element nodes; these are the easy cases. 
90 | switch n.Type { 91 | case html.ErrorNode: 92 | return errors.New("html: cannot render an html.ErrorNode node") 93 | case html.TextNode: 94 | return escape(w, n.Data) 95 | case html.CommentNode: 96 | if _, err := w.WriteString("<!--"); err != nil { 97 | return err 98 | } 99 | if _, err := w.WriteString(n.Data); err != nil { 100 | return err 101 | } 102 | if _, err := w.WriteString("-->"); err != nil { 103 | return err 104 | } 105 | return nil 106 | case html.DoctypeNode: 107 | if _, err := w.WriteString("<!DOCTYPE "); err != nil { 108 | return err 109 | } 110 | if _, err := w.WriteString(n.Data); err != nil { 111 | return err 112 | } 113 | if n.Attr != nil { 114 | var p, s string 115 | for _, a := range n.Attr { 116 | switch a.Key { 117 | case "public": 118 | p = a.Val 119 | case "system": 120 | s = a.Val 121 | } 122 | } 123 | if p != "" { 124 | if _, err := w.WriteString(" PUBLIC "); err != nil { 125 | return err 126 | } 127 | if err := writeQuoted(w, p); err != nil { 128 | return err 129 | } 130 | if s != "" { 131 | if err := w.WriteByte(' '); err != nil { 132 | return err 133 | } 134 | if err := writeQuoted(w, s); err != nil { 135 | return err 136 | } 137 | } 138 | } else if s != "" { 139 | if _, err := w.WriteString(" SYSTEM "); err != nil { 140 | return err 141 | } 142 | if err := writeQuoted(w, s); err != nil { 143 | return err 144 | } 145 | } 146 | } 147 | if err := w.WriteByte('>'); err != nil { 148 | return err 149 | } 150 | return nil 151 | case html.ElementNode, html.DocumentNode: 152 | // No-op. 153 | default: 154 | return errors.New("html: unknown node type") 155 | } 156 | return nil 157 | } 158 | 159 | func renderElementNode(w writer, n *html.Node, level, size int) error { 160 | if err := renderOpeningTag(w, n); err != nil { 161 | return err 162 | } 163 | 164 | // Add initial newline where there is danger of a newline beging ignored. 
165 | if c := n.FirstChild; c != nil && c.Type == html.TextNode && strings.HasPrefix(c.Data, "\n") { 166 | switch n.Data { 167 | case "pre", "listing", "textarea": 168 | if err := w.WriteByte('\n'); err != nil { 169 | return err 170 | } 171 | } 172 | } 173 | 174 | // Render any child nodes. 175 | switch n.Data { 176 | case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": 177 | for c := n.FirstChild; c != nil; c = c.NextSibling { 178 | if c.Type == html.TextNode { 179 | if _, err := w.WriteString(c.Data); err != nil { 180 | return err 181 | } 182 | } else { 183 | if err := render1(w, c, level+1, size); err != nil { 184 | return err 185 | } 186 | } 187 | } 188 | if n.Data == "plaintext" { 189 | // Don't render anything else. <plaintext> must be the 190 | // last element in the file, with no closing tag. 191 | return plaintextAbort 192 | } 193 | default: 194 | for c := n.FirstChild; c != nil; c = c.NextSibling { 195 | if err := render1(w, c, level+1, size); err != nil { 196 | return err 197 | } 198 | } 199 | } 200 | 201 | // Render the </xxx> closing tag. 202 | if err := writeBreak(w); err != nil { 203 | return err 204 | } 205 | if err := writeIndent(w, level, size); err != nil { 206 | return err 207 | } 208 | if _, err := w.WriteString("</"); err != nil { 209 | return err 210 | } 211 | if _, err := w.WriteString(n.Data); err != nil { 212 | return err 213 | } 214 | if err := w.WriteByte('>'); err != nil { 215 | return err 216 | } 217 | return nil 218 | } 219 | 220 | // writeQuoted writes s to w surrounded by quotes. Normally it will use double 221 | // quotes, but if s contains a double quote, it will use single quotes. 222 | // It is used for writing the identifiers in a doctype declaration. 223 | // In valid HTML, they can't contain both types of quotes. 
224 | func writeQuoted(w writer, s string) error { 225 | var q byte = '"' 226 | if strings.Contains(s, `"`) { 227 | q = '\'' 228 | } 229 | if err := w.WriteByte(q); err != nil { 230 | return err 231 | } 232 | if _, err := w.WriteString(s); err != nil { 233 | return err 234 | } 235 | if err := w.WriteByte(q); err != nil { 236 | return err 237 | } 238 | return nil 239 | } 240 | 241 | // Section 12.1.2, "Elements", gives this list of void elements. Void elements 242 | // are those that can't have any contents. 243 | var voidElements = map[string]bool{ 244 | "area": true, 245 | "base": true, 246 | "br": true, 247 | "col": true, 248 | "command": true, 249 | "embed": true, 250 | "hr": true, 251 | "img": true, 252 | "input": true, 253 | "keygen": true, 254 | "link": true, 255 | "meta": true, 256 | "param": true, 257 | "source": true, 258 | "track": true, 259 | "wbr": true, 260 | } 261 | 262 | const escapedChars = "&'<>\"\r" 263 | 264 | func escape(w writer, s string) error { 265 | s = strings.TrimSpace(s) 266 | 267 | i := strings.IndexAny(s, escapedChars) 268 | for i != -1 { 269 | if _, err := w.WriteString(s[:i]); err != nil { 270 | return err 271 | } 272 | var esc string 273 | switch s[i] { 274 | case '&': 275 | esc = "&amp;" 276 | case '\'': 277 | // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5. 278 | esc = "&#39;" 279 | case '<': 280 | esc = "&lt;" 281 | case '>': 282 | esc = "&gt;" 283 | case '"': 284 | // "&#34;" is shorter than "&quot;". 
285 | esc = "&#34;" 286 | case '\r': 287 | esc = "&#13;" 288 | default: 289 | panic("unrecognized escape character") 290 | } 291 | s = s[i+1:] 292 | if _, err := w.WriteString(esc); err != nil { 293 | return err 294 | } 295 | i = strings.IndexAny(s, escapedChars) 296 | } 297 | _, err := w.WriteString(s) 298 | return err 299 | } 300 | 301 | func writeIndent(w writer, level, size int) error { 302 | for i := 0; i < level*size; i++ { 303 | if _, err := w.WriteString(` `); err != nil { 304 | return err 305 | } 306 | } 307 | return nil 308 | } 309 | 310 | func writeBreak(w writer) error { 311 | _, err := w.Write([]byte{'\n'}) 312 | return err 313 | } 314 | 315 | func isSpace(n *html.Node) bool { 316 | return n != nil && n.Type == html.TextNode && strings.TrimSpace(n.Data) == "" 317 | } 318 | 319 | func renderOpeningTag(w writer, n *html.Node) error { 320 | // Render the <xxx> opening tag. 321 | if err := w.WriteByte('<'); err != nil { 322 | return err 323 | } 324 | if _, err := w.WriteString(n.Data); err != nil { 325 | return err 326 | } 327 | for _, a := range n.Attr { 328 | if err := w.WriteByte(' '); err != nil { 329 | return err 330 | } 331 | if a.Namespace != "" { 332 | if _, err := w.WriteString(a.Namespace); err != nil { 333 | return err 334 | } 335 | if err := w.WriteByte(':'); err != nil { 336 | return err 337 | } 338 | } 339 | if _, err := w.WriteString(a.Key); err != nil { 340 | return err 341 | } 342 | if _, err := w.WriteString(`="`); err != nil { 343 | return err 344 | } 345 | if err := escape(w, a.Val); err != nil { 346 | return err 347 | } 348 | if err := w.WriteByte('"'); err != nil { 349 | return err 350 | } 351 | } 352 | if voidElements[n.Data] { 353 | if n.FirstChild != nil { 354 | return fmt.Errorf("html: void element <%s> has child nodes", n.Data) 355 | } 356 | _, err := w.WriteString("/>") 357 | if err != nil { 358 | return err 359 | } 360 | return nil 361 | } 362 | if err := w.WriteByte('>'); err != nil { 363 | return err 364 | } 365 | return nil 
// p prints its arguments to standard output with fmt.Println, which separates
// them with spaces and appends a newline. Presumably a debugging aid; nothing
// in the visible part of the package calls it.
func p(v ...interface{}) {
	fmt.Println(v...)
}