├── .godocdown.md ├── .travis.yml ├── LICENSE.md ├── README.md ├── decoder.go ├── decoder_test.go ├── doc.go ├── go.mod ├── go.sum ├── unmarshal-error.go ├── unmarshal.go ├── unmarshal_test.go └── util.go /.godocdown.md: -------------------------------------------------------------------------------- 1 | # goq 2 | [![Build Status](https://travis-ci.org/andrewstuart/goq.svg?branch=master)](https://travis-ci.org/andrewstuart/goq) 3 | [![GoDoc](https://godoc.org/astuart.co/goq?status.svg)](https://godoc.org/astuart.co/goq) 4 | [![Coverage Status](https://coveralls.io/repos/github/andrewstuart/goq/badge.svg?branch=master)](https://coveralls.io/github/andrewstuart/goq?branch=master) 5 | [![Go Report Card](https://goreportcard.com/badge/astuart.co/goq)](https://goreportcard.com/report/astuart.co/goq) 6 | 7 | ## Example 8 | 9 | ```go 10 | import ( 11 | "log" 12 | "net/http" 13 | 14 | "astuart.co/goq" 15 | ) 16 | 17 | // Structured representation for github file name table 18 | type example struct { 19 | Title string `goquery:"h1"` 20 | Files []string `goquery:"table.files tbody tr.js-navigation-item td.content,text"` 21 | } 22 | 23 | func main() { 24 | res, err := http.Get("https://github.com/andrewstuart/goq") 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | defer res.Body.Close() 29 | 30 | var ex example 31 | 32 | err = goq.NewDecoder(res.Body).Decode(&ex) 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | log.Println(ex.Title, ex.Files) 38 | } 39 | ``` 40 | 41 | ## Details 42 | 43 | {{ .Emit }} 44 | 45 | ## TODO 46 | 47 | - Callable goquery methods with args, via reflection 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go_import_path: astuart.co/goq 3 | 4 | matrix: 5 | allow_failures: 6 | - go: tip 7 | 8 | before_install: 9 | - go get github.com/mattn/goveralls 10 | script: 11 | - 
$HOME/gopath/bin/goveralls -service=travis-ci 12 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2017 Andrew Stuart 3 | 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 21 | OR OTHER DEALINGS IN THE SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # goq 2 | [![Build Status](https://travis-ci.org/andrewstuart/goq.svg?branch=master)](https://travis-ci.org/andrewstuart/goq) 3 | [![GoDoc](https://godoc.org/astuart.co/goq?status.svg)](https://godoc.org/astuart.co/goq) 4 | [![Coverage Status](https://coveralls.io/repos/github/andrewstuart/goq/badge.svg?branch=master)](https://coveralls.io/github/andrewstuart/goq?branch=master) 5 | [![Go Report Card](https://goreportcard.com/badge/astuart.co/goq)](https://goreportcard.com/report/astuart.co/goq) 6 | 7 | ## Example 8 | 9 | ```go 10 | import ( 11 | "log" 12 | "net/http" 13 | 14 | "astuart.co/goq" 15 | ) 16 | 17 | // Structured representation for github file name table 18 | type example struct { 19 | Title string `goquery:"h1"` 20 | Files []string `goquery:"table.files tbody tr.js-navigation-item td.content,text"` 21 | } 22 | 23 | func main() { 24 | res, err := http.Get("https://github.com/andrewstuart/goq") 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | defer res.Body.Close() 29 | 30 | var ex example 31 | 32 | err = goq.NewDecoder(res.Body).Decode(&ex) 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | 37 | log.Println(ex.Title, ex.Files) 38 | } 39 | ``` 40 | 41 | ## Details 42 | 43 | # goq 44 | -- 45 | import "astuart.co/goq" 46 | 47 | Package goq was built to allow users to declaratively unmarshal HTML into go 48 | structs using struct tags composed of css selectors. 49 | 50 | I've made a best effort to behave very similarly to JSON and XML decoding as 51 | well as exposing as much information as possible in the event of an error to 52 | help you debug your Unmarshaling issues. 
53 | 54 | When creating struct types to be unmarshaled into, the following general rules 55 | apply: 56 | 57 | - Any type that implements the Unmarshaler interface will be passed a slice of 58 | *html.Node so that manual unmarshaling may be done. This takes the highest 59 | precedence. 60 | 61 | - Any struct fields may be annotated with goquery metadata, which takes the form 62 | of an element selector followed by arbitrary comma-separated "value selectors." 63 | 64 | - A value selector may be one of `html`, `text`, or `[someAttrName]`. `html` and 65 | `text` will result in the methods of the same name being called on the 66 | `*goquery.Selection` to obtain the value. `[someAttrName]` will result in 67 | `*goquery.Selection.Attr("someAttrName")` being called for the value. 68 | 69 | - A primitive value type will default to the text value of the resulting nodes 70 | if no value selector is given. 71 | 72 | - At least one value selector is required for maps, to determine the map key. 73 | The key type must follow both the rules applicable to go map indexing, as well 74 | as these unmarshaling rules. The value of each key will be unmarshaled in the 75 | same way the element value is unmarshaled. 76 | 77 | - For maps, keys will be retrieved from the *same level* of the DOM. The key 78 | selector may be arbitrarily nested, though: the first level of children with any 79 | number of matching elements will be used. 80 | 81 | - For maps, any values *must* be nested *below* the level of the key selector. 82 | Parents or siblings of the element matched by the key selector will not be 83 | considered. 84 | 85 | - Once used, a "value selector" will be shifted off of the comma-separated list. 86 | This allows you to nest arbitrary levels of value selectors. For example, the 87 | type `[]map[string][]string` would require one selector for the map key, and 88 | take an optional second selector for the values of the string slice. 
89 | 90 | - Any struct type encountered in nested types (e.g. map[string]SomeStruct) will 91 | override any remaining "value selectors" that had not been used. For example, 92 | given: 93 | 94 | struct S { 95 | F string `goquery:",[bang]"` 96 | } 97 | 98 | struct { 99 | T map[string]S `goquery:"#someId,[foo],[bar],[baz]"` 100 | } 101 | 102 | `[foo]` will be used to determine the string map key, but `[bar]` and `[baz]` 103 | will be ignored, with the `[bang]` tag present on the S struct type taking precedence. 104 | 105 | ## Usage 106 | 107 | #### func NodeSelector 108 | 109 | ```go 110 | func NodeSelector(nodes []*html.Node) *goquery.Selection 111 | ``` 112 | NodeSelector is a quick utility function to get a goquery.Selection from a slice 113 | of *html.Node. Useful for performing unmarshaling, since the decision was made 114 | to use []*html.Node for maximum flexibility. 115 | 116 | #### func Unmarshal 117 | 118 | ```go 119 | func Unmarshal(bs []byte, v interface{}) error 120 | ``` 121 | Unmarshal takes a byte slice and a destination pointer to any interface{}, and 122 | unmarshals the document into the destination based on the rules above. Any error 123 | returned here will likely be of type CannotUnmarshalError, though an initial 124 | goquery error will pass through directly. 125 | 126 | #### func UnmarshalSelection 127 | 128 | ```go 129 | func UnmarshalSelection(s *goquery.Selection, iface interface{}) error 130 | ``` 131 | UnmarshalSelection will unmarshal a goquery.Selection into an interface 132 | appropriately annotated with goquery tags. 133 | 134 | #### type CannotUnmarshalError 135 | 136 | ```go 137 | type CannotUnmarshalError struct { 138 | Err error 139 | Val string 140 | FldOrIdx interface{} 141 | } 142 | ``` 143 | 144 | CannotUnmarshalError represents an error returned by the goquery Unmarshaler and 145 | helps consumers in programmatically diagnosing the cause of their error. 
146 | 147 | #### func (*CannotUnmarshalError) Error 148 | 149 | ```go 150 | func (e *CannotUnmarshalError) Error() string 151 | ``` 152 | 153 | #### type Decoder 154 | 155 | ```go 156 | type Decoder struct { 157 | } 158 | ``` 159 | 160 | Decoder implements the same API you will see in encoding/xml and encoding/json 161 | except that we do not currently support proper streaming decoding as it is not 162 | supported by goquery upstream. 163 | 164 | #### func NewDecoder 165 | 166 | ```go 167 | func NewDecoder(r io.Reader) *Decoder 168 | ``` 169 | NewDecoder returns a new decoder given an io.Reader 170 | 171 | #### func (*Decoder) Decode 172 | 173 | ```go 174 | func (d *Decoder) Decode(dest interface{}) error 175 | ``` 176 | Decode will unmarshal the contents of the decoder when given an instance of an 177 | annotated type as its argument. It will return any errors encountered during 178 | either parsing the document or unmarshaling into the given object. 179 | 180 | #### type Unmarshaler 181 | 182 | ```go 183 | type Unmarshaler interface { 184 | UnmarshalHTML([]*html.Node) error 185 | } 186 | ``` 187 | 188 | Unmarshaler allows for custom implementations of unmarshaling logic 189 | 190 | ## TODO 191 | 192 | - Callable goquery methods with args, via reflection 193 | -------------------------------------------------------------------------------- /decoder.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import ( 4 | "io" 5 | "sync" 6 | 7 | "github.com/PuerkitoBio/goquery" 8 | ) 9 | 10 | // Decoder implements the same API you will see in encoding/xml and 11 | // encoding/json except that we do not currently support proper streaming 12 | // decoding as it is not supported by goquery upstream. 
13 | type Decoder struct { 14 | err error 15 | doc *goquery.Document 16 | cache sync.Map 17 | } 18 | 19 | // NewDecoder returns a new decoder given an io.Reader 20 | func NewDecoder(r io.Reader) *Decoder { 21 | d := &Decoder{} 22 | d.doc, d.err = goquery.NewDocumentFromReader(r) 23 | return d 24 | } 25 | 26 | // Decode will unmarshal the contents of the decoder when given an instance of 27 | // an annotated type as its argument. It will return any errors encountered 28 | // during either parsing the document or unmarshaling into the given object. 29 | func (d *Decoder) Decode(dest interface{}) error { 30 | if d.err != nil { 31 | return d.err 32 | } 33 | if d.doc == nil { 34 | return &CannotUnmarshalError{ 35 | Reason: "resulting document was nil", 36 | } 37 | } 38 | 39 | return UnmarshalSelection(d.doc.Selection, dest) 40 | } 41 | -------------------------------------------------------------------------------- /decoder_test.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestDecoder(t *testing.T) { 11 | asrt := assert.New(t) 12 | 13 | var p page 14 | 15 | asrt.NoError(NewDecoder(strings.NewReader(hnPage)).Decode(&p)) 16 | asrt.Len(p.Items, 30) 17 | } 18 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package goq was built to allow users to declaratively unmarshal HTML into go 2 | // structs using struct tags composed of css selectors. 3 | // 4 | // I've made a best effort to behave very similarly to JSON and XML decoding as 5 | // well as exposing as much information as possible in the event of an error to 6 | // help you debug your Unmarshaling issues. 
7 | // 8 | // When creating struct types to be unmarshaled into, the following general 9 | // rules apply: 10 | // 11 | // - Any type that implements the Unmarshaler interface will be passed a slice 12 | // of *html.Node so that manual unmarshaling may be done. This takes the 13 | // highest precedence. 14 | // 15 | // - Any struct fields may be annotated with goquery metadata, which takes the 16 | // form of an element selector followed by arbitrary comma-separated "value 17 | // selectors." 18 | // 19 | // - A value selector may be one of `html`, `text`, or `[someAttrName]`. `html` 20 | // and `text` will result in the methods of the same name being called on the 21 | // `*goquery.Selection` to obtain the value. `[someAttrName]` will result in 22 | // `*goquery.Selection.Attr("someAttrName")` being called for the value. 23 | // 24 | // - A primitive value type will default to the text value of the resulting 25 | // nodes if no value selector is given. 26 | // 27 | // - At least one value selector is required for maps, to determine the map key. 28 | // The key type must follow both the rules applicable to go map indexing, as 29 | // well as these unmarshaling rules. The value of each key will be unmarshaled 30 | // in the same way the element value is unmarshaled. 31 | // 32 | // - For maps, keys will be retrieved from the *same level* of the DOM. The key 33 | // selector may be arbitrarily nested, though: the first level of children 34 | // with any number of matching elements will be used. 35 | // 36 | // - For maps, any values *must* be nested *below* the level of the key 37 | // selector. Parents or siblings of the element matched by the key selector will 38 | // not be considered. 39 | // 40 | // - Once used, a "value selector" will be shifted off of the comma-separated 41 | // list. This allows you to nest arbitrary levels of value selectors. 
For 42 | // example, the type `[]map[string][]string` would require one selector for the 43 | // map key, and take an optional second selector for the values of the string 44 | // slice. 45 | // 46 | // - Any struct type encountered in nested types (e.g. map[string]SomeStruct) 47 | // will override any remaining "value selectors" that had not been used. For 48 | // example, given: 49 | // struct S { 50 | // F string `goquery:",[bang]"` 51 | // } 52 | // 53 | // struct { 54 | // T map[string]S `goquery:"#someId,[foo],[bar],[baz]"` 55 | // } 56 | // `[foo]` will be used to determine the string map key,but `[bar]` and `[baz]` 57 | // will be ignored, with the `[bang]` tag present S struct type taking 58 | // precedence. 59 | package goq 60 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module astuart.co/goq 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.7.1 7 | github.com/stretchr/testify v1.3.0 8 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect 9 | golang.org/x/net v0.0.0-20210825183410-e898025ed96a 10 | ) 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= 2 | github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= 3 | github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4= 4 | github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY= 5 | github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= 6 | github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 7 | github.com/andybalholm/cascadia v1.2.0 
h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= 8 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 9 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 12 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 13 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 14 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 15 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 16 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 17 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 18 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 19 | golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0= 20 | golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 21 | golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 22 | golang.org/x/net v0.0.0-20210825183410-e898025ed96a h1:bRuuGXV8wwSdGTB+CtJf+FjgO1APK1CoO39T4BN/XBw= 23 | golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 24 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 25 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 26 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 27 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 28 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 29 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 30 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 31 | -------------------------------------------------------------------------------- /unmarshal-error.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | ) 7 | 8 | // All "Reason" fields within CannotUnmarshalError will be constants and part of 9 | // this list 10 | const ( 11 | nonPointer = "non-pointer value" 12 | nilValue = "destination argument is nil" 13 | documentReadError = "error reading goquery document" 14 | arrayLengthMismatch = "array length does not match document elements found" 15 | customUnmarshalError = "a custom Unmarshaler implementation threw an error" 16 | typeConversionError = "a type conversion error occurred" 17 | mapKeyUnmarshalError = "error unmarshaling a map key" 18 | missingValueSelector = "at least one value selector must be passed to use as map index" 19 | ) 20 | 21 | // CannotUnmarshalError represents an error returned by the goquery Unmarshaler 22 | // and helps consumers in programmatically diagnosing the cause of their error. 23 | type CannotUnmarshalError struct { 24 | Err error 25 | Val string 26 | FldOrIdx interface{} 27 | 28 | V reflect.Value 29 | Reason string 30 | } 31 | 32 | // This type is a mid-level abstraction to help understand the error printing logic 33 | type errChain struct { 34 | chain []*CannotUnmarshalError 35 | val string 36 | tail error 37 | } 38 | 39 | // tPath returns the type path in the same string format one might use to access 40 | // the nested value in go code. 
This should hopefully help make debugging easier. 41 | func (e errChain) tPath() string { 42 | nest := "" 43 | 44 | for _, err := range e.chain { 45 | if err.FldOrIdx != nil { 46 | switch nesting := err.FldOrIdx.(type) { 47 | case string: 48 | switch err.V.Type().Kind() { 49 | case reflect.Map: 50 | nest += fmt.Sprintf("[%q]", nesting) 51 | case reflect.Struct: 52 | nest += fmt.Sprintf(".%s", nesting) 53 | } 54 | case int: 55 | nest += fmt.Sprintf("[%d]", nesting) 56 | case *int: 57 | nest += fmt.Sprintf("[%d]", *nesting) 58 | default: 59 | fmt.Printf("err.FldOrIdx = %#v\n", err.FldOrIdx) 60 | nest += fmt.Sprintf("[%v]", nesting) 61 | } 62 | } 63 | } 64 | 65 | return nest 66 | } 67 | 68 | func (e errChain) last() *CannotUnmarshalError { 69 | return e.chain[len(e.chain)-1] 70 | } 71 | 72 | // Error gives a human-readable error message for debugging purposes. 73 | func (e errChain) Error() string { 74 | last := e.last() 75 | 76 | // Avoid panic if we cannot get a type name for the Value 77 | t := "unknown: invalid value" 78 | if last.V.IsValid() { 79 | t = last.V.Type().String() 80 | } 81 | 82 | msg := "could not unmarshal " 83 | 84 | if e.val != "" { 85 | msg += fmt.Sprintf("value %q ", e.val) 86 | } 87 | 88 | msg += fmt.Sprintf( 89 | "into '%s%s' (type %s): %s", 90 | e.chain[0].V.Type(), 91 | e.tPath(), 92 | t, 93 | last.Reason, 94 | ) 95 | 96 | // If a generic error was reported elsewhere, report its message last 97 | if e.tail != nil { 98 | msg = msg + ": " + e.tail.Error() 99 | } 100 | 101 | return msg 102 | } 103 | 104 | // Traverse e.Err, printing hopefully helpful type info until there are no more 105 | // chained errors. 
106 | func (e *CannotUnmarshalError) unwind() *errChain { 107 | str := &errChain{chain: []*CannotUnmarshalError{}} 108 | for { 109 | str.chain = append(str.chain, e) 110 | 111 | if e.Val != "" { 112 | str.val = e.Val 113 | } 114 | 115 | // Terminal error was of type *CannotUnmarshalError and had no children 116 | if e.Err == nil { 117 | return str 118 | } 119 | 120 | if e2, ok := e.Err.(*CannotUnmarshalError); ok { 121 | e = e2 122 | continue 123 | } 124 | 125 | // Child error was not a *CannotUnmarshalError; print its message 126 | str.tail = e.Err 127 | return str 128 | } 129 | } 130 | 131 | func (e *CannotUnmarshalError) Error() string { 132 | return e.unwind().Error() 133 | } 134 | -------------------------------------------------------------------------------- /unmarshal.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import ( 4 | "bytes" 5 | "reflect" 6 | "strconv" 7 | "strings" 8 | "sync" 9 | 10 | "github.com/PuerkitoBio/goquery" 11 | 12 | "golang.org/x/net/html" 13 | ) 14 | 15 | // Unmarshaler allows for custom implementations of unmarshaling logic 16 | type Unmarshaler interface { 17 | UnmarshalHTML([]*html.Node) error 18 | } 19 | 20 | // NodeSelector is a quick utility function to get a goquery.Selection from a 21 | // slice of *html.Node. Useful for performing unmarshaling, since the decision 22 | // was made to use []*html.Node for maximum flexibility. 23 | func NodeSelector(nodes []*html.Node) *goquery.Selection { 24 | sel := &goquery.Selection{} 25 | return sel.AddNodes(nodes...) 26 | } 27 | 28 | type valFunc func(*goquery.Selection) string 29 | 30 | type goqueryTag string 31 | 32 | const ( 33 | prePfx = '!' 
34 | tagName = "goquery" 35 | ignoreTag = "!ignore" 36 | ) 37 | 38 | func (tag goqueryTag) preprocess(s *goquery.Selection) *goquery.Selection { 39 | arr := strings.Split(string(tag), ",") 40 | var offset int 41 | for len(arr)-1 > offset && arr[offset][0] == prePfx { 42 | meth := arr[offset][1:] 43 | v := reflect.ValueOf(s).MethodByName(meth) 44 | if !v.IsValid() { 45 | return s 46 | } 47 | 48 | result := v.Call(nil) 49 | 50 | if sel, ok := result[0].Interface().(*goquery.Selection); ok { 51 | s = sel 52 | } 53 | offset++ 54 | } 55 | return s 56 | } 57 | 58 | func (tag goqueryTag) selector(which int) string { 59 | arr := strings.Split(string(tag), ",") 60 | if which > len(arr)-1 { 61 | return "" 62 | } 63 | var offset int 64 | for len(arr) > offset && arr[offset][0] == prePfx { 65 | offset++ 66 | } 67 | return arr[which+offset] 68 | } 69 | 70 | var ( 71 | textVal valFunc = func(s *goquery.Selection) string { 72 | return strings.TrimSpace(s.Text()) 73 | } 74 | htmlVal = func(s *goquery.Selection) string { 75 | str, _ := s.Html() 76 | return strings.TrimSpace(str) 77 | } 78 | 79 | vfCache sync.Map 80 | ) 81 | 82 | func attrFunc(attr string) valFunc { 83 | return func(s *goquery.Selection) string { 84 | str, _ := s.Attr(attr) 85 | return str 86 | } 87 | } 88 | 89 | func (tag goqueryTag) valFunc() valFunc { 90 | 91 | if fn, ok := vfCache.Load(tag); ok { 92 | return fn.(valFunc) 93 | } 94 | 95 | srcArr := strings.Split(string(tag), ",") 96 | if len(srcArr) < 2 { 97 | vfCache.Store(tag, textVal) 98 | return textVal 99 | } 100 | 101 | src := srcArr[1] 102 | 103 | var f valFunc 104 | switch { 105 | case src[0] == '[': 106 | // [someattr] will return value of .Attr("someattr") 107 | attr := src[1 : len(src)-1] 108 | f = attrFunc(attr) 109 | case src == "html": 110 | f = htmlVal 111 | case src == "text": 112 | f = textVal 113 | default: 114 | f = textVal 115 | } 116 | 117 | vfCache.Store(tag, f) 118 | return f 119 | } 120 | 121 | // popVal should allow us to handle 
arbitrarily nested maps as well as the 122 | // cleanly handling the possiblity of map[literal]literal by just delegating 123 | // back to `unmarshalByType`. 124 | func (tag goqueryTag) popVal() goqueryTag { 125 | arr := strings.Split(string(tag), ",") 126 | if len(arr) < 2 { 127 | return tag 128 | } 129 | newA := []string{arr[0]} 130 | newA = append(newA, arr[2:]...) 131 | 132 | return goqueryTag(strings.Join(newA, ",")) 133 | } 134 | 135 | // Unmarshal takes a byte slice and a destination pointer to any 136 | // interface{}, and unmarshals the document into the destination based on the 137 | // rules above. Any error returned here will likely be of type 138 | // CannotUnmarshalError, though an initial goquery error will pass through 139 | // directly. 140 | func Unmarshal(bs []byte, v interface{}) error { 141 | d, err := goquery.NewDocumentFromReader(bytes.NewReader(bs)) 142 | 143 | if err != nil { 144 | return err 145 | } 146 | 147 | return UnmarshalSelection(d.Selection, v) 148 | } 149 | 150 | func wrapUnmErr(err error, v reflect.Value) error { 151 | if err == nil { 152 | return nil 153 | } 154 | 155 | return &CannotUnmarshalError{ 156 | V: v, 157 | Reason: customUnmarshalError, 158 | Err: err, 159 | } 160 | } 161 | 162 | // UnmarshalSelection will unmarshal a goquery.goquery.Selection into an interface 163 | // appropriately annoated with goquery tags. 
164 | func UnmarshalSelection(s *goquery.Selection, iface interface{}) error { 165 | v := reflect.ValueOf(iface) 166 | 167 | // Must come before v.IsNil() else IsNil panics on NonPointer value 168 | if v.Kind() != reflect.Ptr { 169 | return &CannotUnmarshalError{V: v, Reason: nonPointer} 170 | } 171 | 172 | if iface == nil || v.IsNil() { 173 | return &CannotUnmarshalError{V: v, Reason: nilValue} 174 | } 175 | 176 | u, v := indirect(v) 177 | 178 | if u != nil { 179 | return wrapUnmErr(u.UnmarshalHTML(s.Nodes), v) 180 | } 181 | 182 | return unmarshalByType(s, v, "") 183 | } 184 | 185 | func unmarshalByType(s *goquery.Selection, v reflect.Value, tag goqueryTag) error { 186 | u, v := indirect(v) 187 | 188 | if u != nil { 189 | return wrapUnmErr(u.UnmarshalHTML(s.Nodes), v) 190 | } 191 | 192 | // Handle special cases where we can just set the value directly 193 | switch val := v.Interface().(type) { 194 | case []*html.Node: 195 | val = append(val, s.Nodes...) 196 | v.Set(reflect.ValueOf(val)) 197 | return nil 198 | } 199 | 200 | t := v.Type() 201 | 202 | switch t.Kind() { 203 | case reflect.Struct: 204 | return unmarshalStruct(s, v) 205 | case reflect.Slice: 206 | return unmarshalSlice(s, v, tag) 207 | case reflect.Array: 208 | return unmarshalArray(s, v, tag) 209 | case reflect.Map: 210 | return unmarshalMap(s, v, tag) 211 | default: 212 | vf := tag.valFunc() 213 | str := vf(s) 214 | err := unmarshalLiteral(str, v) 215 | if err != nil { 216 | return &CannotUnmarshalError{ 217 | V: v, 218 | Reason: typeConversionError, 219 | Err: err, 220 | Val: str, 221 | } 222 | } 223 | return nil 224 | } 225 | } 226 | 227 | func unmarshalLiteral(s string, v reflect.Value) error { 228 | t := v.Type() 229 | 230 | switch t.Kind() { 231 | case reflect.Interface: 232 | if t.NumMethod() == 0 { 233 | // For empty interfaces, just set to a string 234 | nv := reflect.New(reflect.TypeOf(s)).Elem() 235 | nv.Set(reflect.ValueOf(s)) 236 | v.Set(nv) 237 | } 238 | case reflect.Bool: 239 | i, err := 
strconv.ParseBool(s) 240 | if err != nil { 241 | return err 242 | } 243 | v.SetBool(i) 244 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 245 | i, err := strconv.ParseInt(s, 10, 64) 246 | if err != nil { 247 | return err 248 | } 249 | v.SetInt(i) 250 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: 251 | i, err := strconv.ParseUint(s, 10, 64) 252 | if err != nil { 253 | return err 254 | } 255 | v.SetUint(i) 256 | case reflect.Float32, reflect.Float64: 257 | i, err := strconv.ParseFloat(s, 64) 258 | if err != nil { 259 | return err 260 | } 261 | v.SetFloat(i) 262 | case reflect.String: 263 | v.SetString(s) 264 | } 265 | return nil 266 | } 267 | 268 | func unmarshalStruct(s *goquery.Selection, v reflect.Value) error { 269 | t := v.Type() 270 | 271 | for i := 0; i < t.NumField(); i++ { 272 | tag := goqueryTag(t.Field(i).Tag.Get(tagName)) 273 | 274 | if tag == ignoreTag { 275 | continue 276 | } 277 | 278 | // If tag is empty and the object doesn't implement Unmarshaler, skip 279 | if tag == "" { 280 | if u, _ := indirect(v.Field(i)); u == nil { 281 | continue 282 | } 283 | } 284 | 285 | sel := tag.preprocess(s) 286 | if tag != "" { 287 | selStr := tag.selector(0) 288 | sel = sel.Find(selStr) 289 | } 290 | 291 | err := unmarshalByType(sel, v.Field(i), tag) 292 | if err != nil { 293 | return &CannotUnmarshalError{ 294 | Reason: typeConversionError, 295 | Err: err, 296 | V: v, 297 | FldOrIdx: t.Field(i).Name, 298 | } 299 | } 300 | } 301 | return nil 302 | } 303 | 304 | func unmarshalArray(s *goquery.Selection, v reflect.Value, tag goqueryTag) error { 305 | if v.Type().Len() != len(s.Nodes) { 306 | return &CannotUnmarshalError{ 307 | Reason: arrayLengthMismatch, 308 | V: v, 309 | } 310 | } 311 | 312 | for i := 0; i < v.Type().Len(); i++ { 313 | err := unmarshalByType(s.Eq(i), v.Index(i), tag) 314 | if err != nil { 315 | return &CannotUnmarshalError{ 316 | Reason: typeConversionError, 317 | Err: err, 318 | 
V: v, 319 | FldOrIdx: i, 320 | } 321 | } 322 | } 323 | 324 | return nil 325 | } 326 | 327 | func unmarshalSlice(s *goquery.Selection, v reflect.Value, tag goqueryTag) error { 328 | slice := v 329 | eleT := v.Type().Elem() 330 | 331 | for i := 0; i < s.Length(); i++ { 332 | newV := reflect.New(TypeDeref(eleT)) 333 | 334 | err := unmarshalByType(s.Eq(i), newV, tag) 335 | 336 | if err != nil { 337 | return &CannotUnmarshalError{ 338 | Reason: typeConversionError, 339 | Err: err, 340 | V: v, 341 | FldOrIdx: i, 342 | } 343 | } 344 | 345 | if eleT.Kind() != reflect.Ptr { 346 | newV = newV.Elem() 347 | } 348 | 349 | v = reflect.Append(v, newV) 350 | } 351 | 352 | slice.Set(v) 353 | return nil 354 | } 355 | 356 | func childrenUntilMatch(s *goquery.Selection, sel string) *goquery.Selection { 357 | orig := s 358 | s = s.Children() 359 | for s.Length() != 0 && s.Filter(sel).Length() == 0 { 360 | s = s.Children() 361 | } 362 | if s.Length() == 0 { 363 | return orig 364 | } 365 | return s.Filter(sel) 366 | } 367 | 368 | func unmarshalMap(s *goquery.Selection, v reflect.Value, tag goqueryTag) error { 369 | // Make new map here because indirect for some Reason doesn't help us out 370 | if v.IsNil() { 371 | v.Set(reflect.MakeMap(v.Type())) 372 | } 373 | 374 | keyT, eleT := v.Type().Key(), v.Type().Elem() 375 | 376 | if tag.selector(1) == "" { 377 | // We need minimum one value selector to determine the map key 378 | return &CannotUnmarshalError{ 379 | Reason: missingValueSelector, 380 | V: v, 381 | } 382 | } 383 | 384 | valTag := tag 385 | 386 | // Find children at the same level that match the given selector 387 | s = childrenUntilMatch(s, tag.selector(1)) 388 | // Then augment the selector we will pass down to the next unmarshal step 389 | valTag = valTag.popVal() 390 | 391 | var err error 392 | s.EachWithBreak(func(_ int, subS *goquery.Selection) bool { 393 | newK, newV := reflect.New(TypeDeref(keyT)), reflect.New(TypeDeref(eleT)) 394 | 395 | err = unmarshalByType(subS, newK, 
tag) 396 | if err != nil { 397 | err = &CannotUnmarshalError{ 398 | Reason: mapKeyUnmarshalError, 399 | V: v, 400 | Err: err, 401 | FldOrIdx: newK.Interface(), 402 | Val: valTag.valFunc()(subS), 403 | } 404 | return false 405 | } 406 | 407 | err = unmarshalByType(subS, newV, valTag) 408 | if err != nil { 409 | return false 410 | } 411 | 412 | if eleT.Kind() != reflect.Ptr { 413 | newV = newV.Elem() 414 | } 415 | if keyT.Kind() != reflect.Ptr { 416 | newK = newK.Elem() 417 | } 418 | 419 | v.SetMapIndex(newK, newV) 420 | 421 | return true 422 | }) 423 | 424 | if err != nil { 425 | return &CannotUnmarshalError{ 426 | Reason: typeConversionError, 427 | Err: err, 428 | V: v, 429 | } 430 | } 431 | 432 | return nil 433 | } 434 | -------------------------------------------------------------------------------- /unmarshal_test.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | "testing" 8 | 9 | "golang.org/x/net/html" 10 | 11 | "github.com/PuerkitoBio/goquery" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | const testPage = ` 16 | 17 | 18 | 19 | 20 | 21 | 22 |

23 | 40 |

FOO!!!

41 | 42 | 47 | 59 |
60 | 1 61 | true 62 | false 63 | 1.2345 64 | -123 65 | 100 66 |
67 | 68 | 69 | ` 70 | 71 | type Page struct { 72 | Resources []Resource `goquery:"#resources .resource"` 73 | FooBar FooBar 74 | } 75 | 76 | type Resource struct { 77 | Name string `goquery:".name"` 78 | } 79 | 80 | type Attr struct { 81 | Key, Value string 82 | } 83 | 84 | type FooBar struct { 85 | Attrs []Attr 86 | Val int 87 | unmarshalWasCalled bool 88 | } 89 | 90 | type AttrSelectorTest struct { 91 | Header H2 `goquery:"#anchor-header"` 92 | } 93 | 94 | type H2 struct { 95 | Location string `goquery:"a,[href]"` 96 | } 97 | 98 | type sliceAttrSelector struct { 99 | // For arrays/slices, type []primitive can use a source attribute 100 | Things []bool `goquery:".foobar [arr=\"true\"],[arr]"` 101 | } 102 | 103 | func (f *FooBar) UnmarshalHTML(nodes []*html.Node) error { 104 | f.unmarshalWasCalled = true 105 | 106 | s := NodeSelector(nodes) 107 | 108 | f.Attrs = []Attr{} 109 | for _, node := range s.Find(".foobar thing").Nodes { 110 | for _, attr := range node.Attr { 111 | f.Attrs = append(f.Attrs, Attr{Key: attr.Key, Value: attr.Val}) 112 | } 113 | } 114 | thing := s.Find("thing") 115 | 116 | thingText := thing.Text() 117 | 118 | i, err := strconv.Atoi(thingText) 119 | f.Val = i 120 | return err 121 | } 122 | 123 | type ErrorFooBar struct{} 124 | 125 | var errTestUnmarshal = fmt.Errorf("A wild error appeared") 126 | 127 | func (e *ErrorFooBar) UnmarshalHTML([]*html.Node) error { 128 | return errTestUnmarshal 129 | } 130 | 131 | var vals = []string{"Foo", "Bar", "Baz", "Bang", "Zip"} 132 | 133 | func TestUnmarshal(t *testing.T) { 134 | asrt := assert.New(t) 135 | 136 | asrt.Implements((*Unmarshaler)(nil), new(FooBar)) 137 | 138 | var p Page 139 | 140 | asrt.NoError(Unmarshal([]byte(testPage), &p)) 141 | asrt.Len(p.Resources, 5) 142 | 143 | for i, val := range vals { 144 | asrt.Equal(val, p.Resources[i].Name) 145 | } 146 | 147 | asrt.True(p.FooBar.unmarshalWasCalled, "Unmarshal should have been called.") 148 | asrt.Equal(1, p.FooBar.Val) 149 | 
asrt.Len(p.FooBar.Attrs, 1) 150 | asrt.Equal("foo", p.FooBar.Attrs[0].Key) 151 | asrt.Equal("yes", p.FooBar.Attrs[0].Value) 152 | } 153 | 154 | func TestArrayUnmarshal(t *testing.T) { 155 | asrt := assert.New(t) 156 | 157 | var a struct { 158 | Resources [5]Resource `goquery:"#resources .resource"` 159 | } 160 | 161 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 162 | for i, val := range vals { 163 | asrt.Equal(val, a.Resources[i].Name) 164 | } 165 | } 166 | 167 | func TestBoolean(t *testing.T) { 168 | asrt := assert.New(t) 169 | 170 | var a struct { 171 | BoolTest struct { 172 | Foo bool `goquery:"foo"` 173 | Bar bool `goquery:"bar"` 174 | } `goquery:".foobar"` 175 | } 176 | 177 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 178 | 179 | asrt.Equal(true, a.BoolTest.Foo) 180 | asrt.Equal(false, a.BoolTest.Bar) 181 | } 182 | 183 | func BenchmarkBoolean(b *testing.B) { 184 | var a struct { 185 | BoolTest struct { 186 | Foo bool `goquery:"foo"` 187 | Bar bool `goquery:"bar"` 188 | } `goquery:".foobar"` 189 | } 190 | d, _ := goquery.NewDocumentFromReader(strings.NewReader(testPage)) 191 | for i := 0; i < b.N; i++ { 192 | UnmarshalSelection(d.Selection, &a) 193 | } 194 | } 195 | 196 | func TestNumbers(t *testing.T) { 197 | asrt := assert.New(t) 198 | 199 | var a struct { 200 | BoolTest struct { 201 | Int int `goquery:"int"` 202 | Float float32 `goquery:"float"` 203 | Uint uint16 `goquery:"uint"` 204 | } `goquery:".foobar"` 205 | } 206 | 207 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 208 | 209 | asrt.Equal(float32(1.2345), a.BoolTest.Float) 210 | asrt.Equal(-123, a.BoolTest.Int) 211 | asrt.Equal(uint16(100), a.BoolTest.Uint) 212 | } 213 | 214 | func checkErr(asrt *assert.Assertions, err error) *CannotUnmarshalError { 215 | asrt.Error(err) 216 | asrt.IsType((*CannotUnmarshalError)(nil), err) 217 | return err.(*CannotUnmarshalError) 218 | } 219 | 220 | func TestUnmarshalError(t *testing.T) { 221 | asrt := assert.New(t) 222 | 223 | var a []ErrorFooBar 224 | 225 | err 
:= Unmarshal([]byte(testPage), &a) 226 | 227 | asrt.Contains(err.Error(), "[]goq.ErrorFooBar[0]") 228 | 229 | e := checkErr(asrt, err) 230 | e2 := checkErr(asrt, e.Err) 231 | 232 | asrt.Equal(errTestUnmarshal, e2.Err) 233 | asrt.Equal(customUnmarshalError, e2.Reason) 234 | } 235 | 236 | func TestNilUnmarshal(t *testing.T) { 237 | asrt := assert.New(t) 238 | 239 | var a *Page 240 | 241 | err := Unmarshal([]byte{}, a) 242 | e := checkErr(asrt, err) 243 | asrt.Equal(nilValue, e.Reason) 244 | } 245 | 246 | func TestNonPointer(t *testing.T) { 247 | asrt := assert.New(t) 248 | 249 | var a Page 250 | e := checkErr(asrt, Unmarshal([]byte{}, a)) 251 | asrt.Equal(nonPointer, e.Reason) 252 | } 253 | 254 | func TestWrongArrayLength(t *testing.T) { 255 | asrt := assert.New(t) 256 | 257 | var a struct { 258 | Resources [1]Resource `goquery:".resource"` 259 | } 260 | 261 | err := Unmarshal([]byte(testPage), &a) 262 | 263 | e := checkErr(asrt, err) 264 | asrt.Equal(typeConversionError, e.Reason) 265 | e2 := checkErr(asrt, e.Err) 266 | asrt.Equal(arrayLengthMismatch, e2.Reason) 267 | 268 | asrt.Contains(e.Error(), "Resource") 269 | asrt.Contains(e.Error(), "array length") 270 | } 271 | 272 | func TestInvalidLiteral(t *testing.T) { 273 | asrt := assert.New(t) 274 | 275 | var a struct { 276 | Foo int `goquery:"foo"` 277 | } 278 | 279 | err := Unmarshal([]byte(testPage), &a) 280 | 281 | e := checkErr(asrt, err).unwind() 282 | 283 | asrt.Len(e.chain, 2) 284 | asrt.Error(e.tail) 285 | asrt.Contains(err.Error(), e.tail.Error()) 286 | asrt.Contains(err.Error(), "\"true\"") 287 | asrt.Equal("true", e.val) 288 | 289 | asrt.Equal(typeConversionError, e.chain[0].Reason) 290 | asrt.Equal(typeConversionError, e.chain[1].Reason) 291 | } 292 | 293 | func TestInvalidArrayEleType(t *testing.T) { 294 | asrt := assert.New(t) 295 | 296 | var a struct { 297 | Resources [5]int `goquery:".resource"` 298 | } 299 | 300 | err := Unmarshal([]byte(testPage), &a) 301 | e := checkErr(asrt, err).unwind() 302 | 
asrt.Len(e.chain, 3) 303 | } 304 | 305 | func TestAttributeSelector(t *testing.T) { 306 | asrt := assert.New(t) 307 | 308 | var a AttrSelectorTest 309 | 310 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 311 | asrt.Equal("https://foo.com", a.Header.Location) 312 | } 313 | 314 | func TestSliceAttrSelector(t *testing.T) { 315 | asrt := assert.New(t) 316 | 317 | var a sliceAttrSelector 318 | 319 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 320 | asrt.Len(a.Things, 2) 321 | asrt.True(a.Things[0]) 322 | asrt.True(a.Things[1]) 323 | } 324 | 325 | type MapTest struct { 326 | // For map[primitive]primitive we use syntax selector,keySource,valSource 327 | Names map[string]string `goquery:"#structured-list li,[name],[val]"` 328 | // For map[primitive]Object we use the same syntax as a []primitive 329 | Resources map[string]Resource `goquery:"#resources .resource,[order]"` 330 | 331 | Nested map[string]map[string]string `goquery:"#nested-map,[name],[name],text"` 332 | } 333 | 334 | func TestMapQuery(t *testing.T) { 335 | asrt := assert.New(t) 336 | 337 | a := MapTest{} 338 | 339 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 340 | asrt.Len(a.Names, 3) 341 | asrt.Equal("flip", a.Names["foo"]) 342 | 343 | asrt.Len(a.Resources, 5) 344 | asrt.Len(a.Nested, 2) 345 | asrt.Len(a.Nested["first"], 3) 346 | asrt.Len(a.Nested["second"], 3) 347 | } 348 | 349 | func TestMapNonStringKey(t *testing.T) { 350 | asrt := assert.New(t) 351 | 352 | var a struct { 353 | Map map[int]Resource `goquery:".resource,[order]"` 354 | } 355 | 356 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 357 | asrt.Len(a.Map, 5) 358 | asrt.Equal(a.Map[1].Name, "Bar") 359 | } 360 | 361 | func TestErroringKey(t *testing.T) { 362 | asrt := assert.New(t) 363 | 364 | var a struct { 365 | Map map[ErrorFooBar]Resource `goquery:".resource,[order]"` 366 | } 367 | err := checkErr(asrt, Unmarshal([]byte(testPage), &a)) 368 | asrt.Equal(errTestUnmarshal, err.unwind().tail) 369 | } 370 | 371 | func TestDirectInsertion(t 
*testing.T) { 372 | asrt := assert.New(t) 373 | 374 | var a struct { 375 | Nodes []*html.Node `goquery:"ul#resources .resource"` 376 | } 377 | 378 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 379 | asrt.Len(a.Nodes, 5) 380 | } 381 | 382 | func TestInnerHtml(t *testing.T) { 383 | asrt := assert.New(t) 384 | 385 | var a struct { 386 | HTML []string `goquery:"ul#resources .resource,html"` 387 | } 388 | 389 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 390 | asrt.Len(a.HTML, 5) 391 | asrt.Equal(a.HTML[0], `
Foo
`) 392 | } 393 | 394 | func TestMapShortTag(t *testing.T) { 395 | asrt := assert.New(t) 396 | 397 | var a struct { 398 | Names map[string]string `goquery:"#structured-list li,[name]"` 399 | } 400 | 401 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 402 | asrt.Len(a.Names, 3) 403 | // Test that we just use inner text when missing a value selector 404 | asrt.Equal("foo", a.Names["foo"]) 405 | asrt.Equal("bar", a.Names["bar"]) 406 | } 407 | 408 | func TestNoKeySelector(t *testing.T) { 409 | asrt := assert.New(t) 410 | 411 | var a struct { 412 | Names map[string]string `goquery:"#structured-list li"` 413 | } 414 | 415 | err := checkErr(asrt, Unmarshal([]byte(testPage), &a)) 416 | asrt.Equal(missingValueSelector, err.unwind().last().Reason) 417 | } 418 | 419 | func TestMapInnerError(t *testing.T) { 420 | asrt := assert.New(t) 421 | 422 | var a struct { 423 | Names map[string]ErrorFooBar `goquery:"#structured-list li,[name]"` 424 | } 425 | err := checkErr(asrt, Unmarshal([]byte(testPage), &a)) 426 | asrt.Equal(errTestUnmarshal, err.unwind().tail) 427 | } 428 | 429 | func TestInterfaceDecode(t *testing.T) { 430 | asrt := assert.New(t) 431 | var a struct { 432 | IF interface{} `goquery:"#structured-list li"` 433 | } 434 | asrt.NoError(Unmarshal([]byte(testPage), &a)) 435 | asrt.Equal("foobarbaz", a.IF.(string)) 436 | } 437 | 438 | const hnPage = ` 439 | 440 | 441 | Hacker News
442 | 448 | 594 |
443 | 447 |
Hacker News 444 | new | comments | show | ask | jobs | submit 445 | login 446 |
449 | 450 | 452 | 453 | 454 | 456 | 457 | 458 | 460 | 461 | 462 | 464 | 465 | 466 | 468 | 469 | 470 | 472 | 473 | 474 | 476 | 477 | 478 | 480 | 481 | 482 | 484 | 485 | 486 | 488 | 489 | 490 | 492 | 493 | 494 | 496 | 497 | 498 | 500 | 501 | 502 | 504 | 505 | 506 | 508 | 509 | 510 | 512 | 513 | 514 | 516 | 517 | 518 | 520 | 521 | 522 | 524 | 525 | 526 | 528 | 529 | 530 | 532 | 533 | 534 | 536 | 537 | 538 | 540 | 541 | 542 | 544 | 545 | 546 | 548 | 549 | 550 | 552 | 553 | 554 | 556 | 557 | 558 | 560 | 561 | 562 | 564 | 565 | 566 | 569 | 572 | 576 | 577 | 578 | 579 | 589 | 590 | 591 | 592 |
1. Introducing Lottie: Airbnb's tool for adding animations to native apps (medium.com)
451 | 295 points by dikaiosune 4 hours ago | hide | 59 comments
2. Ask HN: Who is hiring? (February 2017)
455 | 438 points by whoishiring 7 hours ago | hide | 698 comments
3. Apple Said to Work on Mac Chip That Would Lessen Intel Role (bloomberg.com)
459 | 107 points by VeXocide 2 hours ago | hide | 106 comments
4. The JVM is not that heavy (opensourcery.co.za)
463 | 235 points by khy 5 hours ago | hide | 151 comments
5. This Ferrari 250 GT PF Coupe Was Hidden in a Hollywood Apartment for Decades (petrolicious.com)
467 | 75 points by 6stringmerc 2 hours ago | hide | 26 comments
6. The .NET Language Strategy (microsoft.com)
471 | 140 points by benaadams 3 hours ago | hide | 108 comments
7. Data Loss at GitLab (2ndquadrant.com)
475 | 220 points by umairshahid 6 hours ago | hide | 68 comments
8. Show HN: Arrived – Stack Overflow for US Immigration (play.google.com)
479 | 159 points by wjmclaugh 6 hours ago | hide | 62 comments
9. Unlearning descriptive statistics (debrouwere.org)
483 | 219 points by stdbrouw 8 hours ago | hide | 67 comments
10. Tech firms are departing from their see-no-evil stance on society and politics (economist.com)
487 | 28 points by martincmartin 2 hours ago | hide | 2 comments
11. The Tactics That Certain GBA Cartridges Use to Defeat Emulation Software (mgba.io)
491 | 61 points by kibwen 3 hours ago | hide | 25 comments
12. Stop Filing Bugs, File a Container (runkit.com)
495 | 94 points by tolmasky 4 hours ago | hide | 23 comments
13. Collaborative Editing for Vim (github.com)
499 | 22 points by JepZ 2 hours ago | hide | 4 comments
14. Four Column ASCII (garbagecollected.org)
503 | 432 points by nishs 14 hours ago | hide | 54 comments
15. Causal inference in python (github.com)
507 | 10 points by aleyan 1 hour ago | hide | discuss
16. Bright (W15 – developing countries solar) is hiring its first front-end eng (SF) (greenhouse.io)
511 | 1 hour ago | hide
17. Scott Kelly's DNA shows unexpected telomere lengthening after year in space (nature.com)
515 | 132 points by ramzyo 8 hours ago | hide | 49 comments
18. Million requests per second with Python (medium.com)
519 | 371 points by d_theorist 13 hours ago | hide | 134 comments
19. Facebook Ordered to Pay $500M in Oculus Lawsuit (variety.com)
523 | 135 points by _pius 3 hours ago | hide | 75 comments
20. “Becoming Warren Buffett,” the Man, Not the Investor (newyorker.com)
527 | 153 points by artsandsci 9 hours ago | hide | 74 comments
21. Cambridge Analytica: The Data That Turned the World Upside Down (vice.com)
531 | 99 points by lakeeffect 6 hours ago | hide | 45 comments
22. Causality in machine learning (unofficialgoogledatascience.com)
535 | 61 points by maverick_iceman 7 hours ago | hide | 17 comments
23. Dolphin Progress Report: January 2017 (dolphin-emu.org)
539 | 53 points by dEnigma 3 hours ago | hide | 7 comments
24. Tesla Motors, Inc. Is Now Officially Tesla, Inc (techcrunch.com)
543 | 168 points by pearlsteinj 8 hours ago | hide | 72 comments
25. Living a Meaningful Life (bbc.com)
547 | 110 points by bootload 12 hours ago | hide | 24 comments
26. 6.S191: Introduction to Deep Learning (introtodeeplearning.com)
551 | 150 points by seycombi 11 hours ago | hide | 12 comments
27. An Email Thread Between a Developer and Gigster (andychase.me)
555 | 667 points by mfts0 9 hours ago | hide | 230 comments
28. Ask HN: When and how should I release my open source tool?
559 | 20 points by flaque 1 hour ago | hide | 7 comments
29. Fake News Challenge (fakenewschallenge.org)
563 | 153 points by phreeza 6 hours ago | hide | 150 comments
567 | 30. 568 | 573 | SHA-512 is 1.5x faster than SHA-256 on 64-bit platforms 574 | (stackexchange.com) 575 |
580 | 7 points by 581 | steffenweber 582 | 51 minutes ago 583 | 584 | | 585 | hide 586 | | 587 | discuss 588 |
More
593 |

Guidelines 595 | | FAQ 596 | | Support 597 | | API 598 | | Security 599 | | Lists 600 | | Bookmarklet 601 | | DMCA 602 | | Apply to YC 603 | | Contact

Search: 604 |
605 |
` 606 | 607 | type page struct { 608 | Items map[int]*item `goquery:".itemlist,[id]"` 609 | } 610 | 611 | type pageNoPtr struct { 612 | Items map[int]item `goquery:".itemlist,[id]"` 613 | } 614 | 615 | type score int 616 | 617 | func (s *score) UnmarshalHTML(nodes []*html.Node) error { 618 | sel := NodeSelector(nodes) 619 | num := strings.Split(sel.Text(), " ")[0] 620 | if num == "" { 621 | return nil 622 | } 623 | n, err := strconv.ParseInt(num, 10, 64) 624 | if err != nil { 625 | return err 626 | } 627 | 628 | *s = score(n) 629 | return nil 630 | } 631 | 632 | type item struct { 633 | Link string `goquery:".title a,[href]"` 634 | Site string `goquery:".title .sitestr,text"` 635 | Points score `goquery:"!Next,.score,text"` 636 | } 637 | 638 | func TestHNPage(t *testing.T) { 639 | asrt := assert.New(t) 640 | 641 | var p page 642 | 643 | asrt.NoError(Unmarshal([]byte(hnPage), &p)) 644 | asrt.Len(p.Items, 30) 645 | asrt.NotNil(p.Items[13546354]) 646 | 647 | i := p.Items[13546354] 648 | asrt.Equal("http://crypto.stackexchange.com/questions/26336/sha512-faster-than-sha256", i.Link) 649 | asrt.Equal("stackexchange.com", i.Site) 650 | asrt.Equal(7, int(i.Points)) 651 | 652 | var p2 pageNoPtr 653 | 654 | asrt.NoError(Unmarshal([]byte(hnPage), &p2)) 655 | asrt.Len(p2.Items, 30) 656 | asrt.NotNil(p2.Items[13546354]) 657 | 658 | i2 := p2.Items[13546354] 659 | asrt.Equal("http://crypto.stackexchange.com/questions/26336/sha512-faster-than-sha256", i2.Link) 660 | asrt.Equal("stackexchange.com", i2.Site) 661 | asrt.Equal(7, int(i2.Points)) 662 | } 663 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package goq 2 | 3 | import "reflect" 4 | 5 | // TypeDeref returns the underlying type if the given type is a pointer. 
6 | func TypeDeref(t reflect.Type) reflect.Type { 7 | for t != nil && t.Kind() == reflect.Ptr { 8 | t = t.Elem() 9 | } 10 | return t 11 | } 12 | 13 | // indirect is stolen mostly from pkg/encoding/json/decode.go and removed some 14 | // cases (handling `null`) that goquery doesn't need to handle. 15 | func indirect(v reflect.Value) (Unmarshaler, reflect.Value) { 16 | if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() { 17 | v = v.Addr() 18 | } 19 | for { 20 | // Load value from interface, but only if the result will be 21 | // usefully addressable. 22 | if v.Kind() == reflect.Interface && !v.IsNil() { 23 | e := v.Elem() 24 | if e.Kind() == reflect.Ptr && !e.IsNil() && (e.Elem().Kind() == reflect.Ptr) { 25 | v = e 26 | continue 27 | } 28 | } 29 | 30 | if v.Kind() != reflect.Ptr { 31 | break 32 | } 33 | 34 | if v.IsNil() { 35 | v.Set(reflect.New(TypeDeref(v.Type()))) 36 | } 37 | if v.Type().NumMethod() > 0 { 38 | if u, ok := v.Interface().(Unmarshaler); ok { 39 | return u, reflect.Value{} 40 | } 41 | } 42 | v = v.Elem() 43 | } 44 | return nil, v 45 | } 46 | --------------------------------------------------------------------------------