├── fetcher ├── gopkg │ ├── _testdata │ │ └── encoding │ │ │ ├── out.json │ │ │ └── data.zip │ ├── gopkg_imps.gop │ └── gop_autogen.go ├── torch │ ├── _testdata │ │ ├── invalid │ │ │ ├── out.json │ │ │ └── data.zip │ │ └── eye │ │ │ ├── data.zip │ │ │ └── out.json │ ├── pysig_torch.gop │ └── gop_autogen.go ├── githubisstask │ ├── _testdata │ │ └── gopkgsupport │ │ │ ├── data.zip │ │ │ └── out.json │ ├── github_issue_task.gop │ └── gop_autogen.go ├── hrefs │ ├── hrefs.gop │ └── gop_autogen.go └── fetch.go ├── _testdata ├── github │ └── repos │ │ ├── data.zip │ │ └── out.json └── text │ └── eyesig │ ├── data.zip │ └── out.json ├── go.mod ├── go.sum ├── tutorial ├── 01-Links │ ├── links.gop │ └── gop_autogen.go └── 02-GithubRepos │ ├── repos.gop │ └── gop_autogen.go ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ └── go.yml ├── stream ├── http │ ├── nocache │ │ └── nocache.go │ ├── httpstrm.go │ └── cached │ │ └── cached.go ├── inline │ └── inline.go ├── stream_test.go ├── stream.go └── zip │ └── zipstrm.go ├── builtin_test.go ├── chore ├── stdpkgprogress │ └── go_stdpkg_progress.go ├── hreflinks │ └── links.go ├── gopkgimps │ └── gopkgimps.go ├── pysigfetch │ └── pysigfetch.go └── gostdpkgs │ └── gostdpkgs.go ├── README.md ├── cmd └── hdq │ ├── internal │ ├── fetch │ │ └── fetch.go │ ├── help │ │ └── help.go │ └── base │ │ └── base.go │ └── hdq.go ├── hdqtest └── hdqtest.go ├── hdq_test.go ├── html_utils.go ├── LICENSE ├── hdq_helper.go └── hdq.go /fetcher/gopkg/_testdata/encoding/out.json: -------------------------------------------------------------------------------- 1 | { 2 | "path": "", 3 | "importedBy": 14960 4 | } -------------------------------------------------------------------------------- /fetcher/torch/_testdata/invalid/out.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "sig": "\u003cNULL\u003e" 4 | } 
-------------------------------------------------------------------------------- /_testdata/github/repos/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goplus/hdq/HEAD/_testdata/github/repos/data.zip -------------------------------------------------------------------------------- /_testdata/text/eyesig/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goplus/hdq/HEAD/_testdata/text/eyesig/data.zip -------------------------------------------------------------------------------- /fetcher/torch/_testdata/eye/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goplus/hdq/HEAD/fetcher/torch/_testdata/eye/data.zip -------------------------------------------------------------------------------- /fetcher/gopkg/_testdata/encoding/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goplus/hdq/HEAD/fetcher/gopkg/_testdata/encoding/data.zip -------------------------------------------------------------------------------- /fetcher/torch/_testdata/invalid/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goplus/hdq/HEAD/fetcher/torch/_testdata/invalid/data.zip -------------------------------------------------------------------------------- /_testdata/text/eyesig/out.json: -------------------------------------------------------------------------------- 1 | "torch.eye(n, m=None, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor¶" -------------------------------------------------------------------------------- /fetcher/githubisstask/_testdata/gopkgsupport/data.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/goplus/hdq/HEAD/fetcher/githubisstask/_testdata/gopkgsupport/data.zip -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/goplus/hdq 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/qiniu/x v1.15.1 7 | golang.org/x/net v0.34.0 8 | ) 9 | 10 | retract v0.8.0 11 | -------------------------------------------------------------------------------- /fetcher/torch/_testdata/eye/out.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "type": "function", 4 | "sig": "(n, m=None, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor" 5 | } -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/qiniu/x v1.15.1 h1:avE+YQaowp8ZExjylOeSM73rUo3MQKBAYVxh4NJ8dY8= 2 | github.com/qiniu/x v1.15.1/go.mod h1:AiovSOCaRijaf3fj+0CBOpR1457pn24b0Vdb1JpwhII= 3 | golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= 4 | golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= 5 | -------------------------------------------------------------------------------- /tutorial/01-Links/links.gop: -------------------------------------------------------------------------------- 1 | import ( 2 | "github.com/goplus/hdq" 3 | "os" 4 | ) 5 | 6 | func links(r any) []string { 7 | doc := hdq.Source(r) 8 | return [link for a <- doc.any.a if link := a.href?:""; link != ""] 9 | } 10 | 11 | for link in links("zip:../../_testdata/github/repos/data.zip#index.htm") { 12 | echo link 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _htmlab 2 | 
.DS_Store 3 | .gop 4 | coverage.txt 5 | index.htm 6 | # gop_autogen*.go 7 | 8 | # Binaries for programs and plugins 9 | *.exe 10 | *.exe~ 11 | *.dll 12 | *.so 13 | *.dylib 14 | 15 | # Test binary, built with `go test -c` 16 | *.test 17 | 18 | # Output of the go coverage tool, specifically when used with LiteIDE 19 | *.out 20 | 21 | # Dependency directories (remove the comment below to include it) 22 | # vendor/ 23 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: github-actions 9 | directory: / 10 | labels: 11 | - dependabot 12 | - actions 13 | schedule: 14 | interval: daily 15 | 16 | - package-ecosystem: "gomod" # See documentation for possible values 17 | directory: "/" # Location of package manifests 18 | schedule: 19 | interval: "daily" 20 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | strategy: 13 | matrix: 14 | go-version: [1.18.x, 1.21.x] 15 | os: [ubuntu-latest, macos-latest] 16 | runs-on: ${{ matrix.os }} 17 | steps: 18 | - uses: actions/checkout@v5 19 | 20 | - name: Set up Go 21 | uses: actions/setup-go@v6 22 | with: 23 | go-version: ${{ matrix.go-version }} 24 | 25 | - name: Build 26 | run: go build -v ./... 
27 | 28 | - name: Test 29 | run: go test -v -coverprofile=coverage.txt -covermode=atomic ./... 30 | 31 | - name: Codecov 32 | uses: codecov/codecov-action@v5 33 | with: 34 | token: ${{ secrets.CODECOV_TOKEN }} 35 | -------------------------------------------------------------------------------- /stream/http/nocache/nocache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package nocache 17 | 18 | import ( 19 | "github.com/goplus/hdq/stream" 20 | "github.com/goplus/hdq/stream/http" 21 | ) 22 | 23 | func init() { 24 | stream.Register("http", http.Open) 25 | stream.Register("https", http.Open) 26 | } 27 | -------------------------------------------------------------------------------- /builtin_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package hdq 17 | 18 | import "testing" 19 | 20 | func TestCached(t *testing.T) { 21 | cached := []cachedGetter{ 22 | new(fixNodes), 23 | new(anyNodes), 24 | new(oneNode), 25 | } 26 | for _, v := range cached { 27 | v.Cached() 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /chore/stdpkgprogress/go_stdpkg_progress.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/goplus/hdq/fetcher" 10 | "github.com/goplus/hdq/fetcher/githubisstask" 11 | _ "github.com/goplus/hdq/stream/http/nocache" 12 | ) 13 | 14 | const importedBy = "Imported By: " 15 | 16 | // main fetches the task list of GitHub issue goplus/llgo#642 and prints how 17 | // much of it is done, twice: weighted by log2(n+1) per task, and (in 18 | // parentheses) weighted by n itself, where n is the task's "Imported By" count. 19 | // 20 | // Usage: stdpkgprogress 21 | func main() { 22 | doc, err := fetcher.FromInput("githubisstask", "goplus/llgo#642") 23 | if err != nil { 24 | panic(err) 25 | } 26 | var done, total float64 27 | var ndone, ntotal int 28 | ret := doc.(githubisstask.Result) 29 | for _, task := range ret.Tasks { 30 | desc := task.Desc // fmt* (Imported By: 4513111) 31 | if pos := strings.Index(desc, importedBy); pos > 0 { 32 | ntext := strings.TrimSuffix(desc[pos+len(importedBy):], ")") 33 | if n, e := strconv.Atoi(ntext); e == nil { 34 | w := math.Log2(float64(n) + 1) 35 | total += w 36 | ntotal += n 37 | if task.Done { 38 | done += w 39 | ndone += n 40 | } 41 | } 42 | } 43 | } 44 | if ntotal == 0 { 45 | // No countable tasks: both ratios below would be 0/0 and print NaN. 46 | fmt.Println("Progress: n/a (no tasks with an import count)") 47 | return 48 | } 49 | np := float64(ndone) / float64(ntotal) * 100 50 | fmt.Printf("Progress: %.2f%% (%.2f%%)\n", done/total*100, np) 51 | } 52 | -------------------------------------------------------------------------------- /stream/inline/inline.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use
this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package inline 17 | 18 | import ( 19 | "io" 20 | "strings" 21 | 22 | "github.com/goplus/hdq/stream" 23 | ) 24 | 25 | // nilCloser adapts an io.Reader to io.ReadCloser with a Close that does nothing. 26 | type nilCloser struct { 27 | io.Reader 28 | } 29 | 30 | // Close implements io.Closer; it always returns nil. 31 | func (p *nilCloser) Close() error { 32 | return nil 33 | } 34 | 35 | // Open opens an inline text object: everything after the "inline:" prefix of 36 | // url is the content of the returned reader. It never returns an error. 37 | func Open(url string) (io.ReadCloser, error) { 38 | file := strings.TrimPrefix(url, "inline:") 39 | r := strings.NewReader(file) 40 | return &nilCloser{r}, nil 41 | } 42 | 43 | // init registers Open as the opener for the "inline" scheme. 44 | func init() { 45 | stream.Register("inline", Open) 46 | } 47 | -------------------------------------------------------------------------------- /fetcher/hrefs/hrefs.gop: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | */ 15 | 16 | package links 17 | 18 | import ( 19 | "github.com/goplus/hdq" 20 | "github.com/goplus/hdq/fetcher" 21 | ) 22 | 23 | type Result struct { 24 | URL string `json:"url,omitempty"` 25 | Hrefs []string `json:"hrefs,omitempty"` 26 | } 27 | 28 | // New collects all href links from a html document. 29 | func New(input any, doc hdq.NodeSet) Result { 30 | hrefs := [link for a <- doc.any.a if link := a.href?:""; link != ""] 31 | return {input.(string), hrefs} 32 | } 33 | 34 | // URL returns the input URL for the given input. 35 | func URL(input any) string { 36 | return input.(string) 37 | } 38 | 39 | func init() { 40 | fetcher.Register("hrefs", New, URL) 41 | } 42 | -------------------------------------------------------------------------------- /tutorial/01-Links/gop_autogen.go: -------------------------------------------------------------------------------- 1 | // Code generated by gop (Go+); DO NOT EDIT. 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "github.com/goplus/hdq" 8 | ) 9 | 10 | const _ = true 11 | //line tutorial/01-Links/links.gop:6:1 12 | func links(r interface{}) []string { 13 | //line tutorial/01-Links/links.gop:7:1 14 | doc := hdq.Source(r) 15 | //line tutorial/01-Links/links.gop:8:1 16 | return func() (_gop_ret []string) { 17 | //line tutorial/01-Links/links.gop:8:1 18 | doc.Any().A().Gop_Enum(func(a hdq.NodeSet) { 19 | //line tutorial/01-Links/links.gop:8:1 20 | if 21 | //line tutorial/01-Links/links.gop:8:1 22 | link := func() (_gop_ret string) { 23 | //line tutorial/01-Links/links.gop:8:1 24 | var _gop_err error 25 | //line tutorial/01-Links/links.gop:8:1 26 | _gop_ret, _gop_err = a.Href__0() 27 | //line tutorial/01-Links/links.gop:8:1 28 | if _gop_err != nil { 29 | //line tutorial/01-Links/links.gop:8:1 30 | return "" 31 | } 32 | //line tutorial/01-Links/links.gop:8:1 33 | return 34 | }(); link != "" { 35 | //line tutorial/01-Links/links.gop:8:1 36 | _gop_ret = append(_gop_ret, link) 37 | } 38 | }) 39 | //line 
tutorial/01-Links/links.gop:8:1 40 | return 41 | }() 42 | } 43 | //line tutorial/01-Links/links.gop:11 44 | func main() { 45 | for 46 | //line tutorial/01-Links/links.gop:11:1 47 | _, link := range links("zip:../../_testdata/github/repos/data.zip#index.htm") { 48 | //line tutorial/01-Links/links.gop:12:1 49 | fmt.Println(link) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /stream/stream_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package stream_test 17 | 18 | import ( 19 | "io" 20 | "testing" 21 | 22 | "github.com/goplus/hdq/stream" 23 | _ "github.com/goplus/hdq/stream/inline" 24 | ) 25 | 26 | func TestBasic(t *testing.T) { 27 | f, err := stream.Open("inline:hello") 28 | if err != nil { 29 | t.Fatal("Open failed:", err) 30 | } 31 | b, err := io.ReadAll(f) 32 | if err != nil { 33 | t.Fatal("ioutil.ReadAll failed:", err) 34 | } 35 | if string(b) != "hello" { 36 | t.Fatal("unexpected data") 37 | } 38 | } 39 | 40 | func TestUnknownScheme(t *testing.T) { 41 | _, err := stream.Open("bad://foo") 42 | if err == nil || err.Error() != "hdq/stream.Open bad://foo: unknown scheme" { 43 | t.Fatal("Open failed:", err) 44 | } 45 | } 46 | 47 | func TestOpenFile(t *testing.T) { 48 | _, err := stream.Open("/bin/not-exists/foo") 49 | if err == nil { 50 | t.Fatal("Open local file success?") 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /chore/hreflinks/links.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | "encoding/json" 20 | "fmt" 21 | "io" 22 | "log" 23 | "os" 24 | "strings" 25 | 26 | "github.com/goplus/hdq/fetcher" 27 | _ "github.com/goplus/hdq/fetcher/hrefs" 28 | _ "github.com/goplus/hdq/stream/http/nocache" 29 | ) 30 | 31 | // main fetches the "hrefs" result for every URL on the command line and writes 32 | // all results to stdout as one JSON array. A single "-" argument means the 33 | // URLs are read from stdin, one per line. 34 | // 35 | // Usage: hreflinks [url ...] 36 | func main() { 37 | if len(os.Args) < 2 { 38 | fmt.Fprintln(os.Stderr, "Usage: hreflinks [url ...]") 39 | os.Exit(1) 40 | } 41 | urls := os.Args[1:] 42 | if len(urls) == 1 && urls[0] == "-" { 43 | b, err := io.ReadAll(os.Stdin) 44 | if err != nil { 45 | panic(err) 46 | } 47 | // One URL per line; trim each line so "\r\n" input works and skip 48 | // blank lines so that no empty URL is fetched. 49 | urls = nil 50 | for _, line := range strings.Split(string(b), "\n") { 51 | if line = strings.TrimSpace(line); line != "" { 52 | urls = append(urls, line) 53 | } 54 | } 55 | } 56 | docs := make([]any, 0, len(urls)) 57 | for _, url := range urls { 58 | log.Println("==> Fetch", url) 59 | doc, err := fetcher.FromInput("hrefs", url) 60 | if err != nil { 61 | panic(err) 62 | } 63 | docs = append(docs, doc) 64 | } 65 | if err := json.NewEncoder(os.Stdout).Encode(docs); err != nil { 66 | panic(err) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /chore/gopkgimps/gopkgimps.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | "fmt" 20 | "log" 21 | "os" 22 | "sort" 23 | 24 | "github.com/goplus/hdq/fetcher" 25 | "github.com/goplus/hdq/fetcher/gopkg" 26 | _ "github.com/goplus/hdq/stream/http/cached" 27 | ) 28 | 29 | // main fetches the "gopkg" result for every package path on the command line 30 | // and prints them as an unchecked task list ("- [ ] path (Imported By: n)"), 31 | // ordered by descending ImportedBy count. Packages whose count is 0 are omitted. 32 | // 33 | // Usage: gopkgimps [pkgPath ...] 34 | func main() { 35 | if len(os.Args) < 2 { 36 | fmt.Fprintln(os.Stderr, "Usage: gopkgimps [pkgPath ...]") 37 | os.Exit(1) 38 | } 39 | names := os.Args[1:] 40 | docs := make([]gopkg.Result, 0, len(names)) 41 | for _, name := range names { 42 | log.Println("==> Fetch", name) 43 | doc, err := fetcher.FromInput("gopkg", name) 44 | if err != nil { 45 | panic(err) 46 | } 47 | docs = append(docs, doc.(gopkg.Result)) 48 | } 49 | sort.Slice(docs, func(i, j int) bool { 50 | return docs[i].ImportedBy > docs[j].ImportedBy 51 | }) 52 | for _, doc := range docs { 53 | if doc.ImportedBy == 0 { 54 | // docs is sorted in descending order, so (assuming counts are never 55 | // negative) every remaining entry is 0 as well. 56 | break 57 | } 58 | fmt.Printf("- [ ] %s (Imported By: %d)\n", doc.Path, doc.ImportedBy) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /fetcher/gopkg/gopkg_imps.gop: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | */ 15 | 16 | package gopkg 17 | 18 | import ( 19 | "strings" 20 | 21 | "github.com/goplus/hdq" 22 | "github.com/goplus/hdq/fetcher" 23 | ) 24 | 25 | type Result struct { 26 | Path string `json:"path"` 27 | ImportedBy int `json:"importedBy"` 28 | } 29 | 30 | // New creates a new Result from a html document. 31 | func New(input any, doc hdq.NodeSet) Result { 32 | const importedByPrefix = "Imported By:" 33 | path := input.(string) 34 | a := doc.any.a.attribute("aria-label", v => strings.hasPrefix(v, importedByPrefix)).one 35 | if !a.ok { 36 | return {path, 0} 37 | } 38 | label := a.attr("aria-label")! 39 | labelVal := strings.trimSpace(label[len(importedByPrefix):]) 40 | importedBy := strings.replaceAll(labelVal, ",", "").int! 41 | return {path, importedBy} 42 | } 43 | 44 | // URL returns the input URL for the given input. 45 | func URL(input any) string { 46 | return "https://pkg.go.dev/" + input.(string) 47 | } 48 | 49 | func init() { 50 | fetcher.Register("gopkg", New, URL) 51 | } 52 | -------------------------------------------------------------------------------- /fetcher/githubisstask/_testdata/gopkgsupport/out.json: -------------------------------------------------------------------------------- 1 | { 2 | "issue": "", 3 | "tasks": [ 4 | { 5 | "desc": "fmt* (Imported By: 4513111)", 6 | "done": true 7 | }, 8 | { 9 | "desc": "time* (Imported By: 2238303)", 10 | "done": true 11 | }, 12 | { 13 | "desc": "strings (Imported By: 2104027)", 14 | "done": true 15 | }, 16 | { 17 | "desc": "os* (Imported By: 1956039)", 18 | "done": true 19 | }, 20 | { 21 | "desc": "context (Imported By: 1460725)", 22 | "done": true 23 | }, 24 | { 25 | "desc": "net/http (Imported By: 1415440)", 26 | "done": false 27 | }, 28 | { 29 | "desc": "errors (Imported By: 1294097)", 30 | "done": true 31 | }, 32 | { 33 | "desc": "io (Imported By: 1268791)", 34 | "done": true 35 | }, 36 | { 37 | "desc": "strconv (Imported By: 1206047)", 38 | "done": true 39 | }, 40 | { 41 | "desc": "encoding/json 
(Imported By: 1201739)", 42 | "done": false 43 | }, 44 | { 45 | "desc": "log (Imported By: 1117493)", 46 | "done": true 47 | }, 48 | { 49 | "desc": "sync* (Imported By: 1075441)", 50 | "done": true 51 | }, 52 | { 53 | "desc": "bytes (Imported By: 1021859)", 54 | "done": true 55 | }, 56 | { 57 | "desc": "io/ioutil (Imported By: 794650)", 58 | "done": true 59 | }, 60 | { 61 | "desc": "reflect* (Imported By: 647872)", 62 | "done": true 63 | }, 64 | { 65 | "desc": "path/filepath (Imported By: 541715)", 66 | "done": true 67 | }, 68 | { 69 | "desc": "net (Imported By: 538522)", 70 | "done": false 71 | }, 72 | { 73 | "desc": "math (Imported By: 502954)", 74 | "done": true 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /fetcher/githubisstask/github_issue_task.gop: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package githubisstask 17 | 18 | import ( 19 | "strings" 20 | 21 | "github.com/goplus/hdq" 22 | "github.com/goplus/hdq/fetcher" 23 | ) 24 | 25 | type Task struct { 26 | Desc string `json:"desc"` 27 | Done bool `json:"done"` 28 | } 29 | 30 | type Result struct { 31 | Issue string `json:"issue"` // goplus/llgo#642 32 | Tasks []Task `json:"tasks"` 33 | } 34 | 35 | // New creates a new Result from a html document. 
36 | func New(input any, doc hdq.NodeSet) Result { 37 | issue := input.(string) 38 | taskList := doc.any.ul.class("contains-task-list").one 39 | tasks := [Task{li.text!, li.firstElementChild.hasAttr("checked")} for li <- taskList.child.li] 40 | return {issue, tasks} 41 | } 42 | 43 | // URL returns the input URL for the given name. 44 | func URL(input any) string { 45 | issue := input.(string) 46 | if strings.hasPrefix(issue, "https://github.com/") { 47 | return issue 48 | } 49 | return "https://github.com/" + strings.Replace(issue, "#", "/issues/", 1) 50 | } 51 | 52 | func init() { 53 | fetcher.Register("githubisstask", New, URL) 54 | } 55 | -------------------------------------------------------------------------------- /chore/pysigfetch/pysigfetch.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | "encoding/json" 20 | "fmt" 21 | "io" 22 | "log" 23 | "os" 24 | "strings" 25 | 26 | "github.com/goplus/hdq/fetcher" 27 | _ "github.com/goplus/hdq/fetcher/torch" 28 | _ "github.com/goplus/hdq/stream/http/cached" 29 | ) 30 | 31 | type module struct { 32 | Name string `json:"name"` 33 | Items []any `json:"items"` 34 | } 35 | 36 | // Usage: pysigfetch module [name ...] 
37 | func main() { 38 | if len(os.Args) < 3 { 39 | fmt.Fprintln(os.Stderr, "Usage: pysigfetch module [name ...]") 40 | os.Exit(1) 41 | } 42 | moduleName := os.Args[1] 43 | names := os.Args[2:] 44 | if len(names) == 1 && names[0] == "-" { 45 | b, err := io.ReadAll(os.Stdin) 46 | if err != nil { 47 | panic(err) 48 | } 49 | // Accept any run of whitespace (spaces, tabs, newlines) as the separator 50 | // so piped, line-oriented input works; empty input yields no names. 51 | names = strings.Fields(string(b)) 52 | } 53 | docs := make([]any, 0, len(names)) 54 | for _, name := range names { 55 | log.Println("==> Fetch", name) 56 | doc, err := fetcher.FromInput(moduleName, name) 57 | if err != nil { 58 | panic(err) 59 | } 60 | docs = append(docs, doc) 61 | } 62 | if err := json.NewEncoder(os.Stdout).Encode(module{moduleName, docs}); err != nil { 63 | panic(err) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hdq - HTML DOM Query Language for XGo 2 | ======== 3 | 4 | [![Build Status](https://github.com/goplus/hdq/actions/workflows/go.yml/badge.svg)](https://github.com/goplus/hdq/actions/workflows/go.yml) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/goplus/hdq)](https://goreportcard.com/report/github.com/goplus/hdq) 6 | [![GitHub release](https://img.shields.io/github/v/tag/goplus/hdq.svg?label=release)](https://github.com/goplus/hdq/releases) 7 | [![Coverage Status](https://codecov.io/gh/goplus/hdq/branch/main/graph/badge.svg)](https://codecov.io/gh/goplus/hdq) 8 | [![Language](https://img.shields.io/badge/language-XGo-blue.svg)](https://github.com/goplus/gop) 9 | [![GoDoc](https://img.shields.io/badge/godoc-reference-teal.svg)](https://pkg.go.dev/mod/github.com/goplus/hdq) 10 | 11 | ## Summary about hdq 12 | 13 | hdq is a XGo package for processing HTML documents. 14 | 15 | ## Tutorials 16 | 17 | ### Collect links of a html page 18 | 19 | How to collect all links of a html page? If you use `hdq`, it is very easy.
20 | 21 | ```go 22 | import "github.com/goplus/hdq" 23 | 24 | func links(url any) []string { 25 | doc := hdq.Source(url) 26 | return [link for a in doc.any.a if link := a.href?:""; link != ""] 27 | } 28 | ``` 29 | 30 | First, we call `hdq.Source(url)` to create a `node set` named `doc`. `doc` is a node set which only contains one node, the root node. 31 | 32 | Then, we select all `a` elements with `doc.any.a`. Here `doc.any` means all nodes in the HTML document. 33 | 34 | Then, we visit all these `a` elements, get the `href` attribute value of each and assign it to the variable `link`. If `link` is not empty, we collect it. 35 | 36 | Finally, we return all collected links. Go to [tutorial/01-Links](tutorial/01-Links/links.gop) to get the full source code. 37 | -------------------------------------------------------------------------------- /stream/stream.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | */ 15 | 16 | package stream 17 | 18 | import ( 19 | "errors" 20 | "io" 21 | "io/fs" 22 | "os" 23 | "strings" 24 | ) 25 | 26 | var ( 27 | // ErrUnknownScheme is the error wrapped by Open when the url's scheme has no registered open function. 28 | ErrUnknownScheme = errors.New("unknown scheme") 29 | ) 30 | 31 | // ------------------------------------------------------------------------------------- 32 | 33 | // OpenFunc opens the resource named by file. Open calls it with the complete url, scheme included. 34 | type OpenFunc = func(file string) (io.ReadCloser, error) 35 | 36 | var ( 37 | // openers maps a scheme to its registered open function. 38 | openers = map[string]OpenFunc{} 39 | ) 40 | 41 | // Register registers a scheme with an open function, replacing any function 42 | // previously registered for that scheme. The registry is a plain map that is 43 | // written here without locking. 44 | func Register(scheme string, open OpenFunc) { 45 | openers[scheme] = open 46 | } 47 | 48 | // Open opens url. A url without a scheme is opened as a local file with 49 | // os.Open; otherwise the open function registered for the scheme is called 50 | // with the whole url. A scheme that was never registered yields a 51 | // *fs.PathError wrapping ErrUnknownScheme. 52 | func Open(url string) (io.ReadCloser, error) { 53 | scheme := schemeOf(url) 54 | if scheme == "" { 55 | return os.Open(url) 56 | } 57 | if open, ok := openers[scheme]; ok { 58 | return open(url) 59 | } 60 | return nil, &fs.PathError{Op: "hdq/stream.Open", Err: ErrUnknownScheme, Path: url} 61 | } 62 | 63 | // schemeOf returns the text before the first ':' in url, provided that ':' 64 | // occurs before any '/' and is not the first character. In every other case 65 | // it returns "". 66 | func schemeOf(url string) (scheme string) { 67 | pos := strings.IndexAny(url, ":/") 68 | if pos > 0 { 69 | if url[pos] == ':' { 70 | return url[:pos] 71 | } 72 | } 73 | return "" 74 | } 75 | 76 | // ------------------------------------------------------------------------------------- 77 | -------------------------------------------------------------------------------- /chore/gostdpkgs/gostdpkgs.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
// main prints the import path of every package found under $GOROOT/src,
// one per line.
func main() {
	srcDir := runtime.GOROOT() + "/src/"
	entries, err := os.ReadDir(srcDir)
	check(err)
	found := collect(nil, entries, srcDir, "")
	fmt.Println(strings.Join(found, "\n"))
}

// collect appends to pkgs the import path of every directory below dir that
// directly contains a .go file, and returns the extended slice.
//
// fis are the entries of dir, dir ends with a slash, and base is the import
// path prefix (empty or ending with a slash) that corresponds to dir.
// Directories named cmd, internal, vendor or testdata are skipped together
// with everything beneath them. A directory that cannot be read causes a panic.
func collect(pkgs []string, fis []os.DirEntry, dir, base string) []string {
	for _, entry := range fis {
		if !entry.IsDir() {
			continue
		}
		name := entry.Name()
		switch name {
		case "cmd", "internal", "vendor", "testdata":
			continue
		}
		subDir := dir + name + "/"
		children, err := os.ReadDir(subDir)
		check(err)
		importPath := base + name
		if hasGoFiles(children) {
			pkgs = append(pkgs, importPath)
		}
		pkgs = collect(pkgs, children, subDir, importPath+"/")
	}
	return pkgs
}

// hasGoFiles reports whether fis contains a regular (non-directory) entry
// whose name ends in ".go".
func hasGoFiles(fis []os.DirEntry) bool {
	for _, entry := range fis {
		if entry.IsDir() {
			continue
		}
		if strings.HasSuffix(entry.Name(), ".go") {
			return true
		}
	}
	return false
}

// check panics with err when err is non-nil.
func check(err error) {
	if err == nil {
		return
	}
	panic(err)
}
19 | func New(input interface{}, doc hdq.NodeSet) Result { 20 | //line fetcher/hrefs/hrefs.gop:30:1 21 | hrefs := func() (_gop_ret []string) { 22 | //line fetcher/hrefs/hrefs.gop:30:1 23 | doc.Any().A().Gop_Enum(func(a hdq.NodeSet) { 24 | //line fetcher/hrefs/hrefs.gop:30:1 25 | if 26 | //line fetcher/hrefs/hrefs.gop:30:1 27 | link := func() (_gop_ret string) { 28 | //line fetcher/hrefs/hrefs.gop:30:1 29 | var _gop_err error 30 | //line fetcher/hrefs/hrefs.gop:30:1 31 | _gop_ret, _gop_err = a.Href__0() 32 | //line fetcher/hrefs/hrefs.gop:30:1 33 | if _gop_err != nil { 34 | //line fetcher/hrefs/hrefs.gop:30:1 35 | return "" 36 | } 37 | //line fetcher/hrefs/hrefs.gop:30:1 38 | return 39 | }(); link != "" { 40 | //line fetcher/hrefs/hrefs.gop:30:1 41 | _gop_ret = append(_gop_ret, link) 42 | } 43 | }) 44 | //line fetcher/hrefs/hrefs.gop:30:1 45 | return 46 | }() 47 | //line fetcher/hrefs/hrefs.gop:31:1 48 | return Result{input.(string), hrefs} 49 | } 50 | //line fetcher/hrefs/hrefs.gop:34:1 51 | // URL returns the input URL for the given input. 52 | func URL(input interface{}) string { 53 | //line fetcher/hrefs/hrefs.gop:36:1 54 | return input.(string) 55 | } 56 | //line fetcher/hrefs/hrefs.gop:39:1 57 | func init() { 58 | //line fetcher/hrefs/hrefs.gop:40:1 59 | fetcher.Register("hrefs", New, URL) 60 | } 61 | -------------------------------------------------------------------------------- /stream/http/httpstrm.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
var (
	// DefaultUserAgent, when non-empty, is sent by Get as the User-Agent
	// header of every request.
	DefaultUserAgent string
	// ReqHeaderProc, when non-nil, is called by Get with each request after
	// the User-Agent header has been applied and before the request is sent.
	ReqHeaderProc func(req *http.Request)
)

// -------------------------------------------------------------------------------------

// Open opens a http file object: it fetches url with Get and returns the
// response body, which the caller must close.
func Open(url string) (io.ReadCloser, error) {
	resp, err := Get(url)
	if err != nil {
		return nil, err
	}
	return resp.Body, nil
}

// Get issues a GET request for url through http.DefaultClient.
//
// A response whose status code is outside 200-299 is reported as an error
// whose text is the response status. In that case the body has already been
// closed, but the response itself is still returned alongside the error.
func Get(url string) (resp *http.Response, err error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	if ua := DefaultUserAgent; ua != "" {
		req.Header.Set("User-Agent", ua)
	}
	if proc := ReqHeaderProc; proc != nil {
		proc(req)
	}
	resp, err = http.DefaultClient.Do(req)
	if err != nil {
		return resp, err
	}
	if code := resp.StatusCode; code < 200 || code > 299 {
		resp.Body.Close()
		return resp, errors.New(resp.Status)
	}
	return resp, nil
}

// -------------------------------------------------------------------------------------
----------------------------------------------------------------------------- 9 | 10 | type Repo struct { 11 | Repo string 12 | ForkedFrom string 13 | Title string 14 | Language string 15 | UpdateTime string 16 | Forks int 17 | } 18 | 19 | func newRepo(node hdq.NodeSet) Repo { 20 | aRepo := node.any.a.attr("itemprop", "name codeRepository").one 21 | repo := aRepo.href! 22 | root := aRepo.parentN(3).one 23 | forkedFrom := root.any.span.any.textContains("Forked from").one.nextSibling(1).a.href?:"" 24 | title := root.any.p.attr("itemprop", "description").text?:"" 25 | language := root.any.span.attr("itemprop", "programmingLanguage").one.text?:"" 26 | updateTime := root.any.element("relative-time").one.attr("datetime")?:"" 27 | forks := root.any.a.attr("href", repo+"/network/members").int?:0 28 | return { 29 | Repo: repo, 30 | ForkedFrom: forkedFrom, 31 | Title: title, 32 | Language: language, 33 | UpdateTime: updateTime, 34 | Forks: forks, 35 | } 36 | } 37 | 38 | // ----------------------------------------------------------------------------- 39 | 40 | type Result struct { 41 | Repos []Repo 42 | Next string 43 | } 44 | 45 | // New creates a new Result from a html document. 
46 | func New(_ any, doc hdq.NodeSet) Result { 47 | // divRepos := doc.any.div.id("user-repositories-list").one 48 | divRepos := doc.any.element(atom.Div).id("user-repositories-list").one 49 | repoList := divRepos.child.ul.one 50 | repos := [newRepo(x) for x <- repoList.child.li] 51 | next := doc.any.div.class("paginate-container").one.any.a.childEqualText("Next").href?:"" 52 | return {Repos: repos, Next: next} 53 | } 54 | 55 | // ----------------------------------------------------------------------------- 56 | -------------------------------------------------------------------------------- /stream/zip/zipstrm.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package zip 17 | 18 | import ( 19 | "archive/zip" 20 | "io" 21 | "strings" 22 | "syscall" 23 | 24 | "github.com/goplus/hdq/stream" 25 | ) 26 | 27 | // ------------------------------------------------------------------------------------- 28 | 29 | type readCloser struct { 30 | io.ReadCloser 31 | zipf *zip.ReadCloser 32 | } 33 | 34 | func (p *readCloser) Close() error { 35 | p.ReadCloser.Close() 36 | return p.zipf.Close() 37 | } 38 | 39 | // Open opens a zipped file object. 
40 | func Open(url string) (io.ReadCloser, error) { 41 | file := strings.TrimPrefix(url, "zip:") 42 | pos := strings.Index(file, "#") 43 | if pos <= 0 { 44 | return nil, syscall.EINVAL 45 | } 46 | zipfile, name := file[:pos], file[pos+1:] 47 | zipf, err := zip.OpenReader(zipfile) 48 | if err != nil { 49 | return nil, err 50 | } 51 | for _, fi := range zipf.File { 52 | if fi.Name == name { 53 | f, err := fi.Open() 54 | if err != nil { 55 | return nil, err 56 | } 57 | return &readCloser{f, zipf}, nil 58 | } 59 | } 60 | return nil, syscall.ENOENT 61 | } 62 | 63 | func init() { 64 | // zip:file#index.htm 65 | stream.Register("zip", Open) 66 | } 67 | 68 | // ------------------------------------------------------------------------------------- 69 | -------------------------------------------------------------------------------- /cmd/hdq/internal/fetch/fetch.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 The GoPlus Authors (goplus.org). All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Package fetch implements the "hdq fetch" command. 
18 | package fetch 19 | 20 | import ( 21 | "encoding/json" 22 | "io" 23 | "log" 24 | "os" 25 | "strings" 26 | 27 | "github.com/goplus/hdq/cmd/hdq/internal/base" 28 | "github.com/goplus/hdq/fetcher" 29 | ) 30 | 31 | // hdq fetch 32 | var Cmd = &base.Command{ 33 | UsageLine: "hdq fetch [flags] pageType [input ...]", 34 | Short: "Fetch objects from the html source with the specified pageType and input", 35 | } 36 | 37 | func init() { 38 | Cmd.Run = runCmd 39 | } 40 | 41 | func runCmd(cmd *base.Command, args []string) { 42 | if len(args) < 2 { 43 | cmd.Usage(os.Stderr) 44 | return 45 | } 46 | pageType := args[0] 47 | inputs := args[1:] 48 | if len(inputs) == 1 && inputs[0] == "-" { 49 | b, _ := io.ReadAll(os.Stdin) 50 | inputs = strings.Split(strings.TrimSpace(string(b)), " ") 51 | } 52 | docs := make([]any, 0, len(inputs)) 53 | for _, input := range inputs { 54 | log.Println("==> Fetch", input) 55 | doc, err := fetcher.FromInput(pageType, input) 56 | if err != nil { 57 | panic(err) 58 | } 59 | docs = append(docs, doc) 60 | } 61 | enc := json.NewEncoder(os.Stdout) 62 | enc.SetIndent("", " ") 63 | enc.Encode(docs) 64 | } 65 | -------------------------------------------------------------------------------- /fetcher/torch/pysig_torch.gop: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package torch 17 | 18 | import ( 19 | "strings" 20 | 21 | "github.com/goplus/hdq" 22 | "github.com/goplus/hdq/fetcher" 23 | ) 24 | 25 | // ----------------------------------------------------------------------------- 26 | 27 | const ( 28 | spaces = " \t\r\n¶" 29 | ) 30 | 31 | type Result struct { 32 | Name string `json:"name"` 33 | Type string `json:"type,omitempty"` 34 | Doc string `json:"doc,omitempty"` 35 | Sig string `json:"sig"` 36 | URL string `json:"url,omitempty"` 37 | } 38 | 39 | // New creates a new Result from a html document. 40 | func New(input any, doc hdq.NodeSet) Result { 41 | name := input.(string) 42 | url := name 43 | if name != "" { 44 | url = URL(input) 45 | } 46 | if doc.ok { 47 | fn := doc.any.dl.class("py function").one 48 | decl := fn.firstElementChild.dt.text! 49 | pos := strings.indexByte(decl, '(') 50 | if pos > 0 { 51 | sig := decl[pos:] 52 | return {name, "function", "", strings.trimRight(sig, spaces), url} 53 | } 54 | } 55 | return {name, "", "", "", url} 56 | } 57 | 58 | // URL returns the input URL for the given input. 59 | func URL(input any) string { 60 | return "https://pytorch.org/docs/stable/generated/torch." + input.(string) + ".html" 61 | } 62 | 63 | func init() { 64 | fetcher.Register("torch", New, URL) 65 | } 66 | 67 | // ----------------------------------------------------------------------------- 68 | -------------------------------------------------------------------------------- /hdqtest/hdqtest.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package hdqtest 17 | 18 | import ( 19 | "encoding/json" 20 | "log" 21 | "os" 22 | "path" 23 | "reflect" 24 | "strings" 25 | "testing" 26 | 27 | "github.com/goplus/hdq/fetcher" 28 | ) 29 | 30 | // FromDir tests all html files in a directory. 31 | // optional params: [filename, scheme] 32 | func FromDir(t *testing.T, sel, relDir string, conv fetcher.Conv, params ...string) { 33 | dir, err := os.Getwd() 34 | if err != nil { 35 | t.Fatal("Getwd failed:", err) 36 | } 37 | dir = path.Join(dir, relDir) 38 | fis, err := os.ReadDir(dir) 39 | if err != nil { 40 | t.Fatal("ReadDir failed:", err) 41 | } 42 | vConv := reflect.ValueOf(conv) 43 | scheme, fname := "", "/in.html" 44 | if len(params) > 0 { 45 | fname = "/" + params[0] 46 | if len(params) > 1 { 47 | scheme = params[1] + ":" 48 | } 49 | } 50 | for _, fi := range fis { 51 | name := fi.Name() 52 | if !fi.IsDir() || strings.HasPrefix(name, "_") { 53 | continue 54 | } 55 | t.Run(name, func(t *testing.T) { 56 | testFrom(t, dir+"/"+name, sel, vConv, fname, scheme) 57 | }) 58 | } 59 | } 60 | 61 | func testFrom(t *testing.T, pkgDir, sel string, conv reflect.Value, fname, scheme string) { 62 | if sel != "" && !strings.Contains(pkgDir, sel) { 63 | return 64 | } 65 | log.Println("Parsing", pkgDir) 66 | in := scheme + pkgDir + fname 67 | out := pkgDir + "/out.json" 68 | b, err := os.ReadFile(out) 69 | if err != nil { 70 | t.Fatal("ReadFile failed:", err) 71 | } 72 | expected := string(b) 73 | ret := fetcher.Convert(conv, "", in) 74 | retb, _ := json.MarshalIndent(ret, "", "\t") 75 | if v := 
string(retb); v != expected { 76 | t.Fatalf("\n==> got:\n%s\n==> expected:\n%s\n", v, expected) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /fetcher/fetch.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package fetcher 17 | 18 | import ( 19 | "errors" 20 | "reflect" 21 | 22 | "github.com/goplus/hdq" 23 | ) 24 | 25 | // func(input any, doc hdq.NodeSet) 26 | type Conv = any 27 | 28 | // ----------------------------------------------------------------------------- 29 | 30 | // Convert converts a html source to an object. 31 | func Convert(conv reflect.Value, input, source any) any { 32 | doc := reflect.ValueOf(hdq.Source(source)) 33 | out := conv.Call([]reflect.Value{reflect.ValueOf(input), doc}) 34 | return out[0].Interface() 35 | } 36 | 37 | // ----------------------------------------------------------------------------- 38 | 39 | var ( 40 | ErrUnknownPageType = errors.New("unknown page type") 41 | ) 42 | 43 | // New creates a new object from a html source by a registered converter. 
44 | func New(pageType string, input, source any) (any, error) { 45 | page, ok := convs[pageType] 46 | if !ok { 47 | return nil, ErrUnknownPageType 48 | } 49 | return Convert(page.Conv, input, source), nil 50 | } 51 | 52 | // FromInput creates a new object from the html source with the specified input. 53 | func FromInput(pageType string, input any) (any, error) { 54 | page, ok := convs[pageType] 55 | if !ok { 56 | return nil, ErrUnknownPageType 57 | } 58 | url := page.URL(input) 59 | return Convert(page.Conv, input, url), nil 60 | } 61 | 62 | // sitePageType represents a site page type. 63 | type sitePageType struct { 64 | Conv reflect.Value 65 | URL func(input any) string 66 | } 67 | 68 | var ( 69 | convs = map[string]sitePageType{} 70 | ) 71 | 72 | // Register registers a convType with a convert function. 73 | func Register(pageType string, conv Conv, urlOf func(input any) string) { 74 | vConv := reflect.ValueOf(conv) 75 | convs[pageType] = sitePageType{vConv, urlOf} 76 | } 77 | 78 | // ----------------------------------------------------------------------------- 79 | -------------------------------------------------------------------------------- /cmd/hdq/hdq.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package main 17 | 18 | import ( 19 | "flag" 20 | "fmt" 21 | "os" 22 | "strings" 23 | 24 | "github.com/goplus/hdq/cmd/hdq/internal/base" 25 | "github.com/goplus/hdq/cmd/hdq/internal/fetch" 26 | "github.com/goplus/hdq/cmd/hdq/internal/help" 27 | "github.com/qiniu/x/log" 28 | 29 | _ "github.com/goplus/hdq/fetcher/githubisstask" 30 | _ "github.com/goplus/hdq/fetcher/gopkg" 31 | _ "github.com/goplus/hdq/fetcher/hrefs" 32 | _ "github.com/goplus/hdq/fetcher/torch" 33 | _ "github.com/goplus/hdq/stream/http/cached" 34 | ) 35 | 36 | func mainUsage() { 37 | help.PrintUsage(os.Stderr, base.Hdq) 38 | os.Exit(2) 39 | } 40 | 41 | func init() { 42 | flag.Usage = mainUsage 43 | base.Hdq.Commands = []*base.Command{ 44 | fetch.Cmd, 45 | } 46 | } 47 | 48 | func main() { 49 | flag.Parse() 50 | args := flag.Args() 51 | if len(args) < 1 { 52 | flag.Usage() 53 | } 54 | log.SetFlags(log.Ldefault &^ log.LstdFlags) 55 | 56 | base.CmdName = args[0] // for error messages 57 | if args[0] == "help" { 58 | help.Help(os.Stderr, args[1:]) 59 | return 60 | } 61 | 62 | BigCmdLoop: 63 | for bigCmd := base.Hdq; ; { 64 | for _, cmd := range bigCmd.Commands { 65 | if cmd.Name() != args[0] { 66 | continue 67 | } 68 | args = args[1:] 69 | if len(cmd.Commands) > 0 { 70 | bigCmd = cmd 71 | if len(args) == 0 { 72 | help.PrintUsage(os.Stderr, bigCmd) 73 | os.Exit(2) 74 | } 75 | if args[0] == "help" { 76 | help.Help(os.Stderr, append(strings.Split(base.CmdName, " "), args[1:]...)) 77 | return 78 | } 79 | base.CmdName += " " + args[0] 80 | continue BigCmdLoop 81 | } 82 | if !cmd.Runnable() { 83 | continue 84 | } 85 | cmd.Run(cmd, args) 86 | return 87 | } 88 | helpArg := "" 89 | if i := strings.LastIndex(base.CmdName, " "); i >= 0 { 90 | helpArg = " " + base.CmdName[:i] 91 | } 92 | fmt.Fprintf(os.Stderr, "hdq %s: unknown command\nRun 'hdq help%s' for usage.\n", base.CmdName, helpArg) 93 | os.Exit(2) 94 | } 95 | } 96 | 
-------------------------------------------------------------------------------- /fetcher/torch/gop_autogen.go: -------------------------------------------------------------------------------- 1 | // Code generated by gop (Go+); DO NOT EDIT. 2 | 3 | package torch 4 | 5 | import ( 6 | "github.com/goplus/hdq" 7 | "github.com/goplus/hdq/fetcher" 8 | "github.com/qiniu/x/errors" 9 | "strings" 10 | ) 11 | 12 | const GopPackage = "github.com/goplus/hdq" 13 | const _ = true 14 | const spaces = " \t\r\n¶" 15 | 16 | type Result struct { 17 | Name string `json:"name"` 18 | Type string `json:"type,omitempty"` 19 | Doc string `json:"doc,omitempty"` 20 | Sig string `json:"sig"` 21 | URL string `json:"url,omitempty"` 22 | } 23 | //line fetcher/torch/pysig_torch.gop:39:1 24 | // New creates a new Result from a html document. 25 | func New(input interface{}, doc hdq.NodeSet) Result { 26 | //line fetcher/torch/pysig_torch.gop:41:1 27 | name := input.(string) 28 | //line fetcher/torch/pysig_torch.gop:42:1 29 | url := name 30 | //line fetcher/torch/pysig_torch.gop:43:1 31 | if name != "" { 32 | //line fetcher/torch/pysig_torch.gop:60:1 33 | url = URL(input) 34 | } 35 | //line fetcher/torch/pysig_torch.gop:46:1 36 | if doc.Ok() { 37 | //line fetcher/torch/pysig_torch.gop:47:1 38 | fn := doc.Any().Dl().Class("py function").One() 39 | //line fetcher/torch/pysig_torch.gop:48:1 40 | decl := func() (_gop_ret string) { 41 | //line fetcher/torch/pysig_torch.gop:48:1 42 | var _gop_err error 43 | //line fetcher/torch/pysig_torch.gop:48:1 44 | _gop_ret, _gop_err = fn.FirstElementChild().Dt().Text__0() 45 | //line fetcher/torch/pysig_torch.gop:48:1 46 | if _gop_err != nil { 47 | //line fetcher/torch/pysig_torch.gop:48:1 48 | _gop_err = errors.NewFrame(_gop_err, "fn.firstElementChild.dt.text", "fetcher/torch/pysig_torch.gop", 48, "torch.New") 49 | //line fetcher/torch/pysig_torch.gop:48:1 50 | panic(_gop_err) 51 | } 52 | //line fetcher/torch/pysig_torch.gop:48:1 53 | return 54 | }() 55 | //line 
fetcher/torch/pysig_torch.gop:49:1 56 | pos := strings.IndexByte(decl, '(') 57 | //line fetcher/torch/pysig_torch.gop:50:1 58 | if pos > 0 { 59 | //line fetcher/torch/pysig_torch.gop:51:1 60 | sig := decl[pos:] 61 | //line fetcher/torch/pysig_torch.gop:52:1 62 | return Result{name, "function", "", strings.TrimRight(sig, spaces), url} 63 | } 64 | } 65 | //line fetcher/torch/pysig_torch.gop:55:1 66 | return Result{name, "", "", "", url} 67 | } 68 | //line fetcher/torch/pysig_torch.gop:58:1 69 | // URL returns the input URL for the given input. 70 | func URL(input interface{}) string { 71 | //line fetcher/torch/pysig_torch.gop:60:1 72 | return "https://pytorch.org/docs/stable/generated/torch." + input.(string) + ".html" 73 | } 74 | //line fetcher/torch/pysig_torch.gop:63:1 75 | func init() { 76 | //line fetcher/torch/pysig_torch.gop:64:1 77 | fetcher.Register("torch", New, URL) 78 | } 79 | -------------------------------------------------------------------------------- /fetcher/githubisstask/gop_autogen.go: -------------------------------------------------------------------------------- 1 | // Code generated by gop (Go+); DO NOT EDIT. 2 | 3 | package githubisstask 4 | 5 | import ( 6 | "github.com/goplus/hdq" 7 | "github.com/goplus/hdq/fetcher" 8 | "github.com/qiniu/x/errors" 9 | "strings" 10 | ) 11 | 12 | const GopPackage = "github.com/goplus/hdq" 13 | const _ = true 14 | 15 | type Task struct { 16 | Desc string `json:"desc"` 17 | Done bool `json:"done"` 18 | } 19 | type Result struct { 20 | Issue string `json:"issue"` 21 | Tasks []Task `json:"tasks"` 22 | } 23 | //line fetcher/githubisstask/github_issue_task.gop:35:1 24 | // New creates a new Result from a html document. 
25 | func New(input interface{}, doc hdq.NodeSet) Result { 26 | //line fetcher/githubisstask/github_issue_task.gop:37:1 27 | issue := input.(string) 28 | //line fetcher/githubisstask/github_issue_task.gop:38:1 29 | taskList := doc.Any().Ul().Class("contains-task-list").One() 30 | //line fetcher/githubisstask/github_issue_task.gop:39:1 31 | tasks := func() (_gop_ret []Task) { 32 | //line fetcher/githubisstask/github_issue_task.gop:39:1 33 | taskList.Child().Li().Gop_Enum(func(li hdq.NodeSet) { 34 | //line fetcher/githubisstask/github_issue_task.gop:39:1 35 | _gop_ret = append(_gop_ret, Task{func() (_gop_ret string) { 36 | //line fetcher/githubisstask/github_issue_task.gop:39:1 37 | var _gop_err error 38 | //line fetcher/githubisstask/github_issue_task.gop:39:1 39 | _gop_ret, _gop_err = li.Text__0() 40 | //line fetcher/githubisstask/github_issue_task.gop:39:1 41 | if _gop_err != nil { 42 | //line fetcher/githubisstask/github_issue_task.gop:39:1 43 | _gop_err = errors.NewFrame(_gop_err, "li.text", "fetcher/githubisstask/github_issue_task.gop", 39, "githubisstask.New") 44 | //line fetcher/githubisstask/github_issue_task.gop:39:1 45 | panic(_gop_err) 46 | } 47 | //line fetcher/githubisstask/github_issue_task.gop:39:1 48 | return 49 | }(), li.FirstElementChild().HasAttr("checked")}) 50 | }) 51 | //line fetcher/githubisstask/github_issue_task.gop:39:1 52 | return 53 | }() 54 | //line fetcher/githubisstask/github_issue_task.gop:40:1 55 | return Result{issue, tasks} 56 | } 57 | //line fetcher/githubisstask/github_issue_task.gop:43:1 58 | // URL returns the input URL for the given name. 
59 | func URL(input interface{}) string { 60 | //line fetcher/githubisstask/github_issue_task.gop:45:1 61 | issue := input.(string) 62 | //line fetcher/githubisstask/github_issue_task.gop:46:1 63 | if strings.HasPrefix(issue, "https://github.com/") { 64 | //line fetcher/githubisstask/github_issue_task.gop:47:1 65 | return issue 66 | } 67 | //line fetcher/githubisstask/github_issue_task.gop:49:1 68 | return "https://github.com/" + strings.Replace(issue, "#", "/issues/", 1) 69 | } 70 | //line fetcher/githubisstask/github_issue_task.gop:52:1 71 | func init() { 72 | //line fetcher/githubisstask/github_issue_task.gop:53:1 73 | fetcher.Register("githubisstask", New, URL) 74 | } 75 | -------------------------------------------------------------------------------- /stream/http/cached/cached.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package cached 17 | 18 | import ( 19 | "crypto/md5" 20 | "encoding/base64" 21 | "fmt" 22 | "io" 23 | "io/fs" 24 | "net/url" 25 | "os" 26 | "path" 27 | 28 | "github.com/goplus/hdq/stream" 29 | "github.com/goplus/hdq/stream/http" 30 | ) 31 | 32 | // ------------------------------------------------------------------------------------- 33 | 34 | var ( 35 | cacheDir = getCacheDir() 36 | ) 37 | 38 | func getCacheDir() string { 39 | root, err := os.UserCacheDir() 40 | if err != nil { 41 | panic(err) 42 | } 43 | dir := root + "/hdq/http/" 44 | os.MkdirAll(dir, 0755) 45 | return dir 46 | } 47 | 48 | // ------------------------------------------------------------------------------------- 49 | 50 | // TODO(xsw): add checksum to cache file 51 | func WriteCache(cacheFile string, url string) (err error) { 52 | resp, err := http.Get(url) 53 | if err != nil { 54 | return 55 | } 56 | defer resp.Body.Close() 57 | f, err := os.Create(cacheFile) 58 | if err != nil { 59 | return 60 | } 61 | defer f.Close() 62 | _, err = io.Copy(f, resp.Body) 63 | return 64 | } 65 | 66 | func ReadCache(cacheFile string, fi fs.FileInfo) (ret io.ReadCloser, err error) { 67 | return os.Open(cacheFile) 68 | } 69 | 70 | // ------------------------------------------------------------------------------------- 71 | 72 | // Open opens a http file object. 
73 | func Open(url_ string) (ret io.ReadCloser, err error) { 74 | u, err := url.Parse(url_) 75 | if err != nil { 76 | return 77 | } 78 | fname := path.Base(u.Path) 79 | ext := path.Ext(fname) 80 | hash := md5.Sum([]byte(url_)) 81 | hashstr := base64.RawURLEncoding.EncodeToString(hash[:]) 82 | fname = fmt.Sprintf("%s-%s%s", fname[:len(fname)-len(ext)], hashstr, ext) 83 | file := cacheDir + fname 84 | if fi, e := os.Stat(file); e == nil { 85 | if ret, err = ReadCache(file, fi); err == nil { // cache hit 86 | return 87 | } 88 | } 89 | if err = WriteCache(file, url_); err != nil { 90 | return // write cache failed 91 | } 92 | return ReadCache(file, nil) 93 | } 94 | 95 | func init() { 96 | stream.Register("http", Open) 97 | stream.Register("https", Open) 98 | } 99 | 100 | // ------------------------------------------------------------------------------------- 101 | -------------------------------------------------------------------------------- /fetcher/gopkg/gop_autogen.go: -------------------------------------------------------------------------------- 1 | // Code generated by gop (Go+); DO NOT EDIT. 2 | 3 | package gopkg 4 | 5 | import ( 6 | "github.com/goplus/hdq" 7 | "github.com/goplus/hdq/fetcher" 8 | "github.com/qiniu/x/errors" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | const GopPackage = "github.com/goplus/hdq" 14 | const _ = true 15 | 16 | type Result struct { 17 | Path string `json:"path"` 18 | ImportedBy int `json:"importedBy"` 19 | } 20 | //line fetcher/gopkg/gopkg_imps.gop:30:1 21 | // New creates a new Result from a html document. 
func New(input interface{}, doc hdq.NodeSet) Result {
	// Generated from fetcher/gopkg/gopkg_imps.gop; the //line directives map each
	// statement back to that file, so comments are placed before them.
//line fetcher/gopkg/gopkg_imps.gop:32:1
	const importedByPrefix = "Imported By:"
	// input must hold a string: the unchecked type assertion panics otherwise.
//line fetcher/gopkg/gopkg_imps.gop:33:1
	path := input.(string)
	// Look for a node whose aria-label attribute starts with importedByPrefix.
//line fetcher/gopkg/gopkg_imps.gop:34:1
	a := doc.Any().A().Attribute__1("aria-label", func(v string) bool {
//line fetcher/gopkg/gopkg_imps.gop:34:1
		return strings.HasPrefix(v, importedByPrefix)
	}).One()
	// No such node: report zero importers instead of failing.
//line fetcher/gopkg/gopkg_imps.gop:35:1
	if !a.Ok() {
//line fetcher/gopkg/gopkg_imps.gop:36:1
		return Result{path, 0}
	}
	// Read the attribute value; an error is wrapped with frame information
	// (expression, file, line, function) and raised via panic.
//line fetcher/gopkg/gopkg_imps.gop:38:1
	label := func() (_gop_ret string) {
//line fetcher/gopkg/gopkg_imps.gop:38:1
		var _gop_err error
//line fetcher/gopkg/gopkg_imps.gop:38:1
		_gop_ret, _gop_err = a.Attr__0("aria-label")
//line fetcher/gopkg/gopkg_imps.gop:38:1
		if _gop_err != nil {
//line fetcher/gopkg/gopkg_imps.gop:38:1
			_gop_err = errors.NewFrame(_gop_err, "a.attr(\"aria-label\")", "fetcher/gopkg/gopkg_imps.gop", 38, "gopkg.New")
//line fetcher/gopkg/gopkg_imps.gop:38:1
			panic(_gop_err)
		}
//line fetcher/gopkg/gopkg_imps.gop:38:1
		return
	}()
	// Drop the prefix and surrounding whitespace, leaving only the count text.
//line fetcher/gopkg/gopkg_imps.gop:39:1
	labelVal := strings.TrimSpace(label[len(importedByPrefix):])
	// Commas are removed before Atoi; a value that still fails to parse
	// panics with frame information, like the attribute lookup above.
//line fetcher/gopkg/gopkg_imps.gop:40:1
	importedBy := func() (_gop_ret int) {
//line fetcher/gopkg/gopkg_imps.gop:40:1
		var _gop_err error
//line fetcher/gopkg/gopkg_imps.gop:40:1
		_gop_ret, _gop_err = strconv.Atoi(strings.ReplaceAll(labelVal, ",", ""))
//line fetcher/gopkg/gopkg_imps.gop:40:1
		if _gop_err != nil {
//line fetcher/gopkg/gopkg_imps.gop:40:1
			_gop_err = errors.NewFrame(_gop_err, "strings.replaceAll(labelVal, \",\", \"\").int", "fetcher/gopkg/gopkg_imps.gop", 40, "gopkg.New")
//line fetcher/gopkg/gopkg_imps.gop:40:1
			panic(_gop_err)
		}
//line fetcher/gopkg/gopkg_imps.gop:40:1
		return
	}()
//line fetcher/gopkg/gopkg_imps.gop:41:1
	return Result{path, importedBy}
}
//line fetcher/gopkg/gopkg_imps.gop:44:1
// URL returns the input URL for the given input.
// It prefixes the input, which must hold a string, with "https://pkg.go.dev/".
func URL(input interface{}) string {
//line fetcher/gopkg/gopkg_imps.gop:46:1
	return "https://pkg.go.dev/" + input.(string)
}
// init registers New and URL with the fetcher package under the name "gopkg".
//line fetcher/gopkg/gopkg_imps.gop:49:1
func init() {
//line fetcher/gopkg/gopkg_imps.gop:50:1
	fetcher.Register("gopkg", New, URL)
}
14 | */ 15 | 16 | package hdq_test 17 | 18 | import ( 19 | "strings" 20 | "testing" 21 | 22 | "github.com/goplus/hdq" 23 | "github.com/goplus/hdq/fetcher/githubisstask" 24 | "github.com/goplus/hdq/fetcher/gopkg" 25 | "github.com/goplus/hdq/fetcher/torch" 26 | "github.com/goplus/hdq/hdqtest" 27 | 28 | repos "github.com/goplus/hdq/tutorial/02-GithubRepos" 29 | ) 30 | 31 | func textOf(_ string, doc hdq.NodeSet) (ret string) { 32 | ret, _ = doc.Text__0() 33 | return 34 | } 35 | 36 | func TestText(t *testing.T) { 37 | hdqtest.FromDir(t, "", "./_testdata/text", textOf, "data.zip#index.htm", "zip") 38 | } 39 | 40 | func TestGithub(t *testing.T) { 41 | hdqtest.FromDir(t, "", "./_testdata/github", repos.New, "data.zip#index.htm", "zip") 42 | } 43 | 44 | func TestTorch(t *testing.T) { 45 | hdqtest.FromDir(t, "", "./fetcher/torch/_testdata", torch.New, "data.zip#index.htm", "zip") 46 | } 47 | 48 | func TestGoPkg(t *testing.T) { 49 | hdqtest.FromDir(t, "", "./fetcher/gopkg/_testdata", gopkg.New, "data.zip#index.htm", "zip") 50 | } 51 | 52 | func TestGithubIssueTask(t *testing.T) { 53 | hdqtest.FromDir(t, "", "./fetcher/githubisstask/_testdata", githubisstask.New, "data.zip#index.htm", "zip") 54 | } 55 | 56 | func TestSource(t *testing.T) { 57 | const data = "hello" 58 | doc := hdq.Source([]byte(data)) 59 | sources := []any{ 60 | []byte(data), 61 | strings.NewReader(data), 62 | doc, 63 | } 64 | for _, in := range sources { 65 | v := hdq.Source(in) 66 | if text, err := v.Text__0(); err != nil || text != "hello" { 67 | t.Fatal("Source failed: ", text, err) 68 | } 69 | } 70 | if doc := hdq.Source("unknown:123"); doc.Ok() { 71 | t.Fatal("Source failed: no error?") 72 | } 73 | defer func() { 74 | if recover() == nil { 75 | t.Fatalf("Source failed: no panic?") 76 | } 77 | }() 78 | hdq.Source(123) 79 | } 80 | 81 | func TestErrNodeSet(t *testing.T) { 82 | docErr := hdq.NodeSet{Err: hdq.ErrInvalidNode} 83 | fns := []func(hdq.NodeSet) hdq.NodeSet{ 84 | (hdq.NodeSet).Child, 85 | 
(hdq.NodeSet).Parent, 86 | (hdq.NodeSet).PrevSiblings, 87 | (hdq.NodeSet).NextSiblings, 88 | (hdq.NodeSet).Any, 89 | } 90 | for _, fn := range fns { 91 | if v := fn(docErr); v != docErr { 92 | t.Fatal("ErrNodeSet failed:", v) 93 | } 94 | } 95 | const data = "hello" 96 | doc := hdq.Source([]byte(data)) 97 | for _, fn := range fns { 98 | fn(doc) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /cmd/hdq/internal/help/help.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Package help implements the "hdq help” command. 18 | package help 19 | 20 | import ( 21 | "bufio" 22 | "fmt" 23 | "io" 24 | "log" 25 | "os" 26 | "strings" 27 | "text/template" 28 | "unicode" 29 | "unicode/utf8" 30 | 31 | "github.com/goplus/hdq/cmd/hdq/internal/base" 32 | ) 33 | 34 | // Help implements the 'help' command. 35 | func Help(w io.Writer, args []string) { 36 | cmd := base.Hdq 37 | Args: 38 | for i, arg := range args { 39 | for _, sub := range cmd.Commands { 40 | if sub.Name() == arg { 41 | cmd = sub 42 | continue Args 43 | } 44 | } 45 | 46 | // helpSuccess is the help command using as many args as possible that would succeed. 
47 | helpSuccess := "hdq help" 48 | if i > 0 { 49 | helpSuccess += " " + strings.Join(args[:i], " ") 50 | } 51 | fmt.Fprintf(os.Stderr, "hdq help %s: unknown help topic. Run '%s'.\n", strings.Join(args, " "), helpSuccess) 52 | os.Exit(2) 53 | } 54 | 55 | if len(cmd.Commands) > 0 { 56 | PrintUsage(w, cmd) 57 | } else { 58 | cmd.Usage(w) 59 | } 60 | // not exit 2: succeeded at 'hdq help cmd'. 61 | } 62 | 63 | var usageTemplate = `{{.Short | trim}} 64 | 65 | Usage: 66 | 67 | {{.UsageLine}} [arguments] 68 | 69 | The commands are: 70 | {{range .Commands}}{{if or (.Runnable) .Commands}} 71 | {{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}} 72 | 73 | Use "hdq help{{with .LongName}} {{.}}{{end}} " for more information about a command. 74 | 75 | ` 76 | 77 | // An errWriter wraps a writer, recording whether a write error occurred. 78 | type errWriter struct { 79 | w io.Writer 80 | err error 81 | } 82 | 83 | func (w *errWriter) Write(b []byte) (int, error) { 84 | n, err := w.w.Write(b) 85 | if err != nil { 86 | w.err = err 87 | } 88 | return n, err 89 | } 90 | 91 | // tmpl executes the given template text on data, writing the result to w. 92 | func tmpl(w io.Writer, text string, data any) { 93 | t := template.New("top") 94 | t.Funcs(template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize}) 95 | template.Must(t.Parse(text)) 96 | ew := &errWriter{w: w} 97 | err := t.Execute(ew, data) 98 | if ew.err != nil { 99 | // I/O error writing. Ignore write on closed pipe. 100 | if strings.Contains(ew.err.Error(), "pipe") { 101 | os.Exit(1) 102 | } 103 | log.Fatalf("writing output: %v", ew.err) 104 | } 105 | if err != nil { 106 | panic(err) 107 | } 108 | } 109 | 110 | func capitalize(s string) string { 111 | if s == "" { 112 | return s 113 | } 114 | r, n := utf8.DecodeRuneInString(s) 115 | return string(unicode.ToTitle(r)) + s[n:] 116 | } 117 | 118 | // PrintUsage prints usage information. 
119 | func PrintUsage(w io.Writer, cmd *base.Command) { 120 | bw := bufio.NewWriter(w) 121 | tmpl(bw, usageTemplate, cmd) 122 | bw.Flush() 123 | } 124 | -------------------------------------------------------------------------------- /cmd/hdq/internal/base/base.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Package base defines shared basic pieces of the hdq command, 18 | // in particular logging and the Command structure. 19 | package base 20 | 21 | import ( 22 | "flag" 23 | "fmt" 24 | "io" 25 | "os" 26 | "strings" 27 | ) 28 | 29 | // A Command is an implementation of a gop command 30 | // like gop export or gop install. 31 | type Command struct { 32 | // Run runs the command. 33 | // The args are the arguments after the command name. 34 | Run func(cmd *Command, args []string) 35 | 36 | // UsageLine is the one-line usage message. 37 | // The words between "gop" and the first flag or argument in the line are taken to be the command name. 38 | UsageLine string 39 | 40 | // Short is the short description shown in the 'gop help' output. 41 | Short string 42 | 43 | // Flag is a set of flags specific to this command. 44 | Flag flag.FlagSet 45 | 46 | // Commands lists the available commands and help topics. 
47 | // The order here is the order in which they are printed by 'gop help'. 48 | // Note that subcommands are in general best avoided. 49 | Commands []*Command 50 | } 51 | 52 | // Hdq command 53 | var Hdq = &Command{ 54 | UsageLine: "hdq", 55 | Short: `hdq - a HTML DOM Query Language for Go+`, 56 | // Commands initialized in package main 57 | } 58 | 59 | // LongName returns the command's long name: all the words in the usage line between "gop" and a flag or argument, 60 | func (c *Command) LongName() string { 61 | name := c.UsageLine 62 | if i := strings.Index(name, " ["); i >= 0 { 63 | name = name[:i] 64 | } 65 | if name == "hdq" { 66 | return "" 67 | } 68 | return strings.TrimPrefix(name, "hdq ") 69 | } 70 | 71 | // Name returns the command's short name: the last word in the usage line before a flag or argument. 72 | func (c *Command) Name() string { 73 | name := c.LongName() 74 | if i := strings.LastIndex(name, " "); i >= 0 { 75 | name = name[i+1:] 76 | } 77 | return name 78 | } 79 | 80 | // Usage show the command usage. 81 | func (c *Command) Usage(w io.Writer) { 82 | fmt.Fprintf(w, "%s\n\nUsage: %s\n", c.Short, c.UsageLine) 83 | 84 | // restore output of flag 85 | defer c.Flag.SetOutput(c.Flag.Output()) 86 | 87 | c.Flag.SetOutput(w) 88 | c.Flag.PrintDefaults() 89 | fmt.Fprintln(w) 90 | os.Exit(2) 91 | } 92 | 93 | // Runnable reports whether the command can be run; otherwise 94 | // it is a documentation pseudo-command. 95 | func (c *Command) Runnable() bool { 96 | return c.Run != nil 97 | } 98 | 99 | // Usage is the usage-reporting function, filled in by package main 100 | // but here for reference by other packages. 101 | // 102 | // flag.Usage func() 103 | 104 | // CmdName - "build", "install", "list", "mod tidy", etc. 105 | var CmdName string 106 | 107 | // Main runs a command. 
108 | func Main(c *Command, app string, args []string) { 109 | name := c.UsageLine 110 | if i := strings.Index(name, " ["); i >= 0 { 111 | c.UsageLine = app + name[i:] 112 | } 113 | c.Run(c, args) 114 | } 115 | -------------------------------------------------------------------------------- /html_utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package hdq 17 | 18 | import ( 19 | "strings" 20 | 21 | "golang.org/x/net/html" 22 | "golang.org/x/net/html/atom" 23 | ) 24 | 25 | // ----------------------------------------------------------------------------- 26 | 27 | // containsClass returns true if v is a class in source. 28 | // eg. `ContainsClass("top current", "current")` returns true. 29 | func containsClass(source string, v string) bool { 30 | for { 31 | pos := strings.IndexByte(source, ' ') 32 | if pos < 0 { 33 | return source == v 34 | } 35 | if source[:pos] == v { 36 | return true 37 | } 38 | source = source[pos+1:] 39 | } 40 | } 41 | 42 | // attributeVal returns value of the attribute `k`. 
43 | func attributeVal(node *html.Node, k string) (v string, err error) { 44 | if node.Type != html.ElementNode { 45 | return "", ErrInvalidNode 46 | } 47 | for _, attr := range node.Attr { 48 | if attr.Key == k { 49 | return attr.Val, nil 50 | } 51 | } 52 | return "", ErrNotFound 53 | } 54 | 55 | // firstChild returns the first child with type `nodeType` of the node `node`. 56 | func firstChild(node *html.Node, nodeType html.NodeType) (p *html.Node, err error) { 57 | for p = node.FirstChild; p != nil; p = p.NextSibling { 58 | if p.Type == nodeType { 59 | return p, nil 60 | } 61 | } 62 | return nil, ErrNotFound 63 | } 64 | 65 | // lastChild returns the last child with type `nodeType` of the node `node`. 66 | func lastChild(node *html.Node, nodeType html.NodeType) (p *html.Node, err error) { 67 | for p = node.LastChild; p != nil; p = p.PrevSibling { 68 | if p.Type == nodeType { 69 | return p, nil 70 | } 71 | } 72 | return nil, ErrNotFound 73 | } 74 | 75 | // ----------------------------------------------------------------------------- 76 | 77 | const ( 78 | spaces = " \t\r\n" 79 | ) 80 | 81 | // childEqualText returns true if the type of node's child is TextNode and it's Data equals `text`. 82 | func childEqualText(node *html.Node, text string) bool { 83 | p := node.FirstChild 84 | if p == nil || p.NextSibling != nil { 85 | return false 86 | } 87 | return equalText(p, text) 88 | } 89 | 90 | // equalText returns true if the type of node is TextNode and it's Data equals `text`. 91 | func equalText(node *html.Node, text string) bool { 92 | if node.Type != html.TextNode { 93 | return false 94 | } 95 | return node.Data == text 96 | } 97 | 98 | // containsText returns true if the type of node is TextNode and it's Data contains `text`. 
99 | func containsText(node *html.Node, text string) bool { 100 | if node.Type != html.TextNode { 101 | return false 102 | } 103 | return strings.Contains(node.Data, text) 104 | } 105 | 106 | // hasPrefixText returns true if the type of node is TextNode and its Data has prefix `text`. 107 | func hasPrefixText(node *html.Node, text string) bool { 108 | if node.Type != html.TextNode { 109 | return false 110 | } 111 | return strings.Contains(strings.TrimLeft(node.Data, spaces), text) 112 | } 113 | 114 | // exactText returns text of node if the type of node is TextNode. 115 | func exactText(node *html.Node) (string, error) { 116 | if node.Type != html.TextNode { 117 | return node.Data, nil 118 | } 119 | return "", ErrInvalidNode 120 | } 121 | 122 | // textOf returns text data of node's all childs. 123 | func textOf(node *html.Node) string { 124 | var printer textPrinter 125 | printer.printNode(node) 126 | return string(printer.data) 127 | } 128 | 129 | type textPrinter struct { 130 | data []byte 131 | notLineStart bool 132 | hasSpace bool 133 | } 134 | 135 | func (p *textPrinter) printText(v string, hasRightSpace bool) { 136 | if v == "" { 137 | return 138 | } 139 | if p.notLineStart && p.hasSpace { 140 | p.data = append(p.data, ' ') 141 | } else { 142 | p.notLineStart = true 143 | } 144 | p.data = append(p.data, v...) 
145 | p.hasSpace = hasRightSpace 146 | } 147 | 148 | func (p *textPrinter) printNode(node *html.Node) { 149 | if node == nil { 150 | return 151 | } 152 | if node.Type == html.TextNode { 153 | p.printText(textTrimRight(textTrimLeft(node.Data, &p.hasSpace))) 154 | return 155 | } 156 | for child := node.FirstChild; child != nil; child = child.NextSibling { 157 | p.printNode(child) 158 | } 159 | switch node.DataAtom { 160 | case atom.P: 161 | p.data = append(p.data, '\n') 162 | p.notLineStart = false 163 | } 164 | } 165 | 166 | func textTrimLeft(v string, hasSpace *bool) string { 167 | ret := strings.TrimLeft(v, spaces) 168 | if len(v) != len(ret) { 169 | *hasSpace = true 170 | } 171 | return ret 172 | } 173 | 174 | func textTrimRight(v string) (string, bool) { 175 | ret := strings.TrimRight(v, spaces) 176 | return ret, len(v) != len(ret) 177 | } 178 | 179 | // ----------------------------------------------------------------------------- 180 | -------------------------------------------------------------------------------- /tutorial/02-GithubRepos/gop_autogen.go: -------------------------------------------------------------------------------- 1 | // Code generated by gop (Go+); DO NOT EDIT. 
const GopPackage = "github.com/goplus/hdq"
const _ = true

// Repo holds the fields newRepo extracts for one repository list item.
type Repo struct {
	Repo       string
	ForkedFrom string
	Title      string
	Language   string
	UpdateTime string
	Forks      int
}

// Result is the value built by New: the extracted repositories and the href
// of the "Next" pagination link (empty when that lookup fails).
type Result struct {
	Repos []Repo
	Next  string
}
// newRepo builds a Repo from one list-item NodeSet.
// Only the repository href is mandatory: a failure there panics with frame
// information. Every other field falls back to its zero value on error.
// Generated from tutorial/02-GithubRepos/repos.gop; the //line directives map
// statements back to that file, so comments are placed before them.
//line tutorial/02-GithubRepos/repos.gop:19:1
func newRepo(node hdq.NodeSet) Repo {
//line tutorial/02-GithubRepos/repos.gop:20:1
	aRepo := node.Any().A().Attr__1("itemprop", "name codeRepository").One()
	// Mandatory field: an error is wrapped with frame information and raised.
//line tutorial/02-GithubRepos/repos.gop:21:1
	repo := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:21:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:21:1
		_gop_ret, _gop_err = aRepo.Href__0()
//line tutorial/02-GithubRepos/repos.gop:21:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:21:1
			_gop_err = errors.NewFrame(_gop_err, "aRepo.href", "tutorial/02-GithubRepos/repos.gop", 21, "repos.newRepo")
//line tutorial/02-GithubRepos/repos.gop:21:1
			panic(_gop_err)
		}
//line tutorial/02-GithubRepos/repos.gop:21:1
		return
	}()
	// The remaining lookups are relative to the node three levels above the link.
//line tutorial/02-GithubRepos/repos.gop:22:1
	root := aRepo.ParentN(3).One()
	// Optional field: "" on error.
//line tutorial/02-GithubRepos/repos.gop:23:1
	forkedFrom := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:23:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:23:1
		_gop_ret, _gop_err = root.Any().Span().Any().TextContains("Forked from").One().NextSibling(1).A().Href__0()
//line tutorial/02-GithubRepos/repos.gop:23:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:23:1
			return ""
		}
//line tutorial/02-GithubRepos/repos.gop:23:1
		return
	}()
	// Optional field: "" on error.
//line tutorial/02-GithubRepos/repos.gop:24:1
	title := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:24:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:24:1
		_gop_ret, _gop_err = root.Any().P().Attr__1("itemprop", "description").Text__0()
//line tutorial/02-GithubRepos/repos.gop:24:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:24:1
			return ""
		}
//line tutorial/02-GithubRepos/repos.gop:24:1
		return
	}()
	// Optional field: "" on error.
//line tutorial/02-GithubRepos/repos.gop:25:1
	language := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:25:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:25:1
		_gop_ret, _gop_err = root.Any().Span().Attr__1("itemprop", "programmingLanguage").One().Text__0()
//line tutorial/02-GithubRepos/repos.gop:25:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:25:1
			return ""
		}
//line tutorial/02-GithubRepos/repos.gop:25:1
		return
	}()
	// Optional field: "" on error.
//line tutorial/02-GithubRepos/repos.gop:26:1
	updateTime := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:26:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:26:1
		_gop_ret, _gop_err = root.Any().Element__1("relative-time").One().Attr__0("datetime")
//line tutorial/02-GithubRepos/repos.gop:26:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:26:1
			return ""
		}
//line tutorial/02-GithubRepos/repos.gop:26:1
		return
	}()
	// Optional field: 0 on error. The link is matched by the href
	// repo+"/network/members".
//line tutorial/02-GithubRepos/repos.gop:27:1
	forks := func() (_gop_ret int) {
//line tutorial/02-GithubRepos/repos.gop:27:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:27:1
		_gop_ret, _gop_err = root.Any().A().Attr__1("href", repo+"/network/members").Int__0()
//line tutorial/02-GithubRepos/repos.gop:27:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:27:1
			return 0
		}
//line tutorial/02-GithubRepos/repos.gop:27:1
		return
	}()
//line tutorial/02-GithubRepos/repos.gop:28:1
	return Repo{Repo: repo, ForkedFrom: forkedFrom, Title: title, Language: language, UpdateTime: updateTime, Forks: forks}
}
//line tutorial/02-GithubRepos/repos.gop:45:1
// New creates a new Result from a html document.
// The first argument is ignored.
func New(_ interface{}, doc hdq.NodeSet) Result {
//line tutorial/02-GithubRepos/repos.gop:48:1
	divRepos := doc.Any().Element__0(atom.Div).Id("user-repositories-list").One()
//line tutorial/02-GithubRepos/repos.gop:49:1
	repoList := divRepos.Child().Ul().One()
	// Collect one Repo per item enumerated by Gop_Enum; the slice stays nil
	// when nothing is enumerated.
//line tutorial/02-GithubRepos/repos.gop:50:1
	repos := func() (_gop_ret []Repo) {
//line tutorial/02-GithubRepos/repos.gop:50:1
		repoList.Child().Li().Gop_Enum(func(x hdq.NodeSet) {
//line tutorial/02-GithubRepos/repos.gop:50:1
			_gop_ret = append(_gop_ret, newRepo(x))
		})
//line tutorial/02-GithubRepos/repos.gop:50:1
		return
	}()
	// Href of the pagination link whose only child is the text "Next"; "" on error.
//line tutorial/02-GithubRepos/repos.gop:51:1
	next := func() (_gop_ret string) {
//line tutorial/02-GithubRepos/repos.gop:51:1
		var _gop_err error
//line tutorial/02-GithubRepos/repos.gop:51:1
		_gop_ret, _gop_err = doc.Any().Div().Class("paginate-container").One().Any().A().ChildEqualText("Next").Href__0()
//line tutorial/02-GithubRepos/repos.gop:51:1
		if _gop_err != nil {
//line tutorial/02-GithubRepos/repos.gop:51:1
			return ""
		}
//line tutorial/02-GithubRepos/repos.gop:51:1
		return
	}()
//line tutorial/02-GithubRepos/repos.gop:52:1
	return Result{Repos: repos, Next: next}
}
{ 2 | "Repos": [ 3 | { 4 | "Repo": "/xushiwei/linguist", 5 | "ForkedFrom": "/github/linguist", 6 | "Title": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!\n", 7 | "Language": "Ruby", 8 | "UpdateTime": "2021-08-08T17:39:30Z", 9 | "Forks": 3221 10 | }, 11 | { 12 | "Repo": "/xushiwei/x", 13 | "ForkedFrom": "/qiniu/x", 14 | "Title": "Extension of go standard library\n", 15 | "Language": "Go", 16 | "UpdateTime": "2021-08-04T16:37:08Z", 17 | "Forks": 16 18 | }, 19 | { 20 | "Repo": "/xushiwei/fyne", 21 | "ForkedFrom": "/fyne-io/fyne", 22 | "Title": "Cross platform GUI in Go based on Material Design\n", 23 | "Language": "Go", 24 | "UpdateTime": "2021-07-27T11:26:17Z", 25 | "Forks": 726 26 | }, 27 | { 28 | "Repo": "/xushiwei/qlang", 29 | "ForkedFrom": "/goplus/gop", 30 | "Title": "Q Language - A script language for Go\n", 31 | "Language": "Go", 32 | "UpdateTime": "2021-07-20T22:00:43Z", 33 | "Forks": 384 34 | }, 35 | { 36 | "Repo": "/xushiwei/winfsp", 37 | "ForkedFrom": "/billziss-gh/winfsp", 38 | "Title": "Windows File System Proxy - FUSE for Windows\n", 39 | "Language": "C", 40 | "UpdateTime": "2021-02-03T00:51:42Z", 41 | "Forks": 320 42 | }, 43 | { 44 | "Repo": "/xushiwei/embeddedgo", 45 | "ForkedFrom": "/embeddedgo/go", 46 | "Title": "The Go programming language with support for bare-matal programing\n", 47 | "Language": "Go", 48 | "UpdateTime": "2020-12-31T16:37:34Z", 49 | "Forks": 13037 50 | }, 51 | { 52 | "Repo": "/xushiwei/oak", 53 | "ForkedFrom": "/oakmound/oak", 54 | "Title": "A pure Go game engine\n", 55 | "Language": "Go", 56 | "UpdateTime": "2020-08-04T01:28:32Z", 57 | "Forks": 58 58 | }, 59 | { 60 | "Repo": "/xushiwei/GhostDB", 61 | "ForkedFrom": "/jakekgrog/GhostDB", 62 | "Title": "GhostDB is a distributed, in-memory, general purpose key-value data store that delivers microsecond performance at any scale.\n", 63 | "Language": "Go", 64 | "UpdateTime": "2020-08-01T10:58:55Z", 65 | "Forks": 35 66 | }, 67 | 
{ 68 | "Repo": "/xushiwei/DeepLearning-500-questions", 69 | "ForkedFrom": "/scutan90/DeepLearning-500-questions", 70 | "Title": "深度学习500问,以问答形式对常用的概率知识、线性代数、机器学习、深度学习、计算机视觉等热点问题进行阐述,以帮助自己及有需要的读者。 全书分为18个章节,50余万字。由于水平有限,书中不妥之处恳请广大读者批评指正。 未完待续............ 如有意合作,联系scutjy2015@163.com 版权所有,违权必究 Tan 2018.06\n", 71 | "Language": "", 72 | "UpdateTime": "2020-07-20T12:06:41Z", 73 | "Forks": 14227 74 | }, 75 | { 76 | "Repo": "/xushiwei/simdjson-go", 77 | "ForkedFrom": "/minio/simdjson-go", 78 | "Title": "Golang port of simdjson: parsing gigabytes of JSON per second\n", 79 | "Language": "Go", 80 | "UpdateTime": "2020-06-26T22:23:56Z", 81 | "Forks": 59 82 | }, 83 | { 84 | "Repo": "/xushiwei/Paddle", 85 | "ForkedFrom": "/PaddlePaddle/Paddle", 86 | "Title": "PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习\u0026机器学习高性能单机、分布式训练和跨平台部署)\n", 87 | "Language": "C++", 88 | "UpdateTime": "2020-06-21T15:35:31Z", 89 | "Forks": 3953 90 | }, 91 | { 92 | "Repo": "/xushiwei/gorgonia", 93 | "ForkedFrom": "/gorgonia/gorgonia", 94 | "Title": "Gorgonia is a library that helps facilitate machine learning in Go.\n", 95 | "Language": "Go", 96 | "UpdateTime": "2020-06-20T08:32:36Z", 97 | "Forks": 359 98 | }, 99 | { 100 | "Repo": "/xushiwei/caire", 101 | "ForkedFrom": "/esimov/caire", 102 | "Title": "Content aware image resize library\n", 103 | "Language": "Go", 104 | "UpdateTime": "2020-06-18T08:10:21Z", 105 | "Forks": 360 106 | }, 107 | { 108 | "Repo": "/xushiwei/goplus-play", 109 | "ForkedFrom": "/visualfc/goplus-play", 110 | "Title": "Playground of the Go+ language\n", 111 | "Language": "JavaScript", 112 | "UpdateTime": "2020-06-17T15:40:34Z", 113 | "Forks": 2 114 | }, 115 | { 116 | "Repo": "/xushiwei/bpl", 117 | "ForkedFrom": "/qiniu/bpl", 118 | "Title": "Binary Processing Language\n", 119 | "Language": "Go", 120 | "UpdateTime": "2020-06-07T14:40:37Z", 121 | "Forks": 28 122 | }, 123 | { 124 | "Repo": "/xushiwei/c2goasm", 125 | "ForkedFrom": 
"/minio/c2goasm", 126 | "Title": "C to Go Assembly\n", 127 | "Language": "Go", 128 | "UpdateTime": "2020-06-04T02:52:47Z", 129 | "Forks": 92 130 | }, 131 | { 132 | "Repo": "/xushiwei/asm2plan9s", 133 | "ForkedFrom": "/minio/asm2plan9s", 134 | "Title": "Tool to generate BYTE sequences for Go assembly as generated by YASM\n", 135 | "Language": "Go", 136 | "UpdateTime": "2020-06-04T00:32:38Z", 137 | "Forks": 31 138 | }, 139 | { 140 | "Repo": "/xushiwei/scipy", 141 | "ForkedFrom": "/scipy/scipy", 142 | "Title": "Scipy library main repository\n", 143 | "Language": "Python", 144 | "UpdateTime": "2020-05-24T16:58:25Z", 145 | "Forks": 3826 146 | }, 147 | { 148 | "Repo": "/xushiwei/sympy", 149 | "ForkedFrom": "/sympy/sympy", 150 | "Title": "A computer algebra system written in pure Python\n", 151 | "Language": "Python", 152 | "UpdateTime": "2020-05-24T12:35:57Z", 153 | "Forks": 3435 154 | }, 155 | { 156 | "Repo": "/xushiwei/matplotlib", 157 | "ForkedFrom": "/matplotlib/matplotlib", 158 | "Title": "matplotlib: plotting with Python\n", 159 | "Language": "Python", 160 | "UpdateTime": "2020-05-24T00:49:40Z", 161 | "Forks": 5961 162 | }, 163 | { 164 | "Repo": "/xushiwei/notebook", 165 | "ForkedFrom": "/jupyter/notebook", 166 | "Title": "Jupyter Interactive Notebook\n", 167 | "Language": "JavaScript", 168 | "UpdateTime": "2020-05-23T00:20:16Z", 169 | "Forks": 3473 170 | }, 171 | { 172 | "Repo": "/xushiwei/jax", 173 | "ForkedFrom": "/google/jax", 174 | "Title": "Composable transformations of Python+NumPy programs: differentiate, vectorize, JIT to GPU/TPU, and more\n", 175 | "Language": "Python", 176 | "UpdateTime": "2020-05-22T06:56:01Z", 177 | "Forks": 1288 178 | }, 179 | { 180 | "Repo": "/xushiwei/flax", 181 | "ForkedFrom": "/google/flax", 182 | "Title": "Flax is a neural network library for JAX that is designed for flexibility.\n", 183 | "Language": "Python", 184 | "UpdateTime": "2020-05-21T22:57:17Z", 185 | "Forks": 249 186 | }, 187 | { 188 | "Repo": "/xushiwei/liner", 189 | 
"ForkedFrom": "/peterh/liner", 190 | "Title": "Pure Go line editor with history, inspired by linenoise\n", 191 | "Language": "Go", 192 | "UpdateTime": "2020-05-16T16:24:21Z", 193 | "Forks": 105 194 | }, 195 | { 196 | "Repo": "/xushiwei/query", 197 | "ForkedFrom": "/couchbase/query", 198 | "Title": "Query engine.\n", 199 | "Language": "Go", 200 | "UpdateTime": "2020-05-15T23:14:38Z", 201 | "Forks": 41 202 | }, 203 | { 204 | "Repo": "/xushiwei/pandas", 205 | "ForkedFrom": "/pandas-dev/pandas", 206 | "Title": "Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more\n", 207 | "Language": "Python", 208 | "UpdateTime": "2020-05-13T20:43:46Z", 209 | "Forks": 12866 210 | }, 211 | { 212 | "Repo": "/xushiwei/hugo", 213 | "ForkedFrom": "/gohugoio/hugo", 214 | "Title": "The world’s fastest framework for building websites.\n", 215 | "Language": "Go", 216 | "UpdateTime": "2020-05-13T19:44:45Z", 217 | "Forks": 6086 218 | }, 219 | { 220 | "Repo": "/xushiwei/netlify-cms", 221 | "ForkedFrom": "/netlify/netlify-cms", 222 | "Title": "A Git-based CMS for Static Site Generators\n", 223 | "Language": "JavaScript", 224 | "UpdateTime": "2020-05-13T16:47:59Z", 225 | "Forks": 2446 226 | }, 227 | { 228 | "Repo": "/xushiwei/presto", 229 | "ForkedFrom": "/prestodb/presto", 230 | "Title": "The official home of the Presto distributed SQL query engine for big data\n", 231 | "Language": "Java", 232 | "UpdateTime": "2020-05-13T01:19:32Z", 233 | "Forks": 4234 234 | }, 235 | { 236 | "Repo": "/xushiwei/gonum", 237 | "ForkedFrom": "/gonum/gonum", 238 | "Title": "Gonum is a set of numeric libraries for the Go programming language. 
It contains libraries for matrices, statistics, optimization, and more\n", 239 | "Language": "Go", 240 | "UpdateTime": "2020-05-12T11:15:37Z", 241 | "Forks": 409 242 | } 243 | ], 244 | "Next": "https://github.com/xushiwei?after=Y3Vyc29yOnYyOpK0MjAyMC0wNS0xMlQxMToxNTozN1rOD7hDgA%3D%3D\u0026tab=repositories" 245 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /hdq_helper.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | package hdq 17 | 18 | import ( 19 | "fmt" 20 | "io" 21 | "os" 22 | "strconv" 23 | "strings" 24 | 25 | "golang.org/x/net/html" 26 | "golang.org/x/net/html/atom" 27 | ) 28 | 29 | // ----------------------------------------------------------------------------- 30 | 31 | // Printf renders each node of the NodeSet to w and writes `fmt.Fprintf(w, format, params...)` after each node; it is a no-op when p.Err is set. 32 | func (p NodeSet) Printf(w io.Writer, format string, params ...any) NodeSet { 33 | if p.Err != nil { 34 | return p 35 | } 36 | p.Data.ForEach(func(node *html.Node) error { 37 | html.Render(w, node) 38 | fmt.Fprintf(w, format, params...) 39 | return nil 40 | }) 41 | return p 42 | } 43 | 44 | // Dump renders each node of the NodeSet to os.Stdout, writing "\n\n" after each node. 45 | func (p NodeSet) Dump() NodeSet { 46 | return p.Printf(os.Stdout, "\n\n") 47 | } 48 | 49 | // ----------------------------------------------------------------------------- 50 | 51 | // ChildEqualText returns the NodeSet of nodes whose child node text equals `text`. 52 | func (p NodeSet) ChildEqualText(text string) (ret NodeSet) { 53 | return p.Match(func(node *html.Node) bool { 54 | return childEqualText(node, text) 55 | }) 56 | } 57 | 58 | // TextEqual returns NodeSet which node type is TextNode and its text equals `text`. 59 | func (p NodeSet) TextEqual(text string) (ret NodeSet) { 60 | return p.Match(func(node *html.Node) bool { 61 | return equalText(node, text) 62 | }) 63 | } 64 | 65 | // TextContains returns NodeSet which node type is TextNode and its text contains `text`. 66 | func (p NodeSet) TextContains(text string) (ret NodeSet) { 67 | return p.Match(func(node *html.Node) bool { 68 | return containsText(node, text) 69 | }) 70 | } 71 | 72 | // TextHasPrefix returns NodeSet which node type is TextNode and its prefix is `text`.
73 | func (p NodeSet) TextHasPrefix(text string) (ret NodeSet) { 74 | return p.Match(func(node *html.Node) bool { 75 | return hasPrefixText(node, text) 76 | }) 77 | } 78 | 79 | func (p NodeSet) dataAtom(elem atom.Atom) (ret NodeSet) { 80 | return p.Match(func(node *html.Node) bool { 81 | return node.DataAtom == elem 82 | }) 83 | } 84 | 85 | // Element returns NodeSet which node type is ElementNode and it's element type is `elemType`. 86 | func (p NodeSet) Element__0(elemType atom.Atom) (ret NodeSet) { 87 | return p.dataAtom(elemType) 88 | } 89 | 90 | // Element returns NodeSet which node type is ElementNode and it's element type is `elemType`. 91 | func (p NodeSet) Element__1(elemType string) (ret NodeSet) { 92 | return p.Match(func(node *html.Node) bool { 93 | return node.Type == html.ElementNode && node.Data == elemType 94 | }) 95 | } 96 | 97 | // Attribute returns NodeSet which the value of attribute `k` is `v`. 98 | func (p NodeSet) Attribute__0(k, v string) (ret NodeSet) { 99 | return p.Match(func(node *html.Node) bool { 100 | if node.Type != html.ElementNode { 101 | return false 102 | } 103 | for _, attr := range node.Attr { 104 | if attr.Key == k && attr.Val == v { 105 | return true 106 | } 107 | } 108 | return false 109 | }) 110 | } 111 | 112 | func (p NodeSet) Attribute__1(k string, filter func(v string) bool) (ret NodeSet) { 113 | return p.Match(func(node *html.Node) bool { 114 | if node.Type != html.ElementNode { 115 | return false 116 | } 117 | for _, attr := range node.Attr { 118 | if attr.Key == k && filter(attr.Val) { 119 | return true 120 | } 121 | } 122 | return false 123 | }) 124 | } 125 | 126 | // ContainsClass returns NodeSet which class contains `v`. 
127 | func (p NodeSet) ContainsClass(v string) (ret NodeSet) { 128 | return p.Match(func(node *html.Node) bool { 129 | if node.Type != html.ElementNode { 130 | return false 131 | } 132 | for _, attr := range node.Attr { 133 | if attr.Key == "class" { 134 | return containsClass(attr.Val, v) 135 | } 136 | } 137 | return false 138 | }) 139 | } 140 | 141 | // H1 returns NodeSet which node type is ElementNode and it's element type is `h1`. 142 | func (p NodeSet) H1() (ret NodeSet) { 143 | return p.dataAtom(atom.H1) 144 | } 145 | 146 | // H2 returns NodeSet which node type is ElementNode and it's element type is `h2`. 147 | func (p NodeSet) H2() (ret NodeSet) { 148 | return p.dataAtom(atom.H2) 149 | } 150 | 151 | // H3 returns NodeSet which node type is ElementNode and it's element type is `h3`. 152 | func (p NodeSet) H3() (ret NodeSet) { 153 | return p.dataAtom(atom.H3) 154 | } 155 | 156 | // H4 returns NodeSet which node type is ElementNode and it's element type is `h4`. 157 | func (p NodeSet) H4() (ret NodeSet) { 158 | return p.dataAtom(atom.H4) 159 | } 160 | 161 | // Td returns NodeSet which node type is ElementNode and it's element type is `td`. 162 | func (p NodeSet) Td() (ret NodeSet) { 163 | return p.dataAtom(atom.Td) 164 | } 165 | 166 | // A returns NodeSet which node type is ElementNode and it's element type is `a`. 167 | func (p NodeSet) A() (ret NodeSet) { 168 | return p.dataAtom(atom.A) 169 | } 170 | 171 | // P returns NodeSet which node type is ElementNode and it's element type is `p`. 172 | func (p NodeSet) P() (ret NodeSet) { 173 | return p.dataAtom(atom.P) 174 | } 175 | 176 | // Img returns NodeSet which node type is ElementNode and it's element type is `img`. 177 | func (p NodeSet) Img() (ret NodeSet) { 178 | return p.dataAtom(atom.Img) 179 | } 180 | 181 | // Ol returns NodeSet which node type is ElementNode and it's element type is `ol`. 
182 | func (p NodeSet) Ol() (ret NodeSet) { 183 | return p.dataAtom(atom.Ol) 184 | } 185 | 186 | // Ul returns NodeSet which node type is ElementNode and it's element type is `ul`. 187 | func (p NodeSet) Ul() (ret NodeSet) { 188 | return p.dataAtom(atom.Ul) 189 | } 190 | 191 | // Dl returns NodeSet which node type is ElementNode and it's element type is `dl`. 192 | func (p NodeSet) Dl() (ret NodeSet) { 193 | return p.dataAtom(atom.Dl) 194 | } 195 | 196 | // Dt returns NodeSet which node type is ElementNode and it's element type is `dt`. 197 | func (p NodeSet) Dt() (ret NodeSet) { 198 | return p.dataAtom(atom.Dt) 199 | } 200 | 201 | // Span returns NodeSet which node type is ElementNode and it's element type is `span`. 202 | func (p NodeSet) Span() (ret NodeSet) { 203 | return p.dataAtom(atom.Span) 204 | } 205 | 206 | // Div returns NodeSet which node type is ElementNode and it's element type is `div`. 207 | func (p NodeSet) Div() (ret NodeSet) { 208 | return p.dataAtom(atom.Div) 209 | } 210 | 211 | // Nav returns NodeSet which node type is ElementNode and it's element type is `nav`. 212 | func (p NodeSet) Nav() (ret NodeSet) { 213 | return p.dataAtom(atom.Nav) 214 | } 215 | 216 | // Li returns NodeSet which node type is ElementNode and it's element type is `li`. 217 | func (p NodeSet) Li() (ret NodeSet) { 218 | return p.dataAtom(atom.Li) 219 | } 220 | 221 | // Class returns NodeSet which `class` attribute is `v`. 222 | func (p NodeSet) Class(v string) (ret NodeSet) { 223 | return p.Attribute__0("class", v) 224 | } 225 | 226 | // Id returns NodeSet which `id` attribute is `v`. 227 | func (p NodeSet) Id(v string) (ret NodeSet) { 228 | return p.Attribute__0("id", v).One() 229 | } 230 | 231 | // ----------------------------------------------------------------------------- 232 | 233 | // ExactText returns text of NodeSet. 234 | // exactlyOne=false: if NodeSet is more than one, returns first node's text (if 235 | // node type is not TextNode, return error). 
236 | func (p NodeSet) ExactText__1(exactlyOne bool) (text string, err error) { 237 | node, err := p.CollectOne__1(exactlyOne) 238 | if err != nil { 239 | return 240 | } 241 | return exactText(node) 242 | } 243 | 244 | func (p NodeSet) ExactText__0() (text string, err error) { 245 | return p.ExactText__1(false) 246 | } 247 | 248 | // Text returns text of NodeSet. 249 | // exactlyOne=false: if NodeSet is more than one, returns first node's text. 250 | func (p NodeSet) Text__1(exactlyOne bool) (text string, err error) { 251 | node, err := p.CollectOne__1(exactlyOne) 252 | if err != nil { 253 | return 254 | } 255 | return textOf(node), nil 256 | } 257 | 258 | func (p NodeSet) Text__0() (text string, err error) { 259 | return p.Text__1(false) 260 | } 261 | 262 | // ScanInt returns int value of p.Text(). 263 | // exactlyOne=false: if NodeSet is more than one, returns first node's value. 264 | func (p NodeSet) ScanInt(format string, exactlyOne ...bool) (v int, err error) { 265 | text, err := p.Text__1(exactlyOne != nil && exactlyOne[0]) 266 | if err != nil { 267 | return 268 | } 269 | err = fmtSscanf(text, format, &v) 270 | if err != nil { 271 | v = 0 272 | } 273 | return 274 | } 275 | 276 | func fmtSscanf(text, format string, v *int) (err error) { 277 | prefix, suffix, err := parseFormat(format) 278 | if err != nil { 279 | return 280 | } 281 | if strings.HasPrefix(text, prefix) && strings.HasSuffix(text, suffix) { 282 | text = text[len(prefix) : len(text)-len(suffix)] 283 | *v, err = strconv.Atoi(strings.Replace(text, ",", "", -1)) 284 | return 285 | } 286 | return ErrInvalidScanFormat 287 | } 288 | 289 | func parseFormat(format string) (prefix, suffix string, err error) { 290 | pos := strings.Index(format, "%d") 291 | if pos < 0 { 292 | pos = strings.Index(format, "%v") 293 | } 294 | if pos < 0 { 295 | err = ErrInvalidScanFormat 296 | return 297 | } 298 | prefix = strings.Replace(format[:pos], "%%", "%", -1) 299 | suffix = strings.Replace(format[pos+2:], "%%", "%", -1) 
300 | return 301 | } 302 | 303 | // UnitedFloat returns UnitedFloat value of p.Text(). 304 | // exactlyOne=false: if NodeSet is more than one, returns first node's value. 305 | func (p NodeSet) UnitedFloat__1(exactlyOne bool) (v float64, err error) { 306 | text, err := p.Text__1(exactlyOne) 307 | if err != nil { 308 | return 309 | } 310 | n := len(text) 311 | if n == 0 { 312 | return 0, ErrEmptyText 313 | } 314 | unit := 1.0 315 | switch text[n-1] { 316 | case 'k', 'K': 317 | unit = 1000 318 | text = text[:n-1] 319 | } 320 | v, err = strconv.ParseFloat(text, 64) 321 | if err != nil { 322 | return 323 | } 324 | return v * unit, nil 325 | } 326 | 327 | func (p NodeSet) UnitedFloat__0() (v float64, err error) { 328 | return p.UnitedFloat__1(false) 329 | } 330 | 331 | // Int returns int value of p.Text(). 332 | // exactlyOne=false: if NodeSet is more than one, returns first node's value. 333 | func (p NodeSet) Int__1(exactlyOne bool) (v int, err error) { 334 | text, err := p.Text__1(exactlyOne) 335 | if err != nil { 336 | return 337 | } 338 | return strconv.Atoi(strings.Replace(text, ",", "", -1)) 339 | } 340 | 341 | func (p NodeSet) Int__0() (v int, err error) { 342 | return p.Int__1(false) 343 | } 344 | 345 | // AttrVal returns attribute value of NodeSet. 346 | // exactlyOne=false: if NodeSet is more than one, returns first node's attribute value. 347 | func (p NodeSet) AttrVal(k string, exactlyOne ...bool) (text string, err error) { 348 | node, err := p.CollectOne__1(exactlyOne != nil && exactlyOne[0]) 349 | if err != nil { 350 | return 351 | } 352 | return attributeVal(node, k) 353 | } 354 | 355 | // HrefVal returns href attribute's value of NodeSet. 356 | // exactlyOne=false: if NodeSet is more than one, returns first node's attribute value. 
357 | func (p NodeSet) HrefVal__1(exactlyOne bool) (text string, err error) { 358 | return p.AttrVal("href", exactlyOne) 359 | } 360 | 361 | func (p NodeSet) HrefVal__0() (text string, err error) { 362 | return p.AttrVal("href", false) 363 | } 364 | 365 | // ----------------------------------------------------------------------------- 366 | 367 | // Href returns the `href` attribute value of the NodeSet (AttrVal with exactlyOne=false). 368 | func (p NodeSet) Href__0() (text string, err error) { 369 | return p.AttrVal("href", false) 370 | } 371 | 372 | // Href returns the NodeSet of nodes whose `href` attribute is `v`. 373 | func (p NodeSet) Href__1(v string) (ret NodeSet) { 374 | return p.Attribute__0("href", v) 375 | } 376 | 377 | // Href returns the `href` attribute value of the NodeSet; exactlyOne is passed through to AttrVal. 378 | func (p NodeSet) Href__2(exactlyOne bool) (text string, err error) { 379 | return p.AttrVal("href", exactlyOne) 380 | } 381 | 382 | // HasAttr reports whether AttrVal(k, exactlyOne...) succeeds, i.e. whether attribute k can be read from the NodeSet. 383 | func (p NodeSet) HasAttr(k string, exactlyOne ...bool) bool { 384 | _, e := p.AttrVal(k, exactlyOne...) 385 | return e == nil 386 | } 387 | 388 | // Attr returns the value of attribute k of the NodeSet; it forwards to AttrVal. 389 | func (p NodeSet) Attr__0(k string, exactlyOne ...bool) (text string, err error) { 390 | return p.AttrVal(k, exactlyOne...) 391 | } 392 | 393 | func (p NodeSet) Attr__1(k, v string) (ret NodeSet) { 394 | return p.Attribute__0(k, v) 395 | } 396 | 397 | // ----------------------------------------------------------------------------- 398 | -------------------------------------------------------------------------------- /hdq.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The GoPlus Authors (goplus.org) 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package hdq 17 | 18 | import ( 19 | "bytes" 20 | "errors" 21 | "io" 22 | 23 | "github.com/goplus/hdq/stream" 24 | "golang.org/x/net/html" 25 | 26 | _ "github.com/goplus/hdq/stream/zip" 27 | ) 28 | 29 | const ( 30 | GopPackage = true // indicates that this is a Go+ package 31 | ) 32 | 33 | var ( 34 | ErrNotFound = errors.New("entity not found") 35 | ErrBreak = errors.New("break") 36 | 37 | ErrTooManyNodes = errors.New("too many nodes") 38 | ErrInvalidNode = errors.New("invalid node") 39 | 40 | // ErrEmptyText represents an `empty text` error. 41 | ErrEmptyText = errors.New("empty text") 42 | 43 | // ErrInvalidScanFormat represents an `invalid fmt.Scan format` error. 44 | ErrInvalidScanFormat = errors.New("invalid fmt.Scan format") 45 | ) 46 | 47 | // ----------------------------------------------------------------------------- 48 | 49 | type NodeEnum interface { 50 | ForEach(filter func(node *html.Node) error) 51 | } 52 | 53 | type cachedGetter interface { 54 | Cached() int 55 | } 56 | 57 | // NodeSet represents a set of nodes (enumerated through Data), or the error (Err) that occurred while building it. 58 | type NodeSet struct { 59 | Data NodeEnum 60 | Err error 61 | } 62 | 63 | // New parses the HTML read from r and returns a NodeSet holding the parsed root node; on a parse error only Err is set. 64 | func New(r io.Reader) NodeSet { 65 | doc, err := html.Parse(r) 66 | if err != nil { 67 | return NodeSet{Err: err} 68 | } 69 | return NodeSet{Data: oneNode{doc}} 70 | } 71 | 72 | // Source opens a stream (if necessary) to create a NodeSet object.
73 | func Source(r any) (ret NodeSet) { 74 | switch v := r.(type) { 75 | case string: 76 | f, err := stream.Open(v) 77 | if err != nil { 78 | return NodeSet{Err: err} 79 | } 80 | return New(f) 81 | case []byte: 82 | r := bytes.NewReader(v) 83 | return New(r) 84 | case io.Reader: 85 | return New(v) 86 | case NodeSet: // input is a node set 87 | return v 88 | default: 89 | panic("unsupport source type") 90 | } 91 | } 92 | 93 | func (p NodeSet) Ok() bool { 94 | return p.Err == nil 95 | } 96 | 97 | func (p NodeSet) All() NodeSet { 98 | if _, ok := p.Data.(cachedGetter); ok { 99 | return p 100 | } 101 | nodes, err := p.Collect() 102 | if err != nil { 103 | return NodeSet{Err: err} 104 | } 105 | return NodeSet{Data: &fixNodes{nodes}} 106 | } 107 | 108 | func (p NodeSet) Gop_Enum(callback func(node NodeSet)) { 109 | if p.Err == nil { 110 | p.Data.ForEach(func(node *html.Node) error { 111 | t := NodeSet{Data: oneNode{node}} 112 | callback(t) 113 | return nil 114 | }) 115 | } 116 | } 117 | 118 | func (p NodeSet) ForEach(callback func(node NodeSet)) { 119 | p.Gop_Enum(callback) 120 | } 121 | 122 | // Render renders the node set to the given writer. 
123 | func (p NodeSet) Render(w io.Writer, suffix ...string) (err error) { 124 | if p.Err != nil { 125 | return p.Err 126 | } 127 | p.Data.ForEach(func(node *html.Node) error { 128 | if e := html.Render(w, node); e != nil { 129 | err = e 130 | return ErrBreak 131 | } 132 | if suffix != nil { 133 | io.WriteString(w, suffix[0]) 134 | } 135 | return nil 136 | }) 137 | return 138 | } 139 | 140 | // ----------------------------------------------------------------------------- 141 | 142 | type oneNode struct { 143 | *html.Node 144 | } 145 | 146 | func (p oneNode) ForEach(filter func(node *html.Node) error) { 147 | filter(p.Node) 148 | } 149 | 150 | func (p oneNode) Cached() int { 151 | return 1 152 | } 153 | 154 | // ----------------------------------------------------------------------------- 155 | 156 | type fixNodes struct { 157 | nodes []*html.Node 158 | } 159 | 160 | func (p *fixNodes) ForEach(filter func(node *html.Node) error) { 161 | for _, node := range p.nodes { 162 | if filter(node) == ErrBreak { 163 | return 164 | } 165 | } 166 | } 167 | 168 | func (p *fixNodes) Cached() int { 169 | return len(p.nodes) 170 | } 171 | 172 | // Nodes creates a node set from the given nodes. 
173 | func Nodes(nodes ...*html.Node) (ret NodeSet) { 174 | return NodeSet{Data: &fixNodes{nodes}} 175 | } 176 | 177 | // ----------------------------------------------------------------------------- 178 | 179 | const ( 180 | unknownNumNodes = -1 181 | ) 182 | 183 | type anyNodes struct { 184 | data NodeEnum 185 | } 186 | 187 | func (p *anyNodes) ForEach(filter func(node *html.Node) error) { 188 | p.data.ForEach(func(node *html.Node) error { 189 | anyForEach(node, filter) 190 | return nil 191 | }) 192 | } 193 | 194 | func (p *anyNodes) Cached() int { 195 | return unknownNumNodes 196 | } 197 | 198 | func anyForEach(p *html.Node, filter func(node *html.Node) error) error { 199 | if err := filter(p); err == nil || err == ErrBreak { 200 | return err 201 | } 202 | for node := p.FirstChild; node != nil; node = node.NextSibling { 203 | if anyForEach(node, filter) == ErrBreak { 204 | return ErrBreak 205 | } 206 | } 207 | return nil 208 | } 209 | 210 | // Any returns the all nodes as a node set. 
211 | func (p NodeSet) Any() (ret NodeSet) { 212 | if p.Err != nil { 213 | return p 214 | } 215 | return NodeSet{Data: &anyNodes{p.Data}} 216 | } 217 | 218 | // ----------------------------------------------------------------------------- 219 | 220 | type childLevelNodes struct { 221 | data NodeEnum 222 | level int 223 | } 224 | 225 | func (p *childLevelNodes) ForEach(filter func(node *html.Node) error) { 226 | p.data.ForEach(func(node *html.Node) error { 227 | return childLevelForEach(node, p.level, filter) 228 | }) 229 | } 230 | 231 | func childLevelForEach(p *html.Node, level int, filter func(node *html.Node) error) error { 232 | if level == 0 { 233 | return filter(p) 234 | } 235 | level-- 236 | for node := p.FirstChild; node != nil; node = node.NextSibling { 237 | if childLevelForEach(node, level, filter) == ErrBreak { 238 | return ErrBreak 239 | } 240 | } 241 | return nil 242 | } 243 | 244 | type parentLevelNodes struct { 245 | data NodeEnum 246 | level int 247 | } 248 | 249 | func (p *parentLevelNodes) ForEach(filter func(node *html.Node) error) { 250 | p.data.ForEach(func(node *html.Node) error { 251 | return parentLevelForEach(node, p.level, filter) 252 | }) 253 | } 254 | 255 | func parentLevelForEach(p *html.Node, level int, filter func(node *html.Node) error) error { 256 | for level < 0 { 257 | if p = p.Parent; p == nil { 258 | return ErrNotFound 259 | } 260 | level++ 261 | } 262 | return filter(p) 263 | } 264 | 265 | // Child returns the child node set. It is equivalent to ChildN(1). 266 | func (p NodeSet) Child() (ret NodeSet) { 267 | return p.ChildN(1) 268 | } 269 | 270 | // ChildN returns the child node set at the given level. 271 | func (p NodeSet) ChildN(level int) (ret NodeSet) { 272 | if p.Err != nil || level == 0 { 273 | return p 274 | } 275 | if level > 0 { 276 | return NodeSet{Data: &childLevelNodes{p.Data, level}} 277 | } 278 | return NodeSet{Data: &parentLevelNodes{p.Data, level}} 279 | } 280 | 281 | // Parent returns the parent node set. 
It is equivalent to ParentN(1). 282 | func (p NodeSet) Parent() (ret NodeSet) { 283 | return p.ChildN(-1) 284 | } 285 | 286 | // ParentN returns the parent node set at the given level. 287 | func (p NodeSet) ParentN(level int) (ret NodeSet) { 288 | return p.ChildN(-level) 289 | } 290 | 291 | // One returns the first node as a node set. 292 | func (p NodeSet) One() (ret NodeSet) { 293 | if _, ok := p.Data.(oneNode); ok { 294 | return p 295 | } 296 | node, err := p.CollectOne__1(false) 297 | if err != nil { 298 | return NodeSet{Err: err} 299 | } 300 | return NodeSet{Data: oneNode{node}} 301 | } 302 | 303 | // ----------------------------------------------------------------------------- 304 | 305 | type siblingNodes struct { 306 | data NodeEnum 307 | delta int 308 | } 309 | 310 | func (p *siblingNodes) ForEach(filter func(node *html.Node) error) { 311 | p.data.ForEach(func(node *html.Node) error { 312 | return siblingForEach(node, p.delta, filter) 313 | }) 314 | } 315 | 316 | func siblingForEach(p *html.Node, delta int, filter func(node *html.Node) error) error { 317 | for delta > 0 { 318 | if p = p.NextSibling; p == nil { 319 | return ErrNotFound 320 | } 321 | delta-- 322 | } 323 | for delta < 0 { 324 | if p = p.PrevSibling; p == nil { 325 | return ErrNotFound 326 | } 327 | delta++ 328 | } 329 | return filter(p) 330 | } 331 | 332 | func (p NodeSet) NextSibling(delta int) (ret NodeSet) { 333 | if p.Err != nil { 334 | return p 335 | } 336 | return NodeSet{Data: &siblingNodes{p.Data, delta}} 337 | } 338 | 339 | func (p NodeSet) PrevSibling(delta int) (ret NodeSet) { 340 | return p.NextSibling(-delta) 341 | } 342 | 343 | // ----------------------------------------------------------------------------- 344 | 345 | type prevSiblingNodes struct { 346 | data NodeEnum 347 | } 348 | 349 | func (p *prevSiblingNodes) ForEach(filter func(node *html.Node) error) { 350 | p.data.ForEach(func(node *html.Node) error { 351 | for p := node.PrevSibling; p != nil; p = p.PrevSibling { 352 | 
if filter(p) == ErrBreak { 353 | return ErrBreak 354 | } 355 | } 356 | return nil 357 | }) 358 | } 359 | 360 | func (p NodeSet) PrevSiblings() (ret NodeSet) { 361 | if p.Err != nil { 362 | return p 363 | } 364 | return NodeSet{Data: &prevSiblingNodes{p.Data}} 365 | } 366 | 367 | // ----------------------------------------------------------------------------- 368 | 369 | type nextSiblingNodes struct { 370 | data NodeEnum 371 | } 372 | 373 | func (p *nextSiblingNodes) ForEach(filter func(node *html.Node) error) { 374 | p.data.ForEach(func(node *html.Node) error { 375 | for p := node.NextSibling; p != nil; p = p.NextSibling { 376 | if filter(p) == ErrBreak { 377 | return ErrBreak 378 | } 379 | } 380 | return nil 381 | }) 382 | } 383 | 384 | func (p NodeSet) NextSiblings() (ret NodeSet) { 385 | if p.Err != nil { 386 | return p 387 | } 388 | return NodeSet{Data: &nextSiblingNodes{p.Data}} 389 | } 390 | 391 | // ----------------------------------------------------------------------------- 392 | 393 | type firstChildNodes struct { 394 | data NodeEnum 395 | nodeType html.NodeType 396 | } 397 | 398 | func (p *firstChildNodes) ForEach(filter func(node *html.Node) error) { 399 | p.data.ForEach(func(node *html.Node) error { 400 | child, err := firstChild(node, p.nodeType) 401 | if err != nil { 402 | return err 403 | } 404 | return filter(child) 405 | }) 406 | } 407 | 408 | func (p NodeSet) FirstChild(nodeType html.NodeType) (ret NodeSet) { 409 | if p.Err != nil { 410 | return p 411 | } 412 | return NodeSet{Data: &firstChildNodes{p.Data, nodeType}} 413 | } 414 | 415 | func (p NodeSet) FirstTextChild() (ret NodeSet) { 416 | return p.FirstChild(html.TextNode) 417 | } 418 | 419 | func (p NodeSet) FirstElementChild() (ret NodeSet) { 420 | return p.FirstChild(html.ElementNode) 421 | } 422 | 423 | // ----------------------------------------------------------------------------- 424 | 425 | type lastChildNodes struct { 426 | data NodeEnum 427 | nodeType html.NodeType 428 | } 429 | 430 
| func (p *lastChildNodes) ForEach(filter func(node *html.Node) error) { 431 | p.data.ForEach(func(node *html.Node) error { 432 | child, err := lastChild(node, p.nodeType) 433 | if err != nil { 434 | return err 435 | } 436 | return filter(child) 437 | }) 438 | } 439 | 440 | func (p NodeSet) LastChild(nodeType html.NodeType) (ret NodeSet) { 441 | if p.Err != nil { 442 | return p 443 | } 444 | return NodeSet{Data: &lastChildNodes{p.Data, nodeType}} 445 | } 446 | 447 | func (p NodeSet) LastTextChild() (ret NodeSet) { 448 | return p.LastChild(html.TextNode) 449 | } 450 | 451 | func (p NodeSet) LastElementChild() (ret NodeSet) { 452 | return p.LastChild(html.ElementNode) 453 | } 454 | 455 | // ----------------------------------------------------------------------------- 456 | 457 | type matchedNodes struct { 458 | data NodeEnum 459 | filter func(node *html.Node) bool 460 | } 461 | 462 | func (p *matchedNodes) ForEach(filter func(node *html.Node) error) { 463 | p.data.ForEach(func(node *html.Node) error { 464 | if p.filter(node) { 465 | return filter(node) 466 | } 467 | return ErrNotFound 468 | }) 469 | } 470 | 471 | // Match returns the matched node set. 
472 | func (p NodeSet) Match(filter func(node *html.Node) bool) (ret NodeSet) { 473 | if p.Err != nil { 474 | return p 475 | } 476 | return NodeSet{Data: &matchedNodes{p.Data, filter}} 477 | } 478 | 479 | // ----------------------------------------------------------------------------- 480 | 481 | type textNodes struct { 482 | data NodeEnum 483 | doReplace bool 484 | } 485 | 486 | func (p *textNodes) ForEach(filter func(node *html.Node) error) { 487 | p.data.ForEach(func(t *html.Node) error { 488 | node := &html.Node{ 489 | Parent: t, 490 | Type: html.TextNode, 491 | Data: textOf(t), 492 | } 493 | if p.doReplace { 494 | t.FirstChild = node 495 | t.LastChild = node 496 | } 497 | return filter(node) 498 | }) 499 | } 500 | 501 | func (p NodeSet) ChildrenAsText(doReplace bool) (ret NodeSet) { 502 | if p.Err != nil { 503 | return p 504 | } 505 | return NodeSet{Data: &textNodes{p.Data, doReplace}} 506 | } 507 | 508 | // ----------------------------------------------------------------------------- 509 | 510 | // CollectOne returns the first node. 511 | // If `exactly` is true, it will return an error if there are more than one node. 512 | func (p NodeSet) CollectOne__1(exactly bool) (item *html.Node, err error) { 513 | if p.Err != nil { 514 | return nil, p.Err 515 | } 516 | err = ErrNotFound 517 | if exactly { 518 | p.Data.ForEach(func(node *html.Node) error { 519 | if err == ErrNotFound { 520 | item, err = node, nil 521 | return nil 522 | } 523 | err = ErrTooManyNodes 524 | return ErrBreak 525 | }) 526 | } else { 527 | p.Data.ForEach(func(node *html.Node) error { 528 | item, err = node, nil 529 | return ErrBreak 530 | }) 531 | } 532 | return 533 | } 534 | 535 | // CollectOne returns the first node. 536 | func (p NodeSet) CollectOne__0() (item *html.Node, err error) { 537 | return p.CollectOne__1(false) 538 | } 539 | 540 | // Collect returns all nodes. 
541 | func (p NodeSet) Collect() (items []*html.Node, err error) { 542 | if p.Err != nil { 543 | return nil, p.Err 544 | } 545 | p.Data.ForEach(func(node *html.Node) error { 546 | items = append(items, node) 547 | return nil 548 | }) 549 | return 550 | } 551 | 552 | // ----------------------------------------------------------------------------- 553 | --------------------------------------------------------------------------------