├── .gitignore ├── LICENSE ├── README.md ├── cache ├── cache.go ├── net_cache.go ├── nocache.go └── query_cache.go ├── cmd └── main.go ├── config └── config.go ├── content.go ├── core ├── debug.go ├── error.go ├── grab.go ├── key.go ├── log.go ├── log_dummy.go ├── log_print.go ├── log_test.go ├── md5.go ├── path.go ├── process.go ├── scrape.go └── value.go ├── go.mod ├── go.sum ├── internal ├── action │ ├── action.go │ ├── action_do.go │ └── actions.go ├── grab_impl.go ├── option.go └── scrape_impl.go ├── network └── proxy.go ├── option.go ├── result.go ├── rule ├── action.go ├── action_type.go ├── input_type.go ├── process.go ├── process_property.go ├── process_type.go ├── process_value_type.go ├── rule.go ├── rule_test.go ├── skip_type.go └── web.go ├── scrape.go ├── scrape_test.go └── templates └── javbus.toml /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | /.vscode/ 14 | /.idea/ 15 | /video/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-scrape 2 | 3 | 4 | ```go 5 | DefaultOutputPath = "video" 6 | 7 | //如果需要使用代理请注册代理地址 8 | //RegisterProxy("https://localhost:10808") 9 | //RegisterProxy("http://localhost:10808") 10 | e := RegisterProxy("socks5://localhost:10808") 11 | if e != nil { 12 | return 13 | } 14 | //创建搜刮器 15 | grab2 := NewGrabJavbus() 16 | grab3 := NewGrabJavdb() 17 | scrape := NewScrape(GrabOption(grab2), GrabOption(grab3), OptimizeOption(true)) 18 | 19 | //需要查找的番号:多次或单次调用皆可 20 | e = scrape.Find("abp-891") 21 | e = scrape.Find("abp-892") 22 | 23 | //遍历结果 24 | e = scrape.Range(func(key string, content Content) error { 25 | t.Log("key", key, "info", content) 26 | return nil 27 | }) 28 | //或输出到DefaultOutputPath 29 | e = scrape.Output() 30 | ``` -------------------------------------------------------------------------------- /cache/cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "crypto/sha256" 
// Hash returns the lowercase hexadecimal SHA-256 digest of url.
//
// The digest is used as the storage key for cached responses, so the same url
// always maps to the same cache entry.
func Hash(url string) string {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	hasher.Write([]byte(url))
	return hex.EncodeToString(hasher.Sum(nil))
}
35 | func New(client *http.Client) NetCacher { 36 | _cacheOnce.Do(func() { 37 | _cache = newCache(client) 38 | }) 39 | return _cache 40 | } 41 | 42 | // GetReader ... 43 | func (c *netCache) GetReader(url string, force bool) (io.Reader, error) { 44 | bys, e := c.get(url, force) 45 | if e != nil { 46 | return nil, e 47 | } 48 | return bytes.NewReader(bys), nil 49 | } 50 | 51 | // GetBytes ... 52 | func (c *netCache) GetBytes(url string, force bool) ([]byte, error) { 53 | return c.get(url, force) 54 | } 55 | 56 | func (c *netCache) HasURL(url string) bool { 57 | return c.has(Hash(url)) 58 | } 59 | 60 | func (c *netCache) has(name string) bool { 61 | exist, err := c.Has(name) 62 | return err == nil && exist 63 | } 64 | 65 | func (c *netCache) get(url string, force bool) (bys []byte, e error) { 66 | name := Hash(url) 67 | if !force { 68 | b := c.has(name) 69 | if b { 70 | bys, e = c.Get(name) 71 | if e != nil { 72 | return nil, e 73 | } 74 | log.Debug("CACHE", "query on cache", "url", url, "name", name) 75 | return bys, nil 76 | } 77 | } 78 | cli := network.Client() 79 | req, err := http.NewRequest(http.MethodGet, url, nil) 80 | if err != nil { 81 | return nil, err 82 | } 83 | req.Header.Set("user-agent", 84 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.11 Safari/537.36") 85 | 86 | res, e := cli.Do(req) 87 | if e != nil { 88 | return nil, e 89 | } 90 | defer res.Body.Close() 91 | if res.StatusCode != 200 { 92 | return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) 93 | } 94 | bys, e = ioutil.ReadAll(res.Body) 95 | if e != nil { 96 | return nil, e 97 | } 98 | log.Debug("CACHE", "query on remote server", "url", url, "name", name) 99 | 100 | if !force { 101 | e = c.Set(name, bys) 102 | if e != nil { 103 | return nil, e 104 | } 105 | } 106 | return bys, nil 107 | } 108 | 109 | func (c *netCache) Cache() cacher.Cacher { 110 | return c.Cacher 111 | } 112 | 113 | // Save ... 
114 | func (c *netCache) Save(url, to string) (e error) { 115 | c.lock.Lock() 116 | defer c.lock.Unlock() 117 | s, e := filepath.Abs(to) 118 | if e != nil { 119 | return e 120 | } 121 | dir, _ := filepath.Split(s) 122 | _ = os.MkdirAll(dir, os.ModePerm) 123 | fromData, e := c.Get(url) 124 | if e != nil { 125 | return e 126 | } 127 | 128 | e = ioutil.WriteFile(s, fromData, 0755) 129 | if e != nil { 130 | return e 131 | } 132 | return nil 133 | } 134 | -------------------------------------------------------------------------------- /cache/nocache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "io/ioutil" 8 | "net/http" 9 | 10 | "github.com/PuerkitoBio/goquery" 11 | "github.com/gocacher/cacher" 12 | 13 | "github.com/javscrape/go-scrape/network" 14 | ) 15 | 16 | type nocache struct { 17 | client *http.Client 18 | } 19 | 20 | func (n nocache) Cache() cacher.Cacher { 21 | return _cache.Cache() 22 | } 23 | 24 | func (n nocache) getReader(url string) (io.Reader, error) { 25 | cli := network.Client() 26 | req, err := http.NewRequest(http.MethodGet, url, nil) 27 | if err != nil { 28 | return nil, err 29 | } 30 | req.Header.Set("user-agent", 31 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.11 Safari/537.36") 32 | 33 | res, e := cli.Do(req) 34 | if e != nil { 35 | return nil, e 36 | } 37 | defer res.Body.Close() 38 | if res.StatusCode != 200 { 39 | return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) 40 | } 41 | bys, e := ioutil.ReadAll(res.Body) 42 | if e != nil { 43 | return nil, e 44 | } 45 | return bytes.NewReader(bys), nil 46 | } 47 | 48 | func (n nocache) Query(url string, force bool) (*goquery.Document, error) { 49 | reader, err := n.getReader(url) 50 | if err != nil { 51 | return nil, err 52 | } 53 | return goquery.NewDocumentFromReader(reader) 54 | } 55 | 56 | func (n nocache) 
GetQuery(url string, force bool) (*goquery.Document, error) { 57 | reader, err := n.getReader(url) 58 | if err != nil { 59 | return nil, err 60 | } 61 | return goquery.NewDocumentFromReader(reader) 62 | } 63 | 64 | func (n nocache) ForceQuery(url string) (*goquery.Document, error) { 65 | reader, err := n.getReader(url) 66 | if err != nil { 67 | return nil, err 68 | } 69 | return goquery.NewDocumentFromReader(reader) 70 | } 71 | 72 | func NoCacheQuery(client *http.Client) Querier { 73 | return &nocache{ 74 | client: client, 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /cache/query_cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/PuerkitoBio/goquery" 7 | "github.com/gocacher/cacher" 8 | ) 9 | 10 | type queryCache netCache 11 | 12 | func (c *queryCache) Cache() cacher.Cacher { 13 | return c.net().Cache() 14 | } 15 | 16 | // Query ... 
// Config holds the scrape configuration. Every field is mapped to a JSON key
// through its struct tag.
type Config struct {
	// Cache is the cache path.
	Cache string `json:"cache"`
	// ToUpper selects whether IDs are converted to upper case.
	ToUpper bool `json:"to_upper"`
	// Output is the path that scraped data is written to.
	Output string `json:"output"`
	// Debug turns debug mode on or off.
	Debug bool `json:"debug"`
}
-------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/goextension/gomap" 5 | 6 | "github.com/javscrape/go-scrape/cache" 7 | "github.com/javscrape/go-scrape/rule" 8 | ) 9 | 10 | // IGrab ... 11 | type IGrab interface { 12 | MainPage() string 13 | LoadActions(...rule.Action) error 14 | Cache() cache.Querier 15 | InputType() rule.InputType 16 | InputKey() string 17 | Put(key string, value *Value) 18 | Get(key string) *Value 19 | Run(input string) error 20 | Value() gomap.Map 21 | } 22 | 23 | var Empty = struct{}{} 24 | -------------------------------------------------------------------------------- /core/key.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | type KeyType int 4 | 5 | const ( 6 | KeyTypeSystem = iota 7 | KeyTypeCache 8 | KeyTypeExpression 9 | KeyTypeProto 10 | ) 11 | -------------------------------------------------------------------------------- /core/log.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/goextension/log/zap" 5 | ) 6 | 7 | func InitGlobalLogger(debug bool) { 8 | DEBUG = debug 9 | if debug { 10 | zap.InitZapSugar() 11 | return 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /core/log_dummy.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/goextension/log" 5 | ) 6 | 7 | type dummyLog struct { 8 | } 9 | 10 | func (d *dummyLog) Debug(args ...interface{}) { 11 | 12 | } 13 | 14 | func (d *dummyLog) Info(args ...interface{}) { 15 | 16 | } 17 | 18 | func (d *dummyLog) Warn(args ...interface{}) { 19 | 20 | } 21 | 22 | func (d *dummyLog) Error(args ...interface{}) { 23 | 24 | } 25 | 26 | func (d *dummyLog) DPanic(args ...interface{}) { 27 | 28 | } 29 | 30 | func (d *dummyLog) Panic(args 
// printLog is a minimal logger that writes every record to standard output
// with a "[Level] " prefix. Its methods never touch the receiver, so a nil
// *printLog is usable.
type printLog struct {
}

// printw writes one structured record: the level tag, the message and every
// key/value operand, all separated by single spaces and ended by a newline.
func (p *printLog) printw(tag, msg string, keysAndValues []interface{}) {
	operands := make([]interface{}, 0, len(keysAndValues)+2)
	operands = append(operands, tag, msg)
	operands = append(operands, keysAndValues...)
	fmt.Println(operands...)
}

// Debug prints args at debug level.
func (p *printLog) Debug(args ...interface{}) {
	fmt.Print("[Debug] ")
	fmt.Println(args...)
}

// Info prints args at info level.
func (p *printLog) Info(args ...interface{}) {
	fmt.Print("[Info] ")
	fmt.Println(args...)
}

// Warn prints args at warn level.
func (p *printLog) Warn(args ...interface{}) {
	fmt.Print("[Warn] ")
	fmt.Println(args...)
}

// Error prints args at error level.
func (p *printLog) Error(args ...interface{}) {
	fmt.Print("[Error] ")
	fmt.Println(args...)
}

// DPanic prints args and then panics.
func (p *printLog) DPanic(args ...interface{}) {
	fmt.Print("[DPanic] ")
	fmt.Println(args...)
	panic("dpanic call")
}

// Panic prints args and then panics.
func (p *printLog) Panic(args ...interface{}) {
	fmt.Print("[Panic] ")
	fmt.Println(args...)
	panic("panic call")
}

// Fatal prints args and terminates the process with a non-zero exit status so
// that the failure is visible to the parent process.
func (p *printLog) Fatal(args ...interface{}) {
	fmt.Print("[Fatal] ")
	fmt.Println(args...)
	os.Exit(1)
}

// Debugf prints the formatted message at debug level.
func (p *printLog) Debugf(template string, args ...interface{}) {
	fmt.Print("[Debugf] ")
	fmt.Printf(template+"\n", args...)
}

// Infof prints the formatted message at info level.
func (p *printLog) Infof(template string, args ...interface{}) {
	fmt.Print("[Infof] ")
	fmt.Printf(template+"\n", args...)
}

// Warnf prints the formatted message at warn level.
func (p *printLog) Warnf(template string, args ...interface{}) {
	fmt.Print("[Warnf] ")
	fmt.Printf(template+"\n", args...)
}

// Errorf prints the formatted message at error level.
func (p *printLog) Errorf(template string, args ...interface{}) {
	fmt.Print("[Errorf] ")
	fmt.Printf(template+"\n", args...)
}

// DPanicf prints the formatted message and then panics.
func (p *printLog) DPanicf(template string, args ...interface{}) {
	fmt.Print("[DPanicf] ")
	fmt.Printf(template+"\n", args...)
	panic("dpanicf call")
}

// Panicf prints the formatted message and then panics.
func (p *printLog) Panicf(template string, args ...interface{}) {
	fmt.Print("[Panicf] ")
	fmt.Printf(template+"\n", args...)
	panic("panicf call")
}

// Fatalf prints the formatted message and terminates the process with a
// non-zero exit status.
func (p *printLog) Fatalf(template string, args ...interface{}) {
	fmt.Print("[Fatalf] ")
	fmt.Printf(template+"\n", args...)
	os.Exit(1)
}

// Debugw prints msg followed by the key/value operands at debug level.
func (p *printLog) Debugw(msg string, keysAndValues ...interface{}) {
	p.printw("[Debugw]", msg, keysAndValues)
}

// Infow prints msg followed by the key/value operands at info level.
func (p *printLog) Infow(msg string, keysAndValues ...interface{}) {
	p.printw("[Infow]", msg, keysAndValues)
}

// Warnw prints msg followed by the key/value operands at warn level.
func (p *printLog) Warnw(msg string, keysAndValues ...interface{}) {
	p.printw("[Warnw]", msg, keysAndValues)
}

// Errorw prints msg followed by the key/value operands at error level.
func (p *printLog) Errorw(msg string, keysAndValues ...interface{}) {
	p.printw("[Errorw]", msg, keysAndValues)
}

// DPanicw prints msg with its key/value operands and then panics, matching
// DPanic and DPanicf.
func (p *printLog) DPanicw(msg string, keysAndValues ...interface{}) {
	p.printw("[DPanicw]", msg, keysAndValues)
	panic("dpanicw call")
}

// Panicw prints msg with its key/value operands and then panics, matching
// Panic and Panicf.
func (p *printLog) Panicw(msg string, keysAndValues ...interface{}) {
	p.printw("[Panicw]", msg, keysAndValues)
	panic("panicw call")
}

// Fatalw prints msg with its key/value operands and terminates the process
// with a non-zero exit status, matching Fatal and Fatalf.
func (p *printLog) Fatalw(msg string, keysAndValues ...interface{}) {
	p.printw("[Fatalw]", msg, keysAndValues)
	os.Exit(1)
}
// URL appends the path segments in uris to the path of prefix and returns the
// resulting URL string.
//
// The segments are combined with path.Join, so the result is cleaned: repeated
// slashes collapse, "." and ".." are resolved and a trailing slash is dropped.
// Other URL components of prefix, such as the query, are left as parsed. If
// prefix cannot be parsed it is returned unchanged and uris is ignored.
func URL(prefix string, uris ...string) string {
	parsed, parseErr := url.Parse(prefix)
	if parseErr != nil {
		return prefix
	}

	// The existing path of prefix is always the first segment.
	segments := make([]string, 0, len(uris)+1)
	segments = append(segments, parsed.Path)
	segments = append(segments, uris...)

	parsed.Path = path.Join(segments...)
	return parsed.String()
}
+ v.Encode() 23 | } 24 | -------------------------------------------------------------------------------- /core/process.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/PuerkitoBio/goquery" 7 | "github.com/goextension/log" 8 | 9 | "github.com/javscrape/go-scrape/rule" 10 | ) 11 | 12 | func ProcessValue(selection *goquery.Selection, p rule.Process) *Value { 13 | var v string 14 | switch p.Property { 15 | case rule.ProcessPropertyArray: 16 | var arr []interface{} 17 | selection.Each(func(i int, selection *goquery.Selection) { 18 | v = strings.TrimSpace(selection.Text()) 19 | log.Debug("ACTION", "array", v) 20 | arr = append(arr, v) 21 | }) 22 | if len(arr) != 0 { 23 | return NewArrayValue(arr) 24 | } 25 | case rule.ProcessPropertyValue: 26 | v = selection.Text() 27 | case rule.ProcessPropertyAttr: 28 | v = selection.AttrOr(p.PropertyName, "") 29 | case rule.ProcessPropertyText: 30 | selection.Contents().Each(func(i int, selection *goquery.Selection) { 31 | if goquery.NodeName(selection) == "#text" { 32 | v = selection.Text() 33 | } 34 | }) 35 | } 36 | 37 | v = strings.TrimSpace(v) 38 | if v == "" { 39 | return nil 40 | } 41 | return NewStringValue(v) 42 | } 43 | -------------------------------------------------------------------------------- /core/scrape.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/javscrape/go-scrape/cache" 5 | "github.com/javscrape/go-scrape/rule" 6 | ) 7 | 8 | // IScrape ... 
9 | type IScrape interface { 10 | Cache() cache.Querier 11 | LoadRules(r ...*rule.Rule) ([]IGrab, error) 12 | } 13 | -------------------------------------------------------------------------------- /core/value.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/goextension/gomap" 7 | 8 | "github.com/javscrape/go-scrape/rule" 9 | ) 10 | 11 | type Value struct { 12 | Type rule.ProcessValue 13 | v interface{} 14 | } 15 | 16 | func NewStringValue(value interface{}) *Value { 17 | return &Value{Type: rule.ProcessValueString, v: value} 18 | } 19 | 20 | func NewArrayValue(value []interface{}) *Value { 21 | return &Value{Type: rule.ProcessValueArray, v: value} 22 | } 23 | 24 | func NewMapValue(value interface{}) *Value { 25 | return &Value{Type: rule.ProcessValueMap, v: value} 26 | } 27 | 28 | func NewFileValue(value []byte, fn func(key string, value []byte)) *Value { 29 | key := MD5(value) 30 | fn(key, value) 31 | return &Value{Type: rule.ProcessValueFie, v: value} 32 | } 33 | 34 | func (v Value) GetMap() gomap.Map { 35 | return v.v.(gomap.Map) 36 | } 37 | 38 | func (v Value) GetArray() []string { 39 | return v.v.([]string) 40 | } 41 | 42 | func (v Value) GetString() string { 43 | return v.v.(string) 44 | } 45 | 46 | func (v Value) GetFileHash() string { 47 | return v.v.(string) 48 | } 49 | 50 | func (v Value) String() string { 51 | return fmt.Sprintf("Value(Type:%v,Value:%v)", v.Type, v.v) 52 | } 53 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/javscrape/go-scrape 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/BurntSushi/toml v0.3.1 7 | github.com/PuerkitoBio/goquery v1.7.1 8 | github.com/gocacher/badger-cache/v3 v3.0.2 9 | github.com/gocacher/cacher v1.0.5 10 | github.com/goextension/gomap v0.0.6 11 | github.com/goextension/log 
v0.0.2 12 | github.com/google/uuid v1.3.0 // indirect 13 | golang.org/x/net v0.0.0-20210614182718-04defd469f4e 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= 3 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 4 | github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM= 5 | github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= 6 | github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= 7 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 8 | github.com/PuerkitoBio/goquery v1.7.1 h1:oE+T06D+1T7LNrn91B4aERsRIeCLJ/oPSa6xB9FPnz4= 9 | github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY= 10 | github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= 11 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 12 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 13 | github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= 14 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 15 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 16 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 17 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 18 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 19 | 
github.com/cosiner/argv v0.1.0/go.mod h1:EusR6TucWKX+zFgtdUsKT2Cvg45K5rtpCcWz4hK06d8= 20 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 21 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 22 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 23 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 24 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 25 | github.com/dgraph-io/badger/v3 v3.2011.1 h1:Hmyof0WMEF/QtutX5SQHzIMnJQxb/IrSzhjckV2SD6g= 26 | github.com/dgraph-io/badger/v3 v3.2011.1/go.mod h1:0rLLrQpKVQAL0or/lBLMQznhr6dWWX7h5AKnmnqx268= 27 | github.com/dgraph-io/ristretto v0.0.4-0.20210122082011-bb5d392ed82d h1:eQYOG6A4td1tht0NdJB9Ls6DsXRGb2Ft6X9REU/MbbE= 28 | github.com/dgraph-io/ristretto v0.0.4-0.20210122082011-bb5d392ed82d/go.mod h1:tv2ec8nA7vRpSYX7/MbP52ihrUMXIHit54CQMq8npXQ= 29 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= 30 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 31 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 32 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 33 | github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= 34 | github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= 35 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 36 | github.com/go-delve/delve v1.5.0/go.mod h1:c6b3a1Gry6x8a4LGCe/CWzrocrfaHvkUxCj3k4bvSUQ= 37 | github.com/gocacher/badger-cache/v3 v3.0.2 h1:G+8h1m5UDYKoJ4+lpg+Zwbag8aDR8fOcfLY4nE5iclw= 38 | github.com/gocacher/badger-cache/v3 v3.0.2/go.mod h1:684gRVmt1L3cFFJmwcui1fZHSO6d5ho9qqtsBvkUhQ4= 39 | 
github.com/gocacher/cacher v1.0.4/go.mod h1:59qIKgE1PQiWqo+pl9cXtvNpG0JJl+CpMz+pOeqRQ64= 40 | github.com/gocacher/cacher v1.0.5 h1:Nk7TfMg/rhKvYvuP4xe/qGq3KGndQ769BvsbTUpRqQU= 41 | github.com/gocacher/cacher v1.0.5/go.mod h1:59qIKgE1PQiWqo+pl9cXtvNpG0JJl+CpMz+pOeqRQ64= 42 | github.com/goextension/gomap v0.0.6 h1:xjNX4TwGAO0rLC2mdVnTDmXa6I+1AiANw92qp0x3XRc= 43 | github.com/goextension/gomap v0.0.6/go.mod h1:E2gvFKXPxXL4TBCCb801Mg06Z20A0RIr/dxH4OUJDwM= 44 | github.com/goextension/log v0.0.2 h1:/KMuT22zzIMHe4qD7rR8dnfXWSFAyp+10pTC4fWLztI= 45 | github.com/goextension/log v0.0.2/go.mod h1:fz72q/d4Iw05nbRSbxgGkGNTne3jxrq2Td5ogfunZak= 46 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 47 | github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 h1:ZgQEtGgCBiWRM39fZuwSd1LwSqqSW0hOdXCYYDX0R3I= 48 | github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 49 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 50 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 51 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= 52 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 53 | github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= 54 | github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 55 | github.com/google/flatbuffers v1.12.0 h1:N8EguYFm2wwdpoNcpchQY0tPs85vOJkboFb2dPxmixo= 56 | github.com/google/flatbuffers v1.12.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 57 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 58 | github.com/google/go-dap v0.2.0/go.mod h1:5q8aYQFnHOAZEMP+6vmq25HKYAEwE+LF5yh7JKrrhSQ= 59 | github.com/google/renameio v0.1.0/go.mod 
h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= 60 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 61 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 62 | github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= 63 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 64 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 65 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 66 | github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 67 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 68 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 69 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 70 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 71 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 72 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 73 | github.com/mattn/go-colorable v0.0.0-20170327083344-ded68f7a9561/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= 74 | github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= 75 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 76 | github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= 77 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 78 | github.com/mmcloughlin/avo v0.0.0-20201105074841-5d2f697d268f/go.mod h1:6aKT4zZIrpGqB3RpFU14ByCSSyKY6LfJz4J/JJChHfI= 79 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 80 | 
github.com/peterh/liner v0.0.0-20170317030525-88609521dc4b/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc= 81 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 82 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 83 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 84 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 85 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 86 | github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 87 | github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 88 | github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= 89 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 90 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 91 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 92 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 93 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 94 | github.com/spf13/cobra v0.0.0-20170417170307-b6cb39589372/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= 95 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 96 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 97 | github.com/spf13/pflag v0.0.0-20170417173400-9e4c21054fa1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 98 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 99 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 100 | github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 101 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 102 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 103 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 104 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 105 | github.com/twitchyliquid64/golang-asm v0.15.0/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 106 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 107 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 108 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 109 | go.opencensus.io v0.22.5 h1:dntmOdLpSpHlVqbW5Eay97DelsZHe+55D+xC6i0dDS0= 110 | go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= 111 | go.starlark.net v0.0.0-20190702223751-32f345186213/go.mod h1:c1/X6cHgvdXj6pUlmWKMkuqRnW4K8x2vwt6JAaaircg= 112 | go.uber.org/atomic v1.5.0 h1:OI5t8sDa1Or+q8AeE+yKeB/SDYioSHAgcVljj9JIETY= 113 | go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= 114 | go.uber.org/multierr v1.3.0 h1:sFPn2GLc3poCkfrpIXGhBD2X0CMIo4Q/zSULXrj/+uc= 115 | go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= 116 | go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4= 117 | go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= 118 | go.uber.org/zap v1.13.0 h1:nR6NoDBgAf67s68NhaXbsojM+2gxp3S1hWkHDl27pVU= 119 | go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= 120 | golang.org/x/arch v0.0.0-20190927153633-4e8777c89be4/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4= 121 | 
golang.org/x/arch v0.0.0-20201008161808-52c3e6f60cff/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4= 122 | golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 123 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 124 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 125 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 126 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 127 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 128 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 129 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 130 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 131 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= 132 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 133 | golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= 134 | golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= 135 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 136 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 137 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 138 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 139 | 
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 140 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 141 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 142 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 143 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 144 | golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= 145 | golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 146 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 147 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 148 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 149 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 150 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 151 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 152 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 153 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 154 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 155 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 156 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 157 | golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 158 | golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 159 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 160 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 161 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= 162 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 163 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 164 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 165 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 166 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 167 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 168 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 169 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 170 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 171 | golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 172 | golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 173 | golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 174 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 175 | golang.org/x/tools v0.0.0-20191127201027-ecd32218bd7f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 176 | golang.org/x/tools v0.0.0-20201105001634-bc3cf281b174 h1:0rx0F4EjJNbxTuzWe0KjKcIzs+3VEb/Mrs/d1ciNz1c= 177 | golang.org/x/tools v0.0.0-20201105001634-bc3cf281b174/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 178 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 179 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 180 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 181 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 182 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 183 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 184 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 185 | google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 186 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 187 | google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= 188 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 189 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 190 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= 191 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 192 | gopkg.in/errgo.v2 v2.1.0/go.mod 
h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 193 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 194 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 195 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 196 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 197 | honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= 198 | honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 199 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 200 | -------------------------------------------------------------------------------- /internal/action/action.go: -------------------------------------------------------------------------------- 1 | package action 2 | 3 | import ( 4 | "github.com/javscrape/go-scrape/core" 5 | "github.com/javscrape/go-scrape/rule" 6 | ) 7 | 8 | type Action struct { 9 | core.IGrab 10 | action *rule.Action 11 | } 12 | 13 | func FromAction(grab core.IGrab, action rule.Action) *Action { 14 | return &Action{ 15 | IGrab: grab, 16 | action: &action, 17 | } 18 | } 19 | 20 | func (a *Action) Failure() string { 21 | return a.action.OnFailure 22 | } 23 | 24 | func (a *Action) Success() string { 25 | return a.action.OnSuccess 26 | } 27 | 28 | func (a Action) GetValue(key string) (core.KeyType, string) { 29 | val := key[1:] 30 | switch key[0] { 31 | case '$': 32 | return core.KeyTypeCache, a.Get(val).GetString() 33 | case '%': 34 | return core.KeyTypeExpression, key 35 | case '#': 36 | return core.KeyTypeSystem, a.Get(key).GetString() 37 | } 38 | return core.KeyTypeProto, key 39 | } 40 | -------------------------------------------------------------------------------- /internal/action/action_do.go: -------------------------------------------------------------------------------- 1 | package action 2 | 3 | import ( 4 | "fmt" 5 | 
"net/url" 6 | "strings" 7 | 8 | "github.com/PuerkitoBio/goquery" 9 | "github.com/goextension/gomap" 10 | "github.com/goextension/log" 11 | 12 | "github.com/javscrape/go-scrape/core" 13 | "github.com/javscrape/go-scrape/rule" 14 | ) 15 | 16 | func (a Action) Run() error { 17 | 18 | _, err := a.doWeb() 19 | if err != nil { 20 | return err 21 | } 22 | log.Debug("ACTION", "web html") 23 | return nil 24 | } 25 | 26 | func (a Action) getInputURL(urlpath string, input string) string { 27 | switch a.InputType() { 28 | case rule.InputTypeURL: 29 | return core.URL(urlpath, input) 30 | case rule.InputTypeValue: 31 | return core.URLAddValues(urlpath, url.Values{ 32 | a.InputKey(): []string{input}, 33 | }) 34 | } 35 | return "" 36 | } 37 | 38 | func isSkipped(skipType rule.SkipType, skips []rule.SkipType) bool { 39 | if len(skips) == 0 { 40 | return false 41 | } 42 | for _, skip := range skips { 43 | if skip == skipType { 44 | return true 45 | } 46 | } 47 | return false 48 | } 49 | 50 | func (a Action) getWebURL(relative bool) string { 51 | value := a.getWebValue() 52 | mainPage := a.MainPage() 53 | if relative { 54 | if mainPage == "" { 55 | mainPage = value 56 | } else { 57 | mainPage = core.URL(mainPage, value) 58 | } 59 | } else { 60 | mainPage = core.URL(value) 61 | } 62 | return mainPage 63 | } 64 | 65 | func (a Action) getWebValue() string { 66 | var ret string 67 | if len(a.action.Web.Value) != 0 { 68 | var exps []string 69 | var vals []interface{} 70 | for _, s := range a.action.Web.Value { 71 | t, v := a.GetValue(s) 72 | switch t { 73 | case core.KeyTypeExpression: 74 | exps = append(exps, v) 75 | default: 76 | vals = append(vals, v) 77 | } 78 | } 79 | format := "%v" 80 | if len(exps) == 1 { 81 | format = exps[0] 82 | } else if len(exps) > 1 { 83 | format = strings.Join(exps, "/") 84 | } 85 | 86 | fix := strings.Count(format, "%") - len(vals) 87 | for ; fix > 0; fix-- { 88 | vals = append(vals, "") 89 | } 90 | ret = fmt.Sprintf(format, vals...) 
91 | log.Debug("ACTION", "get from value", ret) 92 | } 93 | return ret 94 | } 95 | 96 | func (a Action) doWeb() (sl string, err error) { 97 | log.Debug("ACTION", "do web query") 98 | var query *goquery.Document 99 | webCache := gomap.New() 100 | 101 | url := a.getWebURL(a.action.Web.Relative) 102 | if !isSkipped(rule.SkipTypeInput, a.action.Web.Skip) { 103 | url = a.getInputURL(url, a.Get("#"+a.InputKey()).GetString()) 104 | } 105 | log.Debug("ACTION", "query page url", url) 106 | query, err = a.Cache().Query(url, false) 107 | 108 | if err != nil { 109 | return "", err 110 | } 111 | 112 | if query == nil { 113 | return "", nil 114 | } 115 | 116 | if a.action.Web.Selector != "" { 117 | log.Debug("ACTION", "do query selector", a.action.Web.Selector) 118 | find := query.Find(a.action.Web.Selector) 119 | a.processDo(webCache, find, a.action.Web.Success) 120 | return find.Html() 121 | } 122 | return query.Html() 123 | } 124 | 125 | func (a *Action) processDo(cache gomap.Map, selection *goquery.Selection, process []rule.Process) *goquery.Selection { 126 | if len(process) == 0 { 127 | return selection 128 | } 129 | for i, s := range process { 130 | ssel := selection.Clone() 131 | if s.Selector != "" { 132 | ssel = ssel.Find(s.Selector) 133 | } 134 | ssel = a.processDo(cache, ssel, s.Compare) 135 | 136 | if ssel == nil { 137 | continue 138 | } 139 | html, _ := ssel.Html() 140 | log.Debug("ACTION", "print compare html", "index", s.Index, html) 141 | 142 | switch s.Type { 143 | case rule.ProcessTypeCompare: 144 | var ret *goquery.Selection 145 | selection.EachWithBreak(func(i int, selection *goquery.Selection) bool { 146 | v := core.ProcessValue(ssel, s) 147 | if v == nil { 148 | return true 149 | } 150 | if strings.Contains(v.GetString(), s.Name) { 151 | ret = selection 152 | return false 153 | } 154 | return true 155 | }) 156 | return ret 157 | case rule.ProcessTypePutArray: 158 | v := core.ProcessValue(ssel, s) 159 | if v != nil { 160 | log.Debug("ACTION", "put web 
value", "name", s.Name, "value", v, "index", i) 161 | a.Put(s.Name, v) 162 | } 163 | case rule.ProcessTypePut: 164 | ssel = goquery.NewDocumentFromNode(ssel.Nodes[s.Index]).First() 165 | html, _ := ssel.Html() 166 | log.Debug("ACTION", "print current html", "index", s.Index, html) 167 | v := core.ProcessValue(ssel, s) 168 | if v != nil { 169 | log.Debug("ACTION", "put web value", "name", s.Name, "value", v, "index", i) 170 | a.Put(s.Name, v) 171 | } 172 | } 173 | } 174 | return nil 175 | } 176 | 177 | func (a Action) doWebSuccessPut() { 178 | 179 | } 180 | -------------------------------------------------------------------------------- /internal/action/actions.go: -------------------------------------------------------------------------------- 1 | package action 2 | 3 | import ( 4 | "sort" 5 | ) 6 | 7 | type Actions []*Action 8 | 9 | func (a Actions) Len() int { 10 | return len(a) 11 | } 12 | 13 | func (a Actions) Less(i, j int) bool { 14 | return a[i].action.Index < a[j].action.Index 15 | } 16 | 17 | func (a Actions) Swap(i, j int) { 18 | a[i], a[j] = a[j], a[i] 19 | } 20 | 21 | func (a Actions) Sort() []*Action { 22 | sort.Sort(a) 23 | return a 24 | } 25 | -------------------------------------------------------------------------------- /internal/grab_impl.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | 7 | "github.com/goextension/gomap" 8 | "github.com/goextension/log" 9 | 10 | "github.com/javscrape/go-scrape/core" 11 | "github.com/javscrape/go-scrape/internal/action" 12 | "github.com/javscrape/go-scrape/rule" 13 | ) 14 | 15 | var ErrActionIsAlreadyExist = errors.New("action is already exist") 16 | 17 | type grabImpl struct { 18 | core.IScrape 19 | inputType rule.InputType 20 | inputKey string 21 | actions map[string]*action.Action 22 | group map[string][]*action.Action 23 | value struct { 24 | lock sync.RWMutex 25 | keys map[string]struct{} 26 | gomap.Map 27 | 
}
}

// InputType returns the input type taken from the rule.
func (g *grabImpl) InputType() rule.InputType {
	return g.inputType
}

// InputKey returns the key under which Run stores the caller's input.
func (g *grabImpl) InputKey() string {
	return g.inputKey
}

// Name returns the rule name stored under the inner key "#name".
func (g *grabImpl) Name() string {
	return g.innerString("#name")
}

// PutInner stores value under key prefixed with "#". Inner entries are not
// recorded in the key set, so Value does not include them.
func (g *grabImpl) PutInner(key string, value *core.Value) {
	g.value.lock.Lock()
	g.value.Set("#"+key, value)
	g.value.lock.Unlock()
}

// Put stores value under key and records the key so that Value reports it.
func (g *grabImpl) Put(key string, value *core.Value) {
	g.value.lock.Lock()
	g.value.keys[key] = core.Empty
	g.value.Set(key, value)
	g.value.lock.Unlock()
}

// Get returns the value stored under key, or nil when the key is missing or
// the stored entry is not a *core.Value.
func (g *grabImpl) Get(key string) *core.Value {
	g.value.lock.RLock()
	raw := g.value.Get(key)
	g.value.lock.RUnlock()

	// Checked assertion: preset entries may be stored as raw values, and an
	// unchecked assertion on them would panic.
	v, _ := raw.(*core.Value)
	return v
}

// Value returns a map restricted to the keys written through Put.
func (g *grabImpl) Value() gomap.Map {
	g.value.lock.Lock()
	defer g.value.lock.Unlock()
	var keys []string
	for k := range g.value.keys {
		keys = append(keys, k)
	}
	return g.value.Only(keys)
}

// MainPage returns the value stored under "#main_page", or "" when the rule
// did not set one.
func (g *grabImpl) MainPage() string {
	return g.innerString("#main_page")
}

// Entrance returns the value stored under "#entrance", or "" when the rule
// did not set one.
func (g *grabImpl) Entrance() string {
	return g.innerString("#entrance")
}

// innerString returns the string form of the value stored under key, or ""
// when nothing usable is stored there. NewGrab only sets "#main_page" and
// "#entrance" when the rule provides them, so a missing entry is expected.
func (g *grabImpl) innerString(key string) string {
	v := g.Get(key)
	if v == nil {
		return ""
	}
	return v.GetString()
}

// LoadActions registers the given rule actions. Group actions are appended to
// the group sharing their name; every other type (including an empty one) is
// normalised to rule.ActionTypeAction and must have a unique name, otherwise
// ErrActionIsAlreadyExist is returned.
func (g *grabImpl) LoadActions(acts ...rule.Action) error {
	for _, v := range acts {
		if v.Type == rule.ActionTypeGroup {
			log.Debug("GRAB", "load group", v.Name)
			g.group[v.Name] = append(g.group[v.Name], action.FromAction(g, v))
			continue
		}

		v.Type = rule.ActionTypeAction
		log.Debug("GRAB", "load action", v.Name)
		if _, exist := g.actions[v.Name]; exist {
			return ErrActionIsAlreadyExist
		}
		g.actions[v.Name] = action.FromAction(g, v)
	}
	return nil
}

// Run stores input as an inner value under InputKey and then executes the
// actions named by Entrance.
func (g *grabImpl) Run(input string) error {
	g.PutInner(g.InputKey(), core.NewStringValue(input))
	return g.actionDo(g.Entrance())
}

// actionDo runs the action registered under name and then follows its success
// or failure successor. An unknown (or empty) name ends the chain with nil.
func (g *grabImpl) actionDo(name string) error {
	actions := g.getActions(name)
	log.Debug("GRAB", "get actions", name, "total", len(actions))
	if len(actions) == 0 {
		return nil
	}
	// The input is stored by PutInner, i.e. under the "#"-prefixed key.
	log.Debug("GRAB", "start action", name, "query", g.Get("#"+g.InputKey()))

	// NOTE(review): only the first action (lowest Index) of a group is run.
	// The previous loop returned unconditionally in its first iteration, so
	// this keeps the same behaviour; confirm whether the remaining group
	// members were meant to run as well.
	current := actions[0]
	if err := current.Run(); err != nil {
		next := current.Failure()
		if len(g.getActions(next)) == 0 {
			// No failure handler is registered: report the error instead of
			// silently dropping it.
			return err
		}
		return g.actionDo(next)
	}
	return g.actionDo(current.Success())
}

// getActions returns the actions registered under name, sorted by Index. A
// plain action takes precedence over a group with the same name; an unknown
// name yields an empty result.
func (g *grabImpl) getActions(name string) []*action.Action {
	var found action.Actions
	if single, ok := g.actions[name]; ok {
		found = action.Actions{single}
	} else if grouped, ok := g.group[name]; ok {
		found = grouped
	}
	return found.Sort()
}

// NewGrab builds a grab for rule r on top of scrape. Preset entries are copied
// into the value store, the rule name, main page and entrance are stored as
// inner values, and an empty r.InputKey is defaulted to "input" (this writes
// back to r).
func NewGrab(scrape core.IScrape, r *rule.Rule) core.IGrab {
	value := gomap.New()
	for k, raw := range r.Preset {
		// Get hands out *core.Value, so wrap string presets; other preset
		// types are stored unchanged.
		if s, ok := raw.(string); ok {
			value.Set(k, core.NewStringValue(s))
			continue
		}
		value.Set(k, raw)
	}
	value.Set("#name", core.NewStringValue(r.Name))
	if r.MainPage != "" {
		value.Set("#main_page", core.NewStringValue(r.MainPage))
	}
	if r.Entrance != "" {
		value.Set("#entrance", core.NewStringValue(r.Entrance))
	}

	if r.InputKey == "" {
		r.InputKey = "input"
	}

	value.Range(func(key string, stored interface{}) bool {
		// Log the stored entry as-is; asserting *core.Value here would panic
		// on raw preset values.
		log.Debug("GRAB", "init map value", key, stored)
		return true
	})

	return &grabImpl{
		IScrape:   scrape,
		inputType: r.InputType,
		inputKey:  r.InputKey,
		actions:   make(map[string]*action.Action),
		group:     make(map[string][]*action.Action),
		value: struct {
			lock sync.RWMutex
			keys map[string]struct{}
			gomap.Map
		}{Map: value, keys: make(map[string]struct{})},
	}
}

var _ core.IGrab = (*grabImpl)(nil)

-------------------------------------------------------------------------------- /internal/option.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "github.com/javscrape/go-scrape/cache" 5 | "github.com/javscrape/go-scrape/config" 6 | "github.com/javscrape/go-scrape/network" 7 | ) 8 | 9 | // Options ... 10 | type Options func(impl *scrapeImpl) 11 | 12 | // CacheOption ... 13 | func CacheOption(cache cache.Querier) Options { 14 | return func(impl *scrapeImpl) { 15 | impl.cache = cache 16 | } 17 | } 18 | 19 | func ConfigOption(config *config.Config) Options { 20 | return func(impl *scrapeImpl) { 21 | impl.config = config 22 | } 23 | } 24 | 25 | func ProxyOption(addr string) Options { 26 | return func(impl *scrapeImpl) { 27 | err := network.RegisterProxy(addr) 28 | if err != nil { 29 | panic(err) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /internal/scrape_impl.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/goextension/log" 7 | "github.com/google/uuid" 8 | 9 | "github.com/javscrape/go-scrape/cache" 10 | "github.com/javscrape/go-scrape/config" 11 | "github.com/javscrape/go-scrape/core" 12 | "github.com/javscrape/go-scrape/network" 13 | "github.com/javscrape/go-scrape/rule" 14 | ) 15 | 16 | type scrapeImpl struct { 17 | config *config.Config 18 | cache cache.Querier 19 | //grabs []Grab 20 | } 21 | 22 | // NewScrape ... 
23 | func NewScrape(opts ...Options) core.IScrape { 24 | scrape := &scrapeImpl{} 25 | 26 | for _, opt := range opts { 27 | opt(scrape) 28 | } 29 | 30 | scrape.init() 31 | return scrape 32 | } 33 | 34 | func (s *scrapeImpl) init() { 35 | if s.cache == nil { 36 | s.cache = cache.NewQueryCache(network.Client()) 37 | } 38 | if s.config == nil { 39 | s.config = config.DefaultConfig() 40 | } 41 | fmt.Println("DEBUG ON:", s.config.Debug) 42 | core.InitGlobalLogger(s.config.Debug) 43 | } 44 | 45 | func (s *scrapeImpl) Cache() cache.Querier { 46 | return s.cache 47 | } 48 | 49 | func (s *scrapeImpl) LoadRules(rs ...*rule.Rule) ([]core.IGrab, error) { 50 | if len(rs) == 0 { 51 | return nil, core.ErrEmptyRule 52 | } 53 | var gs []core.IGrab 54 | for i := range rs { 55 | log.Debug("SCRAPE", "new grab", "index", i) 56 | if rs[i].InputType == "" { 57 | rs[i].InputType = rule.InputTypeURL 58 | } 59 | if rs[i].Name == "" { 60 | rs[i].Name = uuid.New().String() 61 | } 62 | g := NewGrab(s, rs[i]) 63 | if err := g.LoadActions(rs[i].Actions...); err != nil { 64 | return nil, err 65 | } 66 | gs = append(gs, g) 67 | } 68 | return gs, nil 69 | } 70 | 71 | var _ core.IScrape = (*scrapeImpl)(nil) 72 | -------------------------------------------------------------------------------- /network/proxy.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "net" 7 | "net/http" 8 | "net/url" 9 | "time" 10 | 11 | "github.com/goextension/log" 12 | "golang.org/x/net/proxy" 13 | ) 14 | 15 | //var log = trait.NewZapSugar() 16 | var ( 17 | cli *http.Client 18 | ) 19 | 20 | func init() { 21 | cli = http.DefaultClient 22 | } 23 | 24 | // ProxyArgs ... 25 | type ProxyArgs func(cli *http.Client) 26 | 27 | // TimeOut ... 28 | func TimeOut(sec int) ProxyArgs { 29 | return func(cli *http.Client) { 30 | cli.Timeout = time.Duration(sec) * time.Second 31 | } 32 | } 33 | 34 | // RegisterProxy ... 
35 | func RegisterProxy(addr string, args ...ProxyArgs) (e error) { 36 | log.Debug("NETWORK", "register proxy", "addr", addr) 37 | u, e := url.Parse(addr) 38 | if e != nil { 39 | return e 40 | } 41 | var transport *http.Transport 42 | switch u.Scheme { 43 | case "http", "https": 44 | transport = getHTTPTransport(u) 45 | case "socks5": 46 | transport = getSOCKS5Transport(u.Host) 47 | default: 48 | transport = &http.Transport{ 49 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 50 | } 51 | } 52 | cli = &http.Client{ 53 | Transport: transport, 54 | CheckRedirect: nil, 55 | Jar: nil, 56 | Timeout: 60 * time.Second, 57 | } 58 | 59 | for _, fn := range args { 60 | fn(cli) 61 | } 62 | return nil 63 | } 64 | 65 | func getHTTPTransport(u *url.URL) *http.Transport { 66 | return &http.Transport{ 67 | Proxy: http.ProxyURL(u), 68 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 69 | } 70 | } 71 | 72 | func proxySOCKS5(addr string) (proxy.Dialer, error) { 73 | return proxy.SOCKS5("tcp", addr, 74 | nil, //&proxy.Auth{User: "", Password: ""}, 75 | &net.Dialer{ 76 | Timeout: 30 * time.Second, 77 | KeepAlive: 30 * time.Second, 78 | }, 79 | ) 80 | } 81 | func getSOCKS5Transport(addr string) *http.Transport { 82 | queryProxy, err := proxySOCKS5(addr) 83 | if err != nil { 84 | return &http.Transport{ 85 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 86 | } 87 | } 88 | return &http.Transport{ 89 | DialContext: func(ctx context.Context, network, addr string) (conn net.Conn, e error) { 90 | return queryProxy.Dial(network, addr) 91 | }, 92 | } 93 | } 94 | 95 | func Client() *http.Client { 96 | return cli 97 | } 98 | -------------------------------------------------------------------------------- /option.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "github.com/javscrape/go-scrape/internal" 5 | ) 6 | 7 | // Options ... 8 | type Options = internal.Options 9 | 10 | // CacheOption ... 
11 | var CacheOption = internal.CacheOption 12 | 13 | var ConfigOption = internal.ConfigOption 14 | 15 | var ProxyOption = internal.ProxyOption 16 | -------------------------------------------------------------------------------- /result.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | type Result map[string]interface{} 4 | -------------------------------------------------------------------------------- /rule/action.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | //Action ... 4 | type Action struct { 5 | Type ActionType `toml:"type,omitempty"` 6 | Name string `toml:"name,omitempty"` 7 | Index int `toml:"index,omitempty"` 8 | Web Web `toml:"web,omitempty"` 9 | OnSuccess string `toml:"on_success,omitempty"` 10 | OnFailure string `toml:"on_failure,omitempty"` 11 | Success []Process `toml:"success,omitempty"` 12 | Failure []Process `toml:"failure,omitempty"` 13 | } 14 | -------------------------------------------------------------------------------- /rule/action_type.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | //ActionType action type 4 | type ActionType string 5 | 6 | const ( 7 | ActionTypeNone ActionType = "none" 8 | ActionTypeAction = "action" 9 | ActionTypeGroup = "group" 10 | ) 11 | -------------------------------------------------------------------------------- /rule/input_type.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type InputType string 4 | 5 | const ( 6 | InputTypeURL = "url" 7 | InputTypeValue = "value" 8 | InputTypeJSON = "json" 9 | ) 10 | -------------------------------------------------------------------------------- /rule/process.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type Process struct { 4 | Name string `toml:"name,omitempty"` 
5 | Selector string `toml:"selector,omitempty"` 6 | Compare string `toml:"compare,omitempty"` 7 | Type ProcessType `toml:"type,omitempty"` 8 | Property string `toml:"property,omitempty"` 9 | PropertyIndex int `toml:"property_index,omitempty"` 10 | PropertyName string `toml:"property_name,omitempty"` 11 | Value ProcessValue `toml:"value,omitempty"` 12 | Do []Process `toml:"do,omitempty"` 13 | } 14 | -------------------------------------------------------------------------------- /rule/process_property.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type ProcessProperty string 4 | 5 | const ( 6 | ProcessPropertyAttr = "attr" 7 | ProcessPropertyText = "text" 8 | ProcessPropertyArray = "array" 9 | ProcessPropertyValue = "value" 10 | ) 11 | -------------------------------------------------------------------------------- /rule/process_type.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type ProcessType string 4 | 5 | const ( 6 | ProcessTypePut = "put" 7 | ProcessTypePutArray = "put_array" 8 | ProcessTypeCache = "cache" 9 | ProcessTypeCacheArray = "cache_array" 10 | ProcessTypeCompare = "compare" 11 | ) 12 | -------------------------------------------------------------------------------- /rule/process_value_type.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type ProcessValue string 4 | 5 | const ( 6 | ProcessValueString = "string" 7 | ProcessValueArray = "array" 8 | ProcessValueMap = "map" 9 | ProcessValueFie = "file" 10 | ) 11 | -------------------------------------------------------------------------------- /rule/rule.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/BurntSushi/toml" 7 | ) 8 | 9 | // Rule ... 
10 | // @Description: 11 | type Rule struct { 12 | Name string `toml:"name,omitempty"` 13 | Entrance string `toml:"entrance,omitempty"` 14 | MainPage string `toml:"main_page,omitempty"` 15 | InputKey string `toml:"input_key,omitempty"` 16 | InputType InputType `toml:"input_type,omitempty"` 17 | Preset map[string]interface{} `toml:"preset,omitempty"` 18 | Actions []Action `toml:"actions,omitempty"` 19 | } 20 | 21 | func LoadRuleFromFile(file string) (*Rule, error) { 22 | var r Rule 23 | _, err := toml.DecodeFile(file, &r) 24 | return &r, err 25 | } 26 | 27 | func SaveRuleToFile(file string, r *Rule) error { 28 | openFile, err := os.OpenFile(file, os.O_CREATE|os.O_RDWR|os.O_TRUNC|os.O_SYNC, 0755) 29 | if err != nil { 30 | return err 31 | } 32 | defer openFile.Close() 33 | enc := toml.NewEncoder(openFile) 34 | return enc.Encode(r) 35 | } 36 | -------------------------------------------------------------------------------- /rule/rule_test.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestSaveRuleToFile(t *testing.T) { 8 | type args struct { 9 | file string 10 | r *Rule 11 | } 12 | tests := []struct { 13 | name string 14 | args args 15 | wantErr bool 16 | }{ 17 | // TODO: Add test cases. 
18 | { 19 | name: "", 20 | args: args{ 21 | file: "tmp.toml", 22 | r: &Rule{ 23 | Entrance: "", 24 | MainPage: "", 25 | Actions: []Action{ 26 | { 27 | Type: "", 28 | Name: "", 29 | Index: 0, 30 | Web: Web{ 31 | Method: "GET", 32 | Header: map[string][]string{ 33 | "cookie": {"1"}, 34 | }, 35 | Skip: []SkipType{ 36 | SkipTypeInput, SkipTypeMainPage, 37 | }, 38 | Value: []string{"test from"}, 39 | Relative: true, 40 | Selector: "", 41 | Success: nil, 42 | }, 43 | OnSuccess: "detail", 44 | OnFailure: "", 45 | Success: []Process{ 46 | { 47 | Name: "nexturl", 48 | Selector: "", 49 | Compare: []Process{ 50 | { 51 | Name: "xxx", 52 | Selector: "span", 53 | Compare: nil, 54 | Index: 0, 55 | Type: "", 56 | Property: "", 57 | PropertyIndex: 0, 58 | PropertyName: "", 59 | Value: "", 60 | Do: nil, 61 | }, 62 | }, 63 | Index: 0, 64 | Type: "put", 65 | Property: "attr", 66 | PropertyIndex: 0, 67 | PropertyName: "", 68 | Value: "", 69 | }, 70 | }, 71 | }, 72 | { 73 | Type: "", 74 | Name: "detail", 75 | Index: 2, 76 | Web: Web{ 77 | Method: "GET", 78 | Header: nil, 79 | Value: []string{"$nexturl"}, 80 | //URL: "test url", 81 | //URI: "test uri", 82 | Selector: "", 83 | Success: []Process{ 84 | { 85 | Name: "", 86 | Selector: "", 87 | Compare: []Process{ 88 | { 89 | Name: "zzzzz", 90 | Selector: "", 91 | Compare: nil, 92 | Index: 0, 93 | Type: "", 94 | Property: "", 95 | PropertyIndex: 0, 96 | PropertyName: "", 97 | Value: "", 98 | Do: nil, 99 | }, 100 | }, 101 | Index: 0, 102 | Type: "", 103 | Property: "", 104 | PropertyIndex: 0, 105 | PropertyName: "", 106 | Value: "", 107 | }, 108 | }, 109 | }, 110 | OnSuccess: "", 111 | OnFailure: "", 112 | }, 113 | { 114 | Type: "", 115 | Name: "getvalue", 116 | Index: 3, 117 | Web: Web{ 118 | //Method: "GET", 119 | //Header: map[string][]string{ 120 | // "cookie": {"1"}, 121 | //}, 122 | //URL: "test url", 123 | //URI: "test uri", 124 | }, 125 | OnSuccess: "", 126 | OnFailure: "", 127 | Success: []Process{}, 128 | }, 129 | }, 130 | }, 131 | 
}, 132 | wantErr: false, 133 | }, 134 | } 135 | for _, tt := range tests { 136 | t.Run(tt.name, func(t *testing.T) { 137 | if err := SaveRuleToFile(tt.args.file, tt.args.r); (err != nil) != tt.wantErr { 138 | t.Errorf("SaveRuleToFile() error = %v, wantErr %v", err, tt.wantErr) 139 | } 140 | }) 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /rule/skip_type.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type SkipType string 4 | 5 | const ( 6 | SkipTypeMainPage SkipType = "main" 7 | SkipTypeInput SkipType = "input" 8 | ) 9 | -------------------------------------------------------------------------------- /rule/web.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | type Web struct { 4 | Method string `toml:"method,omitempty"` 5 | Header map[string][]string `toml:"header,omitempty"` 6 | Value []string `toml:"value,omitempty"` 7 | Relative bool `toml:"relative,omitempty"` 8 | Skip []SkipType `toml:"skip,omitempty"` 9 | Selector string `toml:"selector,omitempty"` 10 | Success []Process `toml:"success,omitempty"` 11 | } 12 | -------------------------------------------------------------------------------- /scrape.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "github.com/goextension/log" 5 | 6 | "github.com/javscrape/go-scrape/core" 7 | "github.com/javscrape/go-scrape/internal" 8 | ) 9 | 10 | func init() { 11 | log.Register(core.PrintLogger) 12 | } 13 | 14 | var New = internal.NewScrape 15 | -------------------------------------------------------------------------------- /scrape_test.go: -------------------------------------------------------------------------------- 1 | package scrape 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/goextension/log" 7 | 8 | "github.com/javscrape/go-scrape/config" 9 | 
"github.com/javscrape/go-scrape/core" 10 | "github.com/javscrape/go-scrape/rule" 11 | ) 12 | 13 | var cfg = config.DefaultConfig() 14 | var scrape core.IScrape 15 | 16 | func init() { 17 | cfg.Debug = true 18 | core.DEBUG = true 19 | //core.InitGlobalLogger(cfg.Debug) 20 | } 21 | 22 | // TestNewScrape ... 23 | func TestNew(t *testing.T) { 24 | scrape = New(ProxyOption("http://127.0.0.1:7890")) 25 | 26 | r, err := rule.LoadRuleFromFile("./templates/javbus.toml") 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | log.Debug("TEST", "load rules") 31 | grabs, err := scrape.LoadRules(r) 32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | 36 | if len(grabs) == 0 { 37 | t.Fatal("empty grabs list") 38 | } 39 | 40 | err = grabs[0].Run("ABW-140") 41 | if err != nil { 42 | t.Fatal(err) 43 | return 44 | } 45 | 46 | m := grabs[0].Value() 47 | m.Range(func(key string, value interface{}) bool { 48 | v := value.(*core.Value) 49 | log.Debug("Test", "print value", "key", key, "value", v) 50 | return true 51 | }) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /templates/javbus.toml: -------------------------------------------------------------------------------- 1 | entrance = "search" 2 | main_page = "https://www.javbus.com/" 3 | 4 | [[actions]] 5 | type = "group" 6 | name = "search" 7 | index = 0 8 | through = false 9 | on_success = "detail" 10 | on_failure = "" 11 | [actions.web] 12 | method = "GET" 13 | relative = true 14 | value = ["/search"] 15 | selector = "div > a.movie-box" 16 | [[actions.web.success]] 17 | name = "nexturl" 18 | trim = true 19 | type = "put" 20 | property = "attr" 21 | property_name = "href" 22 | 23 | [[actions]] 24 | type = "" 25 | name = "detail" 26 | index = 1 27 | through = false 28 | on_success = "" 29 | on_failure = "" 30 | [actions.web] 31 | method = "GET" 32 | relative = false 33 | skip = ["input"] 34 | value = ["$nexturl"] 35 | selector = "body > div.container" 36 | [[actions.web.success]] 37 | 
selector = "h3" 38 | name = "title" 39 | trim = true 40 | type = "put" 41 | property = "value" 42 | 43 | [[actions.web.success]] 44 | selector = "div.row.movie > div.col-md-3.info > p" 45 | name = "code_no" 46 | trim = true 47 | type = "put" 48 | property = "value" 49 | [[actions.web.success.compare]] 50 | selector = "span" 51 | name = "識別碼" 52 | type = "compare" 53 | property = "text" 54 | 55 | [[actions.web.success]] 56 | selector = "div.row.movie > div.col-md-3.info > p:nth-child(2)" 57 | name = "release" 58 | trim = true 59 | type = "put" 60 | property = "text" 61 | 62 | [[actions.web.success]] 63 | selector = "div.row.movie > div.col-md-3.info > p:nth-child(3)" 64 | name = "length" 65 | trim = true 66 | type = "put" 67 | property = "text" 68 | 69 | [[actions.web.success]] 70 | selector = "div.row.movie > div.col-md-3.info > p:nth-child(4) > a" 71 | name = "studio" 72 | trim = true 73 | type = "put" 74 | property = "value" 75 | 76 | [[actions.web.success]] 77 | selector = "div.row.movie > div.col-md-3.info > p > a" 78 | index = 1 79 | name = "label" 80 | trim = true 81 | type = "put" 82 | property = "value" 83 | 84 | [[actions.web.success]] 85 | selector = "div.row.movie > div.col-md-3.info > p > a" 86 | index = 2 87 | name = "series" 88 | trim = true 89 | type = "put" 90 | property = "value" 91 | 92 | [[actions.web.success]] 93 | selector = "div.row.movie > div.col-md-3.info > p > span.genre > label" 94 | index = 0 95 | name = "genre" 96 | trim = true 97 | type = "put_array" 98 | property = "array" 99 | 100 | [[actions.web.success]] 101 | selector = "div.row.movie > div.col-md-3.info > p" 102 | index = 9 103 | name = "star" 104 | trim = true 105 | type = "put" 106 | property = "array" 107 | 108 | [[actions]] 109 | type = "" 110 | name = "finish" 111 | index = 2 112 | through = false 113 | on_success = "" 114 | on_failure = "" 115 | [actions.web] 116 | method = "GET" 117 | url = "test url" 118 | uri = "test uri" 119 | selector = "" 120 | 
--------------------------------------------------------------------------------