├── .github ├── FUNDING.yml └── workflows │ ├── golang.yml │ └── golangci.yml ├── .gitignore ├── .golangci.yml ├── LICENSE.md ├── README.md ├── chrome.go ├── domain.go ├── email.go ├── go.mod ├── go.sum ├── scrape.go ├── scrape_test.go └── scraper.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [lawzava] 2 | -------------------------------------------------------------------------------- /.github/workflows/golang.yml: -------------------------------------------------------------------------------- 1 | name: Golang 2 | on: [push] 3 | env: 4 | GO111MODULE: on 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Setup Go for use with actions 10 | uses: actions/setup-go@v2 11 | with: 12 | go-version: 1.21 13 | - uses: actions/checkout@v2 14 | - uses: actions/cache@v2 15 | with: 16 | path: ~/go/pkg/mod 17 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 18 | restore-keys: | 19 | ${{ runner.os }}-go- 20 | - name: Run tests 21 | run: go test -race -covermode atomic -coverprofile=covprofile ./... 
22 | - name: Install goveralls 23 | env: 24 | GO111MODULE: off 25 | run: go get github.com/mattn/goveralls 26 | - name: Send coverage 27 | env: 28 | COVERALLS_TOKEN: ${{ secrets.COVERALLS_TOKEN }} 29 | run: goveralls -coverprofile=covprofile -------------------------------------------------------------------------------- /.github/workflows/golangci.yml: -------------------------------------------------------------------------------- 1 | name: golangci 2 | on: [push] 3 | env: 4 | GO111MODULE: on 5 | jobs: 6 | golangci: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - name: golangci-lint 11 | uses: golangci/golangci-lint-action@v2 12 | with: 13 | version: v1.54.2 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters-settings: 2 | gocritic: 3 | enabled-tags: 4 | - diagnostic 5 | - experimental 6 | - opinionated 7 | - performance 8 | - style 9 | goimports: 10 | local-prefixes: github.com/lawzava/emailscraper 11 | govet: 12 | check-shadowing: true 13 | misspell: 14 | locale: US 15 | nakedret: 16 | max-func-lines: 2 17 | gofumpt: 18 | extra-rules: true 19 | 20 | linters: 21 | enable-all: true 22 | disable: 23 | - depguard -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2021 Lawrence Zava 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GolangCI](https://github.com/lawzava/emailscraper/workflows/golangci/badge.svg?branch=main) 2 | [![Version](https://img.shields.io/badge/version-v1.2.1-green.svg)](https://github.com/lawzava/emailscraper/releases) 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/lawzava/emailscraper)](https://goreportcard.com/report/github.com/lawzava/emailscraper) 4 | [![Coverage Status](https://coveralls.io/repos/github/lawzava/emailscraper/badge.svg?branch=main)](https://coveralls.io/github/lawzava/emailscraper?branch=main) 5 | [![Go Reference](https://pkg.go.dev/badge/github.com/lawzava/emailscraper.svg)](https://pkg.go.dev/github.com/lawzava/emailscraper) 6 | 7 | # emailscraper 8 | 9 | Minimalistic library to scrape emails from websites. 10 | 11 | Requires chromium or google-chrome available in environment for JS render utilization. 
12 | 13 | ## Installation 14 | 15 | ``` 16 | go get github.com/lawzava/emailscraper 17 | ``` 18 | 19 | ## Usage 20 | 21 | ```go 22 | package main 23 | 24 | import ( 25 | "fmt" 26 | 27 | "github.com/lawzava/emailscraper" 28 | ) 29 | 30 | func main() { 31 | s := emailscraper.New(emailscraper.DefaultConfig()) 32 | 33 | extractedEmails, err := s.Scrape("https://lawzava.com") 34 | if err != nil { 35 | panic(err) 36 | } 37 | 38 | fmt.Println(extractedEmails) 39 | } 40 | ``` 41 | -------------------------------------------------------------------------------- /chrome.go: -------------------------------------------------------------------------------- 1 | package emailscraper 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/chromedp/chromedp" 9 | "github.com/gocolly/colly/v2" 10 | ) 11 | 12 | func initiateScrapingFromChrome(response *colly.Response, timeout int) error { 13 | opts := []chromedp.ExecAllocatorOption{ 14 | //nolint:lll // allow longer line here 15 | chromedp.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3830.0 Safari/537.36"), 16 | //nolint:gomnd // allow magic number here 17 | chromedp.WindowSize(1920, 1080), 18 | chromedp.NoFirstRun, 19 | chromedp.Headless, 20 | chromedp.DisableGPU, 21 | } 22 | 23 | ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) 
24 | defer cancel() 25 | 26 | ctx, cancel = chromedp.NewContext(ctx) 27 | defer cancel() 28 | 29 | if timeout > 0 { 30 | ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second) 31 | defer cancel() 32 | } 33 | 34 | var res string 35 | if err := chromedp.Run(ctx, chromedp.Navigate(response.Request.URL.String()), 36 | chromedp.InnerHTML("html", &res), // Scrape whole rendered page 37 | ); err != nil { 38 | return fmt.Errorf("executing chromedp: %w", err) 39 | } 40 | 41 | response.Body = []byte(res) 42 | 43 | return nil 44 | } 45 | -------------------------------------------------------------------------------- /domain.go: -------------------------------------------------------------------------------- 1 | package emailscraper 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "strings" 7 | ) 8 | 9 | // Trim the input domain to whitelist root. 10 | func prepareAllowedDomain(requestURL string) ([]string, error) { 11 | requestURL = "https://" + trimProtocol(requestURL) 12 | 13 | u, err := url.ParseRequestURI(requestURL) 14 | if err != nil { 15 | return nil, fmt.Errorf("failed to parse request URI: %w", err) 16 | } 17 | 18 | domain := strings.TrimPrefix(u.Hostname(), "www.") 19 | 20 | return []string{ 21 | domain, 22 | "www." + domain, 23 | "http://" + domain, 24 | "https://" + domain, 25 | "http://www." + domain, 26 | "https://www." 
// trimProtocol strips a leading "http://" and then a leading "https://" from
// requestURL and returns the remainder.
//
// The scheme is matched case-insensitively because URL schemes are
// case-insensitive: "HTTPS://example.com" must be treated the same as
// "https://example.com", otherwise callers that re-add a scheme end up with a
// doubled prefix. Input without either prefix is returned unchanged.
func trimProtocol(requestURL string) string {
	// Order matters for backward compatibility: "http://" is removed first,
	// then "https://", exactly as the nested TrimPrefix calls did before.
	for _, scheme := range [...]string{"http://", "https://"} {
		if len(requestURL) < len(scheme) {
			continue
		}

		if strings.EqualFold(requestURL[:len(scheme)], scheme) {
			requestURL = requestURL[len(scheme):]
		}
	}

	return requestURL
}
// decodeCloudflareEmail decodes the hex string found in a Cloudflare
// `data-cfemail` attribute.
//
// The first two hex digits are the XOR key; every following pair of hex digits
// is one character XOR-ed with that key. The decoded characters are returned
// as a string.
//
// Malformed input (shorter than one hex pair, odd length, or containing
// non-hex characters) yields an empty string instead of panicking or producing
// garbage, so callers can treat "" as "nothing decoded".
func decodeCloudflareEmail(email string) string {
	const hexPairLen = 2

	// Slicing below relies on the input being a whole number of hex pairs and
	// containing at least the key pair.
	if len(email) < hexPairLen || len(email)%hexPairLen != 0 {
		return ""
	}

	key, err := strconv.ParseUint(email[:hexPairLen], 16, 8)
	if err != nil {
		return ""
	}

	var buffer bytes.Buffer

	buffer.Grow(len(email)/hexPairLen - 1)

	for n := hexPairLen; n < len(email); n += hexPairLen {
		octet, parseErr := strconv.ParseUint(email[n:n+hexPairLen], 16, 8)
		if parseErr != nil {
			return ""
		}

		buffer.WriteRune(rune(octet ^ key))
	}

	return buffer.String()
}
indirect 13 | github.com/andybalholm/cascadia v1.3.2 // indirect 14 | github.com/antchfx/htmlquery v1.3.0 // indirect 15 | github.com/antchfx/xmlquery v1.3.18 // indirect 16 | github.com/antchfx/xpath v1.2.4 // indirect 17 | github.com/chromedp/cdproto v0.0.0-20231007061347-18b01cd81617 // indirect 18 | github.com/chromedp/sysutil v1.0.0 // indirect 19 | github.com/gobwas/glob v0.2.3 // indirect 20 | github.com/gobwas/httphead v0.1.0 // indirect 21 | github.com/gobwas/pool v0.2.1 // indirect 22 | github.com/gobwas/ws v1.3.0 // indirect 23 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 24 | github.com/golang/protobuf v1.5.3 // indirect 25 | github.com/josharian/intern v1.0.0 // indirect 26 | github.com/kennygrant/sanitize v1.2.4 // indirect 27 | github.com/mailru/easyjson v0.7.7 // indirect 28 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect 29 | github.com/temoto/robotstxt v1.1.2 // indirect 30 | golang.org/x/net v0.16.0 // indirect 31 | golang.org/x/sys v0.13.0 // indirect 32 | golang.org/x/text v0.13.0 // indirect 33 | google.golang.org/appengine v1.6.8 // indirect 34 | google.golang.org/protobuf v1.31.0 // indirect 35 | ) 36 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= 4 | github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= 5 | github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= 6 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 7 | github.com/andybalholm/cascadia v1.2.0/go.mod 
h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 8 | github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= 9 | github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= 10 | github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= 11 | github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= 12 | github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E= 13 | github.com/antchfx/htmlquery v1.3.0/go.mod h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8= 14 | github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM= 15 | github.com/antchfx/xmlquery v1.3.18 h1:FSQ3wMuphnPPGJOFhvc+cRQ2CT/rUj4cyQXkJcjOwz0= 16 | github.com/antchfx/xmlquery v1.3.18/go.mod h1:Afkq4JIeXut75taLSuI31ISJ/zeq+3jG7TunF7noreA= 17 | github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 18 | github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 19 | github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= 20 | github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY= 21 | github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= 22 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 23 | github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= 24 | github.com/chromedp/cdproto v0.0.0-20231007061347-18b01cd81617 h1:/5dwcyi5WOawM1Iz6MjrYqB90TRIdZv3O0fVHEJb86w= 25 | github.com/chromedp/cdproto v0.0.0-20231007061347-18b01cd81617/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= 26 | github.com/chromedp/chromedp v0.9.2 h1:dKtNz4kApb06KuSXoTQIyUC2TrA0fhGDwNZf3bcgfKw= 27 | github.com/chromedp/chromedp v0.9.2/go.mod 
h1:LkSXJKONWTCHAfQasKFUZI+mxqS4tZqhmtGzzhLsnLs= 28 | github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= 29 | github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= 30 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 31 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 32 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 33 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 34 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 35 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 36 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 37 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 38 | github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= 39 | github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= 40 | github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= 41 | github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= 42 | github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= 43 | github.com/gobwas/ws v1.3.0 h1:sbeU3Y4Qzlb+MOzIe6mQGf7QR4Hkv6ZD0qhGkBFL2O0= 44 | github.com/gobwas/ws v1.3.0/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= 45 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= 46 | github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs= 47 | github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0= 48 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod 
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 49 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 50 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= 51 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 52 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 53 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 54 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 55 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 56 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 57 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 58 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 59 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 60 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 61 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= 62 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 63 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 64 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 65 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 66 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 67 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 68 | 
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 69 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 70 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 71 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 72 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 73 | github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= 74 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 75 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 76 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= 77 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= 78 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 79 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 80 | github.com/lawzava/go-tld v1.0.1 h1:MiXmEKJCgt75smaojNfB6WbX0eaO+Y2XhKu3WfR9Ibo= 81 | github.com/lawzava/go-tld v1.0.1/go.mod h1:6kXrWOB9J8f2ot4FMObf/kJHeZmuLB5O58/DGofJl3k= 82 | github.com/lawzava/go-tld v1.1.0 h1:fjtGrvEjsumnm+beoocW71RPl6OQfQr6R5wESvP2kow= 83 | github.com/lawzava/go-tld v1.1.0/go.mod h1:/mIjyMjSDHCsk9bFH8Dg5Yq0+pysx1m46RnqjGqBxwI= 84 | github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= 85 | github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= 86 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 87 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 88 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 89 | 
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= 90 | github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= 91 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 92 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 93 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 94 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 95 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= 96 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 97 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 98 | github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= 99 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 100 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 101 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 102 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= 103 | github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= 104 | github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= 105 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 106 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 107 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 108 | 
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 109 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 110 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 111 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 112 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 113 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 114 | golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 115 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 116 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 117 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 118 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 119 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 120 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 121 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 122 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 123 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 124 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 125 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod 
h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 126 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 127 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 128 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 129 | golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= 130 | golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 131 | golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 132 | golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= 133 | golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos= 134 | golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= 135 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 136 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 137 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 138 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 139 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 140 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 141 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 142 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 143 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 144 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 145 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 146 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 147 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 148 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 149 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 150 | golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 151 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 152 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 153 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 154 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= 155 | golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 156 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 157 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 158 | golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= 159 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 160 | golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= 161 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 162 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 163 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 164 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 165 | golang.org/x/text v0.3.7/go.mod 
h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 166 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= 167 | golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 168 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 169 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 170 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= 171 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= 172 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 173 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 174 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 175 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 176 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 177 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 178 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 179 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 180 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= 181 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 182 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 183 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 184 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 185 | 
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 186 | google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 187 | google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= 188 | google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= 189 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 190 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 191 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 192 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 193 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 194 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 195 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 196 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 197 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 198 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 199 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 200 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 201 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 202 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 203 | google.golang.org/protobuf v1.24.0/go.mod 
h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= 204 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 205 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 206 | google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= 207 | google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 208 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 209 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 210 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 211 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 212 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 213 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 214 | -------------------------------------------------------------------------------- /scrape.go: -------------------------------------------------------------------------------- 1 | package emailscraper 2 | 3 | import ( 4 | "github.com/gocolly/colly/v2" 5 | ) 6 | 7 | // Scrape is responsible for main scraping logic. 
8 | func (s *Scraper) Scrape(url string) ([]string, error) { 9 | url = getWebsite(url, true) 10 | 11 | var emailsSet emails 12 | 13 | collector := s.collector 14 | 15 | if !s.cfg.FollowExternalLinks { 16 | allowedDomains, err := prepareAllowedDomain(url) 17 | if err != nil { 18 | return nil, err 19 | } 20 | 21 | collector.AllowedDomains = allowedDomains 22 | } 23 | 24 | // Parse emails on each downloaded page 25 | collector.OnScraped(func(response *colly.Response) { 26 | emailsSet.parseEmails(response.Body) 27 | }) 28 | 29 | // cloudflare encoded email support 30 | collector.OnHTML("span[data-cfemail]", func(el *colly.HTMLElement) { 31 | emailsSet.parseCloudflareEmail(el.Attr("data-cfemail")) 32 | }) 33 | 34 | // Start the scrape 35 | if err := collector.Visit(url); err != nil { 36 | s.log("error while visiting secure domain: ", url, err.Error()) 37 | } 38 | 39 | collector.Wait() // Wait for concurrent scrapes to finish 40 | 41 | if emailsSet.emails == nil || len(emailsSet.emails) == 0 { 42 | // Start the scrape on insecure url 43 | if err := collector.Visit(getWebsite(url, false)); err != nil { 44 | s.log("error while visiting insecure domain: ", err.Error()) 45 | } 46 | 47 | collector.Wait() // Wait for concurrent scrapes to finish 48 | } 49 | 50 | return emailsSet.emails, nil 51 | } 52 | 53 | func getWebsite(url string, secure bool) string { 54 | url = trimProtocol(url) 55 | 56 | if secure { 57 | return "https://" + url 58 | } 59 | 60 | return "http://" + url 61 | } 62 | -------------------------------------------------------------------------------- /scrape_test.go: -------------------------------------------------------------------------------- 1 | package emailscraper_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/lawzava/emailscraper" 7 | ) 8 | 9 | func TestScrape(t *testing.T) { 10 | t.Parallel() 11 | 12 | cfg := emailscraper.DefaultConfig() 13 | cfg.Debug = true 14 | cfg.MaxDepth = 1 15 | 16 | scraper := emailscraper.New(cfg) 17 | 18 | testCases 
:= []struct { 19 | name string 20 | url string 21 | mustContainEmail string 22 | }{ 23 | {"cloudflare protected", "https://lawzava.com/contact/", "contact@lawzava.com"}, 24 | } 25 | 26 | for _, testCase := range testCases { 27 | emails, err := scraper.Scrape(testCase.url) 28 | if err != nil { 29 | t.Fatal(err) 30 | } 31 | 32 | var contains bool 33 | 34 | for _, email := range emails { 35 | if email == testCase.mustContainEmail { 36 | contains = true 37 | 38 | break 39 | } 40 | } 41 | 42 | if !contains { 43 | t.Error("email missing: ", emails) 44 | t.Fail() 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /scraper.go: -------------------------------------------------------------------------------- 1 | package emailscraper 2 | 3 | import ( 4 | "errors" 5 | "log" 6 | "os" 7 | 8 | "github.com/gocolly/colly/v2" 9 | "github.com/gocolly/colly/v2/debug" 10 | ) 11 | 12 | // Scraper config. 13 | type Scraper struct { 14 | cfg Config 15 | 16 | collector *colly.Collector 17 | } 18 | 19 | // Config for the scraper. 20 | type Config struct { 21 | MaxDepth int 22 | Timeout int 23 | 24 | Recursively bool 25 | Async bool 26 | EnableJavascript bool 27 | FollowExternalLinks bool 28 | Debug bool 29 | } 30 | 31 | // DefaultConfig defines default config with sane defaults for most use cases. 32 | func DefaultConfig() Config { 33 | //nolint:gomnd // allow for default config 34 | return Config{ 35 | MaxDepth: 3, 36 | Timeout: 5, 37 | Recursively: true, 38 | Async: true, 39 | EnableJavascript: true, 40 | FollowExternalLinks: false, 41 | Debug: false, 42 | } 43 | } 44 | 45 | // New initiates new scraper entity. 
46 | func New(cfg Config) *Scraper { 47 | // Initiate colly 48 | collector := colly.NewCollector( 49 | //nolint:lll // allow long line for user agent 50 | colly.UserAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"), 51 | ) 52 | 53 | collector.Async = cfg.Async 54 | collector.MaxDepth = cfg.MaxDepth 55 | 56 | if cfg.Debug { 57 | collector.SetDebugger(&debug.LogDebugger{ 58 | Output: os.Stderr, 59 | Prefix: "", 60 | Flag: log.LstdFlags, 61 | }) 62 | } 63 | 64 | scraper := Scraper{ 65 | cfg: cfg, 66 | collector: collector, 67 | } 68 | 69 | if cfg.EnableJavascript { 70 | scraper.collector.OnResponse(func(response *colly.Response) { 71 | if err := initiateScrapingFromChrome(response, cfg.Timeout); err != nil { 72 | scraper.log(err) 73 | 74 | return 75 | } 76 | }) 77 | } 78 | 79 | if cfg.Recursively { 80 | // Find and visit all links 81 | scraper.collector.OnHTML("a[href]", func(el *colly.HTMLElement) { 82 | scraper.log("visiting: ", el.Attr("href")) 83 | if err := el.Request.Visit(el.Attr("href")); err != nil { 84 | // Ignore already visited error, this appears too often 85 | if !errors.Is(err, colly.ErrAlreadyVisited) { 86 | scraper.log("error while linking: ", err.Error()) 87 | } 88 | } 89 | }) 90 | } 91 | 92 | return &scraper 93 | } 94 | 95 | func (s *Scraper) log(v ...interface{}) { 96 | if s.cfg.Debug { 97 | log.Println(v...) 98 | } 99 | } 100 | --------------------------------------------------------------------------------