├── .github
└── workflows
│ └── audit.yml
├── LICENSE
├── README.md
├── example_test.go
├── go.mod
├── go.sum
├── limit.go
├── screenshot.png
├── search.go
└── search_test.go
/.github/workflows/audit.yml:
--------------------------------------------------------------------------------
1 | name: GoogleDOMCheck
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | pull_request:
7 | branches: [master]
8 | schedule:
9 | - cron: '0 0,12 * * *' # twice a day, at 00:00 and 12:00 (GitHub Actions evaluates schedules in UTC)
10 |
11 |
12 | jobs:
13 |
14 | build:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v3
18 |
19 | - name: Set up Go
20 | uses: actions/setup-go@v3
21 | with:
22 | go-version: 1.18 # quote the value if it ever ends in 0 (e.g. '1.20'); unquoted, YAML reads that as the number 1.2
23 |
24 | - name: Build
25 | run: go build -v ./...
26 |
27 | - name: Test
28 | run: go test -v ./... # NOTE(review): the examples appear to query live Google Search, so this step doubles as the DOM check — confirm
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020-22 PJ Engineering and Business Solutions Pty. Ltd.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | ⭐ the project to show your appreciation. :arrow_upper_right:
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | Quickly scrape Google Search Results.
16 |
17 | ## Example
18 |
19 | ```go
20 | import "fmt"
21 | import "github.com/rocketlaunchr/google-search"
22 |
23 | func main() {
24 | fmt.Println(googlesearch.Search(nil, "cars for sale in Toronto, Canada"))
25 | }
26 | ```
27 |
28 | ## Results:
29 |
30 | ```go
31 | ([]googlesearch.Result) (len=11 cap=16) {
32 | (googlesearch.Result) {
33 | Rank: (int) 1,
34 | URL: (string) (len=42) "https://www.autotrader.ca/cars/on/toronto/",
35 | Title: (string) (len=51) "New & Used Cars for sale in Toronto | autoTRADER.ca",
36 | Description: (string) ""
37 | },
38 | (googlesearch.Result) {
39 | Rank: (int) 2,
40 | URL: (string) (len=42) "https://www.autotrader.ca/cars/on/toronto/",
41 | Title: (string) (len=51) "New & Used Cars for sale in Toronto | autoTRADER.ca",
42 | Description: (string) ""
43 | },
44 | (googlesearch.Result) {
45 | Rank: (int) 3,
46 | URL: (string) (len=50) "https://www.carpages.ca/ontario/toronto/used-cars/",
47 | Title: (string) (len=31) "Used Cars Toronto | Carpages.ca",
48 | Description: (string) (len=337) "13518 results - Used Cars, Trucks and SUVs for Sale in Toronto, ON. 2009 Acura TL. AWD, Navi, Camera, Leather, 3/Y warranty availabl. 2010 Chevrolet Traverse. 2LT. 2017 Jaguar F-PACE. 35T-AWD-NAVI-CAMERA-PANO ROOF-CPO WARRANTY. 2005 Audi A6. $2,495. 2007 Audi A4. 2.0T. 2012 Audi Q7. 3.0L Premium Plus. 2005 Ford F-250. 2010 Nissan Cube."
49 | }
50 | }
51 | ```
52 |
53 | ## :warning: Warning
54 |
55 | The implementation relies on the DOM of Google's search results page staying the same. From time to time, Google changes its DOM, which breaks the implementation.
56 |
57 | In the event it changes, this package will be updated as soon as possible.
58 |
59 | Also note that if you call this function too frequently, Google detects that it is being scraped and serves a [reCAPTCHA](https://www.google.com/recaptcha/intro/v3.html), which interferes with the scraping. **Don't call it in quick succession.** It may take some time before Google unblocks you.
60 |
61 | **Always** use the built-in [rate-limiter](https://godoc.org/github.com/rocketlaunchr/google-search#RateLimit) set to reasonable settings.
62 |
63 |
64 | HTTP STATUS CODE: 429 — Too Many Requests
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
About this page
75 |
76 | Our systems have detected unusual traffic from your computer network. This page checks to see if it's really you sending the requests, and not a robot.
Why did this happen?
77 |
78 |
79 | This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the
Terms of Service . The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.
This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help — a different computer using the same IP address may be responsible.
Learn more Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
80 |
81 |
82 | IP address: xxx.xx.xxx.xx
Time: 2021-01-13T05:27:34Z
URL: https://www.google.com/search?q=Hello+World&hl=en&num=20
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 | ## Credits
93 |
94 | Special thanks to [Edmund Martin](https://edmundmartin.com/scraping-google-with-golang/).
95 |
96 |
97 | Other useful packages
98 | ------------
99 |
100 | - [awesome-svelte](https://github.com/rocketlaunchr/awesome-svelte) - Resources for killing react.js
101 | - [dataframe-go](https://github.com/rocketlaunchr/dataframe-go) - Statistics and data manipulation
102 | - [dbq](https://github.com/rocketlaunchr/dbq) - Zero boilerplate database operations for Go
103 | - [electron-alert](https://github.com/rocketlaunchr/electron-alert) - SweetAlert2 for Electron Applications
104 | - [igo](https://github.com/rocketlaunchr/igo) - A Go transpiler with cool new syntax such as fordefer (defer for for-loops)
105 | - [mysql-go](https://github.com/rocketlaunchr/mysql-go) - Properly cancel slow MySQL queries
106 | - [react](https://github.com/rocketlaunchr/react) - Build front end applications using Go
107 | - [remember-go](https://github.com/rocketlaunchr/remember-go) - Cache slow database queries
108 | - [showerglass](https://github.com/rocketlaunchr/showerglass) - A soothing face filter for privacy
109 | - [testing-go](https://github.com/rocketlaunchr/testing-go) - Testing framework for unit testing
--------------------------------------------------------------------------------
/example_test.go:
--------------------------------------------------------------------------------
1 | package googlesearch
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | )
7 |
8 | func ExampleSearch() {
9 | // Minimal end-to-end use of Search: run one query and look for a known site in the results.
10 | opt := SearchOptions{
11 | CountryCode: "au", // presumably selects the Australian Google domain ("au" in GoogleDomains) — confirm in Search
12 | }
13 |
14 | //lint:ignore SA1012 ignore this bare essentials by passing nil for context and removing context package (despite not being idiomatic go).
15 | serp, err := Search(nil, "First Aid Course Australia Wide First Aid", opt)
16 |
17 | if err != nil {
18 | fmt.Print(err.Error()) // the error text becomes part of the example's output, which is compared against the Output comment below
19 | }
20 | // Print the confirmation once, for the first result whose URL contains the expected domain.
21 | for _, result := range serp {
22 | if strings.Contains(result.URL, "australiawidefirstaid.com.au") {
23 | fmt.Println("Australia Wide First Aid (https://www.australiawidefirstaid.com.au/) found in the serp")
24 | break
25 | }
26 | }
27 |
28 | // Output: Australia Wide First Aid (https://www.australiawidefirstaid.com.au/) found in the serp
29 |
30 | }
31 |
32 | /*
33 | ExampleSearch_userAgent shows how to set a custom User-Agent through SearchOptions.UserAgent. It uses the Example<Func>_<suffix> naming form so that go vet and godoc attach it to Search; a bare ExampleUserAgent would name an identifier that this package does not appear to declare.
34 | */
35 | func ExampleSearch_userAgent() {
36 |
37 | // whatismybrowser.com maintains a database of UserAgents
38 | // https://www.whatismybrowser.com/guides/the-latest-user-agent/chrome
39 |
40 | opt := SearchOptions{
41 | CountryCode: "au",
42 | UserAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
43 | }
44 |
45 | //lint:ignore SA1012 ignore this bare essentials by passing nil for context and removing context package (despite not being idiomatic go).
46 | serp, err := Search(nil, "First Aid Course Australia Wide First Aid", opt)
47 |
48 | if err != nil {
49 | fmt.Print(err.Error()) // the error text becomes part of the example's output, which is compared against the Output comment below
50 | }
51 |
52 | for _, result := range serp {
53 | if strings.Contains(result.URL, "australiawidefirstaid.com.au") {
54 | fmt.Println("Australia Wide First Aid (https://www.australiawidefirstaid.com.au/) found in the serp")
55 | break
56 | }
57 | }
58 |
59 | // Output: Australia Wide First Aid (https://www.australiawidefirstaid.com.au/) found in the serp
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/rocketlaunchr/google-search
2 |
3 | go 1.16
4 |
5 | require (
6 | github.com/gocolly/colly/v2 v2.1.0
7 | golang.org/x/time v0.3.0
8 | )
9 |
10 | require (
11 | github.com/PuerkitoBio/goquery v1.8.1 // indirect
12 | github.com/andybalholm/cascadia v1.3.2 // indirect
13 | github.com/antchfx/htmlquery v1.3.0 // indirect
14 | github.com/antchfx/xmlquery v1.3.15 // indirect
15 | github.com/antchfx/xpath v1.2.4 // indirect
16 | github.com/golang/protobuf v1.5.3 // indirect
17 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
18 | github.com/temoto/robotstxt v1.1.2 // indirect
19 | golang.org/x/net v0.10.0 // indirect
20 | google.golang.org/appengine v1.6.7 // indirect
21 | google.golang.org/protobuf v1.30.0 // indirect
22 | )
23 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
3 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
4 | github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
5 | github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
6 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
7 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
8 | github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
9 | github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
10 | github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
11 | github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
12 | github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E=
13 | github.com/antchfx/htmlquery v1.3.0/go.mod h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8=
14 | github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
15 | github.com/antchfx/xmlquery v1.3.15 h1:aJConNMi1sMha5G8YJoAIF5P+H+qG1L73bSItWHo8Tw=
16 | github.com/antchfx/xmlquery v1.3.15/go.mod h1:zMDv5tIGjOxY/JCNNinnle7V/EwthZ5IT8eeCGJKRWA=
17 | github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
18 | github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
19 | github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
20 | github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY=
21 | github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
22 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
23 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
24 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
25 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
26 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
27 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
28 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
29 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
30 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
31 | github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
32 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
33 | github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs=
34 | github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0=
35 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
36 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
37 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
38 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
39 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
40 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
41 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
42 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
43 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
44 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
45 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
46 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
47 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
48 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
49 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
50 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
51 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
52 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
53 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
54 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
55 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
56 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
57 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
58 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
59 | github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
60 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
61 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
62 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
63 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
64 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
65 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
66 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
67 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
68 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
69 | github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
70 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
71 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
72 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
73 | github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
74 | github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
75 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
76 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
77 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
78 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
79 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
80 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
81 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
82 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
83 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
84 | golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
85 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
86 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
87 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
88 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
89 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
90 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
91 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
92 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
93 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
94 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
95 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
96 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
97 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
98 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
99 | golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
100 | golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
101 | golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
102 | golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
103 | golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
104 | golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
105 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
106 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
107 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
108 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
109 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
110 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
111 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
112 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
113 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
114 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
115 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
116 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
117 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
118 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
119 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
120 | golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
121 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
122 | golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
123 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
124 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
125 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
126 | golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
127 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
128 | golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
129 | golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
130 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
131 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
132 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
133 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
134 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
135 | golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
136 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
137 | golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
138 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
139 | golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
140 | golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
141 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
142 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
143 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
144 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
145 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
146 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
147 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
148 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
149 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
150 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
151 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
152 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
153 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
154 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
155 | google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
156 | google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
157 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
158 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
159 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
160 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
161 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
162 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
163 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
164 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
165 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
166 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
167 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
168 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
169 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
170 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
171 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
172 | google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
173 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
174 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
175 | google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
176 | google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
177 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
178 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
179 |
--------------------------------------------------------------------------------
/limit.go:
--------------------------------------------------------------------------------
1 | // Copyright 2020-22 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved.
2 |
3 | package googlesearch
4 |
5 | import (
6 | "errors"
7 |
8 | "golang.org/x/time/rate"
9 | )
10 |
11 | // ErrBlocked indicates that Google has detected that you were scraping and temporarily blocked you.
12 | // The duration of the block is unspecified.
13 | //
14 | // See: https://github.com/rocketlaunchr/google-search#warning-warning
15 | var ErrBlocked = errors.New("google block")
16 |
17 | // RateLimit sets a global limit to how many requests to Google Search can be made in a given time interval.
18 | // The default is unlimited (but obviously Google Search will block you temporarily if you do too many
19 | // calls too quickly). A limiter created with rate.Inf allows every event, even with the burst of 0 used here.
20 | //
21 | // See: https://godoc.org/golang.org/x/time/rate#NewLimiter
22 | var RateLimit = rate.NewLimiter(rate.Inf, 0)
23 |
--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rocketlaunchr/google-search/30c2a23ad5945b239832620056b409d4a0f250cf/screenshot.png
--------------------------------------------------------------------------------
/search.go:
--------------------------------------------------------------------------------
1 | // Copyright 2020-22 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved.
2 |
3 | package googlesearch
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "net/url"
9 | "strconv"
10 | "strings"
11 |
12 | "github.com/gocolly/colly/v2"
13 | "github.com/gocolly/colly/v2/proxy"
14 | "github.com/gocolly/colly/v2/queue"
15 | )
16 |
17 | // Result represents a single result from Google Search. Each field is serialized to JSON under its lowercase name.
18 | type Result struct {
19 |
20 | // Rank is the order number of the search result; the README sample output starts at 1.
21 | Rank int `json:"rank"`
22 |
23 | // URL of result.
24 | URL string `json:"url"`
25 |
26 | // Title of result.
27 | Title string `json:"title"`
28 |
29 | // Description of the result. It can be empty, as in the README sample output.
30 | Description string `json:"description"`
31 | }
32 |
33 | const stdGoogleBase = "https://www.google." // common URL prefix; presumably a GoogleDomains value is appended to it — confirm in Search
34 | const defaultAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36" // desktop Chrome 113 User-Agent string; presumably the fallback when SearchOptions.UserAgent is empty — confirm in Search
35 |
// GoogleDomains maps a lower-case country key to the localized Google domain
// suffix plus search path that is appended to "https://www.google.".
//
// Keys are ISO 3166-1 alpha-2 codes, with two extras: "uk" (an alias of "gb")
// and "cat" (the Catalan domain). Entries are listed alphabetically by key.
//
// See: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
var GoogleDomains = map[string]string{
	"ac":  "ac/search?q=",
	"ad":  "ad/search?q=",
	"ae":  "ae/search?q=",
	"af":  "com.af/search?q=",
	"ag":  "com.ag/search?q=",
	"ai":  "com.ai/search?q=",
	"al":  "al/search?q=",
	"am":  "am/search?q=",
	"ao":  "co.ao/search?q=",
	"ar":  "com.ar/search?q=",
	"as":  "as/search?q=",
	"at":  "at/search?q=",
	"au":  "com.au/search?q=",
	"az":  "az/search?q=",
	"ba":  "ba/search?q=",
	"bd":  "com.bd/search?q=",
	"be":  "be/search?q=",
	"bf":  "bf/search?q=",
	"bg":  "bg/search?q=",
	"bh":  "com.bh/search?q=",
	"bi":  "bi/search?q=",
	"bj":  "bj/search?q=",
	"bn":  "com.bn/search?q=",
	"bo":  "com.bo/search?q=",
	"br":  "com.br/search?q=",
	"bs":  "bs/search?q=",
	"bt":  "bt/search?q=",
	"bw":  "co.bw/search?q=",
	"by":  "by/search?q=",
	"bz":  "com.bz/search?q=",
	"ca":  "ca/search?q=",
	"cat": "cat/search?q=",
	"cc":  "cc/search?q=",
	"cd":  "cd/search?q=",
	"cf":  "cf/search?q=",
	"cg":  "cg/search?q=",
	"ch":  "ch/search?q=",
	"ci":  "ci/search?q=",
	"ck":  "co.ck/search?q=",
	"cl":  "cl/search?q=",
	"cm":  "cm/search?q=",
	"cn":  "cn/search?q=",
	"co":  "com.co/search?q=",
	"cr":  "co.cr/search?q=",
	"cu":  "com.cu/search?q=",
	"cv":  "cv/search?q=",
	"cy":  "com.cy/search?q=",
	"cz":  "cz/search?q=",
	"de":  "de/search?q=",
	"dj":  "dj/search?q=",
	"dk":  "dk/search?q=",
	"dm":  "dm/search?q=",
	"do":  "com.do/search?q=",
	"dz":  "dz/search?q=",
	"ec":  "com.ec/search?q=",
	"ee":  "ee/search?q=",
	"eg":  "com.eg/search?q=",
	"es":  "es/search?q=",
	"et":  "com.et/search?q=",
	"fi":  "fi/search?q=",
	"fj":  "com.fj/search?q=",
	"fm":  "fm/search?q=",
	"fr":  "fr/search?q=",
	"ga":  "ga/search?q=",
	"gb":  "co.uk/search?q=",
	"ge":  "ge/search?q=",
	"gf":  "gf/search?q=",
	"gg":  "gg/search?q=",
	"gh":  "com.gh/search?q=",
	"gi":  "com.gi/search?q=",
	"gl":  "gl/search?q=",
	"gm":  "gm/search?q=",
	"gp":  "gp/search?q=",
	"gr":  "gr/search?q=",
	"gt":  "com.gt/search?q=",
	"gy":  "gy/search?q=",
	"hk":  "com.hk/search?q=",
	"hn":  "hn/search?q=",
	"hr":  "hr/search?q=",
	"ht":  "ht/search?q=",
	"hu":  "hu/search?q=",
	"id":  "co.id/search?q=",
	"ie":  "ie/search?q=",
	"il":  "co.il/search?q=",
	"im":  "im/search?q=",
	"in":  "co.in/search?q=",
	"io":  "io/search?q=",
	"iq":  "iq/search?q=",
	"is":  "is/search?q=",
	"it":  "it/search?q=",
	"je":  "je/search?q=",
	"jm":  "com.jm/search?q=",
	"jo":  "jo/search?q=",
	"jp":  "co.jp/search?q=",
	"ke":  "co.ke/search?q=",
	"kg":  "kg/search?q=",
	"kh":  "com.kh/search?q=",
	"ki":  "ki/search?q=",
	"kr":  "co.kr/search?q=",
	"kw":  "com.kw/search?q=",
	"kz":  "kz/search?q=",
	"la":  "la/search?q=",
	"lb":  "com.lb/search?q=",
	"lc":  "com.lc/search?q=",
	"li":  "li/search?q=",
	"lk":  "lk/search?q=",
	"ls":  "co.ls/search?q=",
	"lt":  "lt/search?q=",
	"lu":  "lu/search?q=",
	"lv":  "lv/search?q=",
	"ly":  "com.ly/search?q=",
	"ma":  "co.ma/search?q=",
	"md":  "md/search?q=",
	"me":  "me/search?q=",
	"mg":  "mg/search?q=",
	"mk":  "mk/search?q=",
	"ml":  "ml/search?q=",
	"mm":  "com.mm/search?q=",
	"mn":  "mn/search?q=",
	"ms":  "ms/search?q=",
	"mt":  "com.mt/search?q=",
	"mu":  "mu/search?q=",
	"mv":  "mv/search?q=",
	"mw":  "mw/search?q=",
	"mx":  "com.mx/search?q=",
	"my":  "com.my/search?q=",
	"mz":  "co.mz/search?q=",
	"na":  "com.na/search?q=",
	"ne":  "ne/search?q=",
	"nf":  "com.nf/search?q=",
	"ng":  "com.ng/search?q=",
	"ni":  "com.ni/search?q=",
	"nl":  "nl/search?q=",
	"no":  "no/search?q=",
	"np":  "com.np/search?q=",
	"nr":  "nr/search?q=",
	"nu":  "nu/search?q=",
	"nz":  "co.nz/search?q=",
	"om":  "com.om/search?q=",
	"pa":  "com.pa/search?q=",
	"pe":  "com.pe/search?q=",
	"pg":  "com.pg/search?q=",
	"ph":  "com.ph/search?q=",
	"pk":  "com.pk/search?q=",
	"pl":  "pl/search?q=",
	"pn":  "pn/search?q=",
	"pr":  "com.pr/search?q=",
	"ps":  "ps/search?q=",
	"pt":  "pt/search?q=",
	"py":  "com.py/search?q=",
	"qa":  "com.qa/search?q=",
	"ro":  "ro/search?q=",
	"rs":  "rs/search?q=",
	"ru":  "ru/search?q=",
	"rw":  "rw/search?q=",
	"sa":  "com.sa/search?q=",
	"sb":  "com.sb/search?q=",
	"sc":  "sc/search?q=",
	"se":  "se/search?q=",
	"sg":  "com.sg/search?q=",
	"sh":  "sh/search?q=",
	"si":  "si/search?q=",
	"sk":  "sk/search?q=",
	"sl":  "com.sl/search?q=",
	"sm":  "sm/search?q=",
	"sn":  "sn/search?q=",
	"so":  "so/search?q=",
	"st":  "st/search?q=",
	"sv":  "com.sv/search?q=",
	"td":  "td/search?q=",
	"tg":  "tg/search?q=",
	"th":  "co.th/search?q=",
	"tj":  "com.tj/search?q=",
	"tk":  "tk/search?q=",
	"tl":  "tl/search?q=",
	"tm":  "tm/search?q=",
	"tn":  "tn/search?q=",
	"to":  "to/search?q=",
	"tr":  "com.tr/search?q=",
	"tt":  "tt/search?q=",
	"tw":  "com.tw/search?q=",
	"tz":  "co.tz/search?q=",
	"ua":  "com.ua/search?q=",
	"ug":  "co.ug/search?q=",
	"uk":  "co.uk/search?q=",
	"us":  "com/search?q=",
	"uy":  "com.uy/search?q=",
	"uz":  "co.uz/search?q=",
	"vc":  "com.vc/search?q=",
	"ve":  "co.ve/search?q=",
	"vg":  "vg/search?q=",
	"vi":  "co.vi/search?q=",
	"vn":  "com.vn/search?q=",
	"vu":  "vu/search?q=",
	"ws":  "ws/search?q=",
	"za":  "co.za/search?q=",
	"zm":  "co.zm/search?q=",
	"zw":  "co.zw/search?q=",
}
239 |
// SearchOptions modifies how the Search function behaves.
// The zero value is valid and selects the defaults described on each field.
type SearchOptions struct {

	// CountryCode selects the localized Google Search homepage to use. It is
	// lower-cased and looked up in GoogleDomains.
	// The default (also used for unknown codes) is "us", which will return
	// results from https://www.google.com.
	CountryCode string

	// LanguageCode sets the language code sent as the "hl" query parameter.
	// Default: en
	LanguageCode string

	// Limit sets how many results to fetch (at maximum).
	// When 0, no "num" query parameter is sent and the results are not trimmed.
	Limit int

	// Start sets from what rank the new result set should return.
	// When 0, no "start" query parameter is sent.
	Start int

	// UserAgent sets the UserAgent of the http request.
	// Default: the package's defaultAgent constant (a desktop Chrome 113
	// user agent string).
	UserAgent string

	// OverLimit searches for more results than that specified by Limit
	// (1.5 times as many are requested).
	// It then reduces the returned results to match Limit.
	OverLimit bool

	// ProxyAddr sets a proxy address to avoid IP blocking.
	ProxyAddr string

	// FollowNextPage, when set, scrapes subsequent result pages.
	FollowNextPage bool
}
271 |
272 | // Search returns a list of search results from Google.
273 | func Search(ctx context.Context, searchTerm string, opts ...SearchOptions) ([]Result, error) {
274 | if ctx == nil {
275 | ctx = context.Background()
276 | }
277 |
278 | if err := RateLimit.Wait(ctx); err != nil {
279 | return nil, err
280 | }
281 |
282 | c := colly.NewCollector(colly.MaxDepth(1))
283 | if len(opts) == 0 {
284 | opts = append(opts, SearchOptions{})
285 | }
286 |
287 | if opts[0].UserAgent == "" {
288 | c.UserAgent = defaultAgent
289 | } else {
290 | c.UserAgent = opts[0].UserAgent
291 | }
292 |
293 | var lc string
294 | if opts[0].LanguageCode == "" {
295 | lc = "en"
296 | } else {
297 | lc = opts[0].LanguageCode
298 | }
299 |
300 | q, _ := queue.New(1, &queue.InMemoryQueueStorage{MaxSize: 10000})
301 |
302 | limit := opts[0].Limit
303 | if opts[0].OverLimit {
304 | limit = int(float64(opts[0].Limit) * 1.5)
305 | }
306 |
307 | results := []Result{}
308 | nextPageLink := ""
309 | var rErr error
310 | filteredRank := 1
311 | rank := 1
312 |
313 | c.OnRequest(func(r *colly.Request) {
314 | if err := ctx.Err(); err != nil {
315 | r.Abort()
316 | rErr = err
317 | return
318 | }
319 | if opts[0].FollowNextPage && nextPageLink != "" {
320 | req, err := r.New("GET", nextPageLink, nil)
321 | if err == nil {
322 | q.AddRequest(req)
323 | }
324 | }
325 | })
326 |
327 | c.OnError(func(r *colly.Response, err error) {
328 | rErr = err
329 | })
330 |
331 | // https://www.w3schools.com/cssref/css_selectors.asp
332 | c.OnHTML("div.g", func(e *colly.HTMLElement) {
333 |
334 | sel := e.DOM
335 |
336 | linkHref, _ := sel.Find("a").Attr("href")
337 | linkText := strings.TrimSpace(linkHref)
338 | titleText := strings.TrimSpace(sel.Find("div > div > div > a > h3").Text())
339 | descText := strings.TrimSpace(sel.Find("div > div > div > div:first-child > span:first-child").Text())
340 |
341 | rank += 1
342 | if linkText != "" && linkText != "#" && titleText != "" {
343 | result := Result{
344 | Rank: filteredRank,
345 | URL: linkText,
346 | Title: titleText,
347 | Description: descText,
348 | }
349 | results = append(results, result)
350 | filteredRank += 1
351 | }
352 |
353 | // check if there is a next button at the end.
354 | // Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same
355 | nextPageHref, _ := sel.Find("a #pnnext").Attr("href")
356 | nextPageLink = strings.TrimSpace(nextPageHref)
357 |
358 | })
359 |
360 | c.OnHTML("div.g", func(e *colly.HTMLElement) {
361 |
362 | sel := e.DOM
363 |
364 | // check if there is a next button at the end.
365 | // Added this selector as the Id is the same for every language checked on google.com .pt and .es the text changes but the id remains the same
366 | if nextPageHref, exists := sel.Attr("href"); exists {
367 | start := getStart(strings.TrimSpace(nextPageHref))
368 | nextPageLink = buildUrl(searchTerm, opts[0].CountryCode, lc, limit, start)
369 | q.AddURL(nextPageLink)
370 | } else {
371 | nextPageLink = ""
372 | }
373 | })
374 |
375 | url := buildUrl(searchTerm, opts[0].CountryCode, lc, limit, opts[0].Start)
376 |
377 | if opts[0].ProxyAddr != "" {
378 | rp, err := proxy.RoundRobinProxySwitcher(opts[0].ProxyAddr)
379 | if err != nil {
380 | return nil, err
381 | }
382 | c.SetProxyFunc(rp)
383 | }
384 |
385 | q.AddURL(url)
386 | q.Run(c)
387 |
388 | if rErr != nil {
389 | if strings.Contains(rErr.Error(), "Too Many Requests") {
390 | return nil, ErrBlocked
391 | }
392 | return nil, rErr
393 | }
394 |
395 | // Reduce results to max limit
396 | if opts[0].Limit != 0 && len(results) > opts[0].Limit {
397 | return results[:opts[0].Limit], nil
398 | }
399 |
400 | return results, nil
401 | }
402 |
// getStart extracts the "start" query parameter from uri as an integer.
//
// It returns 0 when uri cannot be parsed, when it has no "start" parameter,
// or when that parameter is not a valid integer.
func getStart(uri string) int {
	u, err := url.Parse(uri)
	if err != nil {
		// u is nil here; dereferencing it would panic.
		return 0
	}

	start, err := strconv.Atoi(u.Query().Get("start"))
	if err != nil {
		return 0
	}
	return start
}
414 |
415 | func base(url string) string {
416 | if strings.HasPrefix(url, "http") {
417 | return url
418 | } else {
419 | return stdGoogleBase + url
420 | }
421 | }
422 |
423 | func buildUrl(searchTerm string, countryCode string, languageCode string, limit int, start int) string {
424 | searchTerm = strings.Trim(searchTerm, " ")
425 | searchTerm = strings.Replace(searchTerm, " ", "+", -1)
426 | countryCode = strings.ToLower(countryCode)
427 |
428 | var url string
429 |
430 | if googleBase, found := GoogleDomains[countryCode]; found {
431 | if start == 0 {
432 | url = fmt.Sprintf("%s%s&hl=%s", base(googleBase), searchTerm, languageCode)
433 | } else {
434 | url = fmt.Sprintf("%s%s&hl=%s&start=%d", base(googleBase), searchTerm, languageCode, start)
435 | }
436 | } else {
437 | if start == 0 {
438 | url = fmt.Sprintf("%s%s&hl=%s", stdGoogleBase+GoogleDomains["us"], searchTerm, languageCode)
439 | } else {
440 | url = fmt.Sprintf("%s%s&hl=%s&start=%d", stdGoogleBase+GoogleDomains["us"], searchTerm, languageCode, start)
441 | }
442 | }
443 |
444 | if limit != 0 {
445 | url = fmt.Sprintf("%s&num=%d", url, limit)
446 | }
447 |
448 | return url
449 | }
450 |
--------------------------------------------------------------------------------
/search_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2020-21 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved.
2 |
3 | package googlesearch_test
4 |
5 | import (
6 | "testing"
7 |
8 | googlesearch "github.com/rocketlaunchr/google-search"
9 | )
10 |
11 | func TestSearch(t *testing.T) {
12 |
13 | q := "Hello World"
14 |
15 | opts := googlesearch.SearchOptions{
16 | Limit: 20,
17 | }
18 |
19 | //lint:ignore SA1012 ignore this bare essentials by passing nil for context and removing context package (despite not being idiomatic go).
20 | returnLinks, err := googlesearch.Search(nil, q, opts)
21 | if err != nil {
22 | t.Errorf("something went wrong: %v", err)
23 | return
24 | }
25 |
26 | if len(returnLinks) == 0 {
27 | t.Errorf("no results returned: %v", returnLinks)
28 | }
29 |
30 | noURL := 0
31 | noTitle := 0
32 | noDesc := 0
33 |
34 | for _, res := range returnLinks {
35 | if res.URL == "" {
36 | noURL++
37 | }
38 |
39 | if res.Title == "" {
40 | noTitle++
41 | }
42 |
43 | if res.Description == "" {
44 | noDesc++
45 | }
46 | }
47 |
48 | if noURL == len(returnLinks) || noTitle == len(returnLinks) || noDesc == len(returnLinks) {
49 | t.Errorf("google dom changed")
50 | }
51 | }
52 |
--------------------------------------------------------------------------------