├── .github
│   ├── FUNDING.yaml
│   └── workflows
│       ├── build.yml
│       └── publish.yml
├── .gitignore
├── .golangci.yaml
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── MacOS instructions.md
├── Makefile
├── README.md
├── banner.png
├── cmd
│   └── tools
│       └── tools.go
├── deduper
│   ├── deduper.go
│   └── hashmap.go
├── docker-compose.dev.yaml
├── example-queries.txt
├── examples
│   └── plugins
│       └── example_writer.go
├── exiter
│   └── exiter.go
├── gmaps-extractor.md
├── gmaps
│   ├── emailjob.go
│   ├── entry.go
│   ├── entry_test.go
│   ├── job.go
│   ├── multiple.go
│   ├── place.go
│   ├── reviews.go
│   └── searchjob.go
├── go.mod
├── go.sum
├── go.work
├── go.work.sum
├── img
│   ├── SerpApi-banner.png
│   ├── SerpApi-logo-w.png
│   ├── capsolver-banner.png
│   ├── example.gif
│   ├── gmaps-extractor-banner.png
│   ├── gmaps-extractor-logo.png
│   ├── premium_scrap_io.png
│   ├── premium_scrap_io_demo.gif
│   ├── scrapeless_dark.png
│   └── scrapeless_light.png
├── lint.go
├── main.go
├── postgres
│   ├── provider.go
│   └── resultwriter.go
├── runner
│   ├── databaserunner
│   │   └── databaserunner.go
│   ├── filerunner
│   │   └── filerunner.go
│   ├── installplaywright
│   │   └── installplaywright.go
│   ├── jobs.go
│   ├── lambdaaws
│   │   ├── invoker.go
│   │   ├── io.go
│   │   └── lambdaaws.go
│   ├── runner.go
│   └── webrunner
│       └── webrunner.go
├── s3uploader
│   └── s3uploader.go
├── scrap_io.md
├── scripts
│   └── migrations
│       ├── 0001_create_tables.down.sql
│       ├── 0001_create_tables.up.sql
│       ├── 0002_add_lat_lon_results.down.sql
│       ├── 0002_add_lat_lon_results.up.sql
│       ├── 0003_results_jsonb.dow.sql
│       ├── 0003_results_jsonb.up.sql
│       └── 0004_add-index-gmaps_jobs.up.sql
├── serpapi.md
├── testdata
│   ├── output.json
│   ├── panic.json
│   ├── panic2.json
│   ├── raw.json
│   └── raw2.json
├── tlmt
│   ├── gonoop
│   │   └── gonoop.go
│   ├── goposthog
│   │   └── goposthog.go
│   └── tlmt.go
└── web
    ├── errors.go
    ├── job.go
    ├── service.go
    ├── sqlite
    │   └── sqlite.go
    ├── static
    │   ├── css
    │   │   └── main.css
    │   ├── spec
    │   │   └── spec.yaml
    │   └── templates
    │       ├── index.html
    │       ├── job_row.html
    │       ├── job_rows.html
    │       └── redoc.html
    └── web.go

/.github/FUNDING.yaml:
--------------------------------------------------------------------------------
1 | github: gosom
2 | 
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | 
3 | permissions: {}
4 | 
5 | on:
6 |   push:
7 |     branches: [main]
8 |   pull_request:
9 |     branches: [main]
10 | 
11 | jobs:
12 |   run:
13 |     name: Build
14 |     runs-on: ubuntu-latest
15 |     timeout-minutes: 7
16 |     strategy:
17 |       fail-fast: true
18 |       matrix:
19 |         go: ['1.24.3']
20 | 
21 |     steps:
22 |       - name: Check out code
23 |         uses: actions/checkout@v4
24 |       - name: Install Go
25 |         uses: actions/setup-go@v4
26 |         with:
27 |           go-version: ${{ matrix.go }}
28 |           check-latest: true
29 |       - name: Go Format
30 |         run: gofmt -s -w . && git diff --exit-code
31 |       - name: Lint
32 |         run: make lint
33 |       - name: Go Vet
34 |         run: go vet ./...
35 |       - name: Go Tidy
36 |         run: go mod tidy && git diff --exit-code
37 |       - name: Go Mod
38 |         run: go mod download
39 |       - name: Go Mod Verify
40 |         run: go mod verify
41 |       - name: Go Vulnerability Check
42 |         run: make vuln
43 |       - name: Go Build
44 |         run: go build -o /dev/null ./...
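      # building every package to /dev/null verifies compilation without keeping artifacts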
45 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | permissions: {} 4 | 5 | on: 6 | release: 7 | types: [published] 8 | 9 | jobs: 10 | push_to_registry: 11 | name: Push Docker image to Docker Hub 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out the repo 15 | uses: actions/checkout@v4 16 | 17 | - name: Log in to Docker Hub 18 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 19 | with: 20 | username: ${{ secrets.DOCKER_USERNAME }} 21 | password: ${{ secrets.DOCKER_PASSWORD }} 22 | 23 | - name: Extract metadata (tags, labels) for Docker 24 | id: meta 25 | uses: docker/metadata-action@v5 26 | with: 27 | images: gosom/google-maps-scraper 28 | 29 | - name: Build and push Docker image 30 | uses: docker/build-push-action@v5 31 | with: 32 | context: . 33 | push: true 34 | tags: ${{ steps.meta.outputs.tags }} 35 | labels: ${{ steps.meta.outputs.labels }} 36 | no-cache: true 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cache/ 2 | vendor 3 | bin 4 | webdata/ 5 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | issues-exit-code: 1 3 | timeout: 3m 4 | linters-settings: 5 | errcheck: 6 | check-type-assertions: true 7 | goconst: 8 | min-len: 2 9 | min-occurrences: 3 10 | gci: 11 | sections: 12 | - prefix(github.com/gosom/ledger) 13 | custom-order: true 14 | gocritic: 15 | enabled-tags: 16 | - diagnostic 17 | - experimental 18 | - opinionated 19 | - performance 20 | - style 21 | govet: 22 | shadow: true 23 | nolintlint: 24 | require-explanation: true 25 | require-specific: true 26 | linters: 27 | disable-all: true 28 | enable: 29 | - bodyclose 30 | - dogsled 31 | - dupl 32 | - errcheck 33 | - copyloopvar 34 | - exhaustive 35 | - goconst 36 | - gocritic 37 | - gofmt 38 | - goimports 39 | - gocyclo 40 | - gosec 41 | - gosimple 42 | - govet 43 | - ineffassign 44 | - misspell 45 | - nolintlint 46 | - nakedret 47 | - prealloc 48 | - predeclared 49 | - revive 50 | - staticcheck 51 | - stylecheck 52 | - testpackage 53 | - thelper 54 | - tparallel 55 | - typecheck 56 | - unconvert 57 | - unparam 58 | - whitespace 59 | - wsl 60 | 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: gobuild 5 | name: gobuild 6 | entry: go build -v -o /dev/null ./... 7 | language: golang 8 | types: [go] 9 | require_serial: true 10 | pass_filenames: false 11 | - id: golangci-lint 12 | name: golangci-lint 13 | entry: make lint 14 | language: golang 15 | types: [go] 16 | require_serial: true 17 | pass_filenames: false 18 | - id: gotest 19 | name: gotest 20 | entry: go test -v -race ./... 
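        # -race enables the Go race detector: slower, but it surfaces data races in the concurrent runners before they are committed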
21 | language: golang 22 | types: [go] 23 | require_serial: true 24 | pass_filenames: false 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage for Playwright dependencies 2 | FROM golang:1.24.3-bullseye AS playwright-deps 3 | ENV PLAYWRIGHT_BROWSERS_PATH=/opt/browsers 4 | #ENV PLAYWRIGHT_DRIVER_PATH=/opt/ 5 | RUN apt-get update && apt-get install -y --no-install-recommends \ 6 | ca-certificates \ 7 | curl \ 8 | && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ 9 | && apt-get install -y --no-install-recommends nodejs \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* \ 12 | && go install github.com/playwright-community/playwright-go/cmd/playwright@latest \ 13 | && mkdir -p /opt/browsers \ 14 | && playwright install chromium --with-deps 15 | 16 | # Build stage 17 | FROM golang:1.24.3-bullseye AS builder 18 | WORKDIR /app 19 | COPY go.mod go.sum ./ 20 | RUN go mod download 21 | COPY . . 22 | RUN CGO_ENABLED=0 go build -ldflags="-w -s" -o /usr/bin/google-maps-scraper 23 | 24 | # Final stage 25 | FROM debian:bullseye-slim 26 | ENV PLAYWRIGHT_BROWSERS_PATH=/opt/browsers 27 | ENV PLAYWRIGHT_DRIVER_PATH=/opt 28 | 29 | # Install only the necessary dependencies in a single layer 30 | RUN apt-get update && apt-get install -y --no-install-recommends \ 31 | ca-certificates \ 32 | libnss3 \ 33 | libnspr4 \ 34 | libatk1.0-0 \ 35 | libatk-bridge2.0-0 \ 36 | libcups2 \ 37 | libdrm2 \ 38 | libdbus-1-3 \ 39 | libxkbcommon0 \ 40 | libatspi2.0-0 \ 41 | libx11-6 \ 42 | libxcomposite1 \ 43 | libxdamage1 \ 44 | libxext6 \ 45 | libxfixes3 \ 46 | libxrandr2 \ 47 | libgbm1 \ 48 | libpango-1.0-0 \ 49 | libcairo2 \ 50 | libasound2 \ 51 | && apt-get clean \ 52 | && rm -rf /var/lib/apt/lists/* 53 | 54 | COPY --from=playwright-deps /opt/browsers /opt/browsers 55 | COPY --from=playwright-deps /root/.cache/ms-playwright-go /opt/ms-playwright-go 56 | 57 | RUN chmod -R 755 /opt/browsers \ 58 | && chmod -R 755 /opt/ms-playwright-go 59 | 60 | COPY --from=builder /usr/bin/google-maps-scraper /usr/bin/ 61 | 62 | ENTRYPOINT ["google-maps-scraper"] 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgios Komninos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MacOS instructions.md:
--------------------------------------------------------------------------------
1 | **1. Install Homebrew**
2 | 
3 | Open the Terminal and run:
4 | 
5 | `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"`
6 | 
7 | After installation, add Homebrew to your PATH (if it doesn’t happen automatically). Run:
8 | 
9 | ```
10 | echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.zshrc
11 | source ~/.zshrc
12 | ```
13 | 
14 | Now, check if Homebrew is installed correctly:
15 | 
16 | `brew --version`
17 | 
18 | 
19 | **2. Install Go Using Homebrew**
20 | 
21 | Once Homebrew is installed, install Go with:
22 | 
23 | `brew install go`
24 | 
25 | After installing Go, verify it:
26 | 
27 | `go version`
28 | 
29 | **3. Clone the Repository**
30 | 
31 | Open your terminal and clone the repository:
32 | 
33 | ```
34 | git clone https://github.com/gosom/google-maps-scraper.git
35 | cd google-maps-scraper
36 | ```
37 | 
38 | **4. Build for macOS**
39 | 
40 | The repository provides a Makefile, but you can manually compile the project for macOS:
41 | 
42 | `go build -o google_maps_scraper`
43 | 
44 | Create a macOS application structure:
45 | 
46 | ```
47 | mkdir -p GoogleMapsScraper.app/Contents/MacOS
48 | mv google_maps_scraper GoogleMapsScraper.app/Contents/MacOS/
49 | ```
50 | 
51 | Convert the App to a GUI Application
52 | 
53 | Create a new script inside MacOS/:
54 | 
55 | `nano GoogleMapsScraper.app/Contents/MacOS/start.sh`
56 | 
57 | Paste this inside:
58 | 
59 | ```
60 | #!/bin/bash
61 | DIR="$(cd "$(dirname "$0")" && pwd)"
62 | cd "$HOME"
63 | 
64 | # Run scraper in background and get its PID
65 | "$DIR/google_maps_scraper" > "$HOME/google_maps_scraper.log" 2>&1 &
66 | SCRAPER_PID=$!
67 | 
68 | # Open browser
69 | sleep 2
70 | open http://localhost:8080
71 | 
72 | # Idle timeout in seconds
73 | IDLE_TIMEOUT=6000
74 | 
75 | # Wait and kill
76 | sleep $IDLE_TIMEOUT
77 | 
78 | # Check if still running, and kill it
79 | if ps -p $SCRAPER_PID > /dev/null; then
80 |     echo "Stopping scraper after $IDLE_TIMEOUT seconds of idle time."
81 |     kill $SCRAPER_PID
82 | fi
83 | ```
84 | Save and exit.
85 | 
86 | Now make it executable:
87 | 
88 | `chmod +x GoogleMapsScraper.app/Contents/MacOS/start.sh`
89 | 
90 | Add an Info.plist inside GoogleMapsScraper.app/Contents/:
91 | 
92 | ```
93 | <?xml version="1.0" encoding="UTF-8"?>
94 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
95 | <plist version="1.0">
96 | <dict>
97 |     <key>CFBundleExecutable</key>
98 |     <string>start.sh</string>
99 |     <key>CFBundleIdentifier</key>
100 |     <string>com.yourcompany.googlemapsscraper</string>
101 |     <key>CFBundleName</key>
102 |     <string>Google Maps Scraper</string>
103 |     <key>CFBundleVersion</key>
104 |     <string>1.0</string>
105 | </dict>
106 | </plist>
107 | ```
108 | 
109 | **5. Run the Application**
110 | 
111 | `open GoogleMapsScraper.app`
112 | 
113 | **6. Code Sign the App (Optional but Recommended)**
114 | 
115 | `codesign --force --deep --sign - GoogleMapsScraper.app`
116 | 
117 | If you plan to distribute it, you’ll need an Apple Developer ID to properly sign it.
118 | 
119 | **7. Create a .dmg Installer (Optional)**
120 | 
121 | ```
122 | mkdir -p ~/dmg-tmp/GoogleMapsScraper
123 | cp -R /path/to/GoogleMapsScraper.app ~/dmg-tmp/GoogleMapsScraper/
124 | ```
125 | Replace /path/to/GoogleMapsScraper.app with your actual path (e.g.
~/Desktop/GoogleMapsScraper.app) 126 | 127 | Create the .dmg 128 | 129 | ``` 130 | hdiutil create -volname "GoogleMapsScraper" -srcfolder ~/dmg-tmp/GoogleMapsScraper -ov -format UDZO GoogleMapsScraper.dmg 131 | ``` 132 | You’ll now have a file named GoogleMapsScraper.dmg in the current folder. 133 | 134 | An example of the built .app can be seen here: https://github.com/melogabriel/google-maps-scraper/releases/tag/v1.0.0 135 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APP_NAME := google_maps_scraper 2 | VERSION := 1.8.0 3 | 4 | default: help 5 | 6 | # generate help info from comments: thanks to https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html 7 | help: ## help information about make commands 8 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 9 | 10 | vet: ## runs go vet 11 | go vet ./... 12 | 13 | format: ## runs go fmt 14 | gofmt -s -w . 15 | 16 | test: ## runs the unit tests 17 | go test -v -race -timeout 5m ./... 18 | 19 | test-cover: ## outputs the coverage statistics 20 | go test -v -race -timeout 5m ./... -coverprofile coverage.out 21 | go tool cover -func coverage.out 22 | rm coverage.out 23 | 24 | test-cover-report: ## an html report of the coverage statistics 25 | go test -v ./... -covermode=count -coverpkg=./... -coverprofile coverage.out 26 | go tool cover -html coverage.out -o coverage.html 27 | open coverage.html 28 | 29 | vuln: ## runs vulnerability checks 30 | go tool govulncheck -C . -show verbose -format text -scan symbol ./... 31 | 32 | lint: ## runs the linter 33 | go tool golangci-lint -v run ./... 
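# lint and vuln both run through `go tool`, so contributors need no globally installed binaries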
34 | 35 | cross-compile: ## cross compiles the application 36 | GOOS=linux GOARCH=amd64 go build -o bin/$(APP_NAME)-${VERSION}-linux-amd64 37 | GOOS=darwin GOARCH=amd64 go build -o bin/$(APP_NAME)-${VERSION}-darwin-amd64 38 | GOOS=windows GOARCH=amd64 go build -o bin/$(APP_NAME)-${VERSION}-windows-amd64.exe 39 | -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/banner.png -------------------------------------------------------------------------------- /cmd/tools/tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | // this file exists to manage tools via go modules 5 | 6 | package main 7 | 8 | import ( 9 | _ "github.com/golangci/golangci-lint/cmd/golangci-lint" 10 | ) 11 | -------------------------------------------------------------------------------- /deduper/deduper.go: -------------------------------------------------------------------------------- 1 | package deduper 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | ) 7 | 8 | type Deduper interface { 9 | AddIfNotExists(context.Context, string) bool 10 | } 11 | 12 | func New() Deduper { 13 | return &hashmap{ 14 | seen: make(map[uint64]struct{}), 15 | mux: &sync.RWMutex{}, 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /deduper/hashmap.go: -------------------------------------------------------------------------------- 1 | package deduper 2 | 3 | import ( 4 | "context" 5 | "hash/fnv" 6 | "sync" 7 | ) 8 | 9 | var _ Deduper = (*hashmap)(nil) 10 | 11 | type hashmap struct { 12 | mux *sync.RWMutex 13 | seen map[uint64]struct{} 14 | } 15 | 16 | func (d *hashmap) AddIfNotExists(_ context.Context, key string) bool { 17 | d.mux.RLock() 18 | if _, ok := d.seen[d.hash(key)]; ok { 19 | d.mux.RUnlock() 20 | return false 21 | } 22 | 23 | d.mux.RUnlock() 24 | 25 | d.mux.Lock() 26 | defer d.mux.Unlock() 27 | 28 | if _, ok := d.seen[d.hash(key)]; ok { 29 | return false 30 | } 31 | 32 | d.seen[d.hash(key)] = struct{}{} 33 | 34 | return true 35 | } 36 | 37 | func (d *hashmap) hash(key string) uint64 { 38 | h := fnv.New64() 39 | h.Write([]byte(key)) 40 | 41 | return h.Sum64() 42 | } 43 | -------------------------------------------------------------------------------- /docker-compose.dev.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | db: 3 | image: postgres:15.2-alpine 4 | environment: 5 | - POSTGRES_USER=postgres 6 | - POSTGRES_PASSWORD=postgres 7 | ports: 8 | - '127.0.0.1:5432:5432' 9 | expose: 10 | - 5432 11 | volumes: 12 | - gmapsdev:/var/lib/postgresql/data 13 | restart: "unless-stopped" 14 | healthcheck: 15 | test: ["CMD-SHELL", "pg_isready -U postgres"] 16 | interval: 2s 17 | timeout: 30s 18 | retries: 5 19 | migrate: 20 | image: migrate/migrate 21 | volumes: 22 | - ./scripts/migrations:/migrations 23 | command: ["-path", "/migrations", "-database", "postgres://postgres:postgres@db:5432/postgres?sslmode=disable", "up"] 24 | healthcheck: 25 | test: "exit 0" 26 | depends_on: 27 | db: 28 | condition: service_healthy 29 | 30 | volumes: 31 | gmapsdev: 32 | -------------------------------------------------------------------------------- /example-queries.txt: -------------------------------------------------------------------------------- 1 | 
Rokoko Kitchen & Bar (Nicosia)
2 | Kitsios cafe nicosia latsia
3 | cafe in latsia
--------------------------------------------------------------------------------
/examples/plugins/example_writer.go:
--------------------------------------------------------------------------------
1 | //go:build plugin
2 | // +build plugin
3 | 
4 | package main
5 | 
6 | import (
7 | 	"bufio"
8 | 	"context"
9 | 	"fmt"
10 | 	"os"
11 | 
12 | 	"github.com/gosom/google-maps-scraper/gmaps"
13 | 	"github.com/gosom/scrapemate"
14 | )
15 | 
16 | var _ scrapemate.ResultWriter = (*exampleWriter)(nil)
17 | 
18 | var DummyPrinter scrapemate.ResultWriter = newWriter("dummy.txt")
19 | 
20 | type exampleWriter struct {
21 | 	w *bufio.Writer
22 | }
23 | 
24 | func newWriter(fname string) scrapemate.ResultWriter {
25 | 	fd, err := os.Create(fname)
26 | 	if err != nil {
27 | 		panic(err)
28 | 	}
29 | 
30 | 	return &exampleWriter{
31 | 		w: bufio.NewWriter(fd),
32 | 	}
33 | }
34 | 
35 | // Run is the main function of the writer
36 | // we write the job id and the title of the entries in a file
37 | // notice the asSlice function that converts the data to a slice of *gmaps.Entry
38 | func (e *exampleWriter) Run(_ context.Context, in <-chan scrapemate.Result) error {
39 | 	defer e.w.Flush()
40 | 
41 | 	for result := range in {
42 | 		job, ok := result.Job.(scrapemate.IJob)
43 | 		if !ok {
44 | 			return fmt.Errorf("cannot cast %T to IJob", result.Job)
45 | 		}
46 | 
47 | 		items, err := asSlice(result.Data)
48 | 		if err != nil {
49 | 			return err
50 | 		}
51 | 
52 | 		for _, item := range items {
53 | 			_, err := fmt.Fprintf(e.w, "Job %s: %s\n", job.GetID(), item.Title)
54 | 			if err != nil {
55 | 				return err
56 | 			}
57 | 		}
58 | 	}
59 | 
60 | 	return nil
61 | }
62 | 
63 | func asSlice(t any) ([]*gmaps.Entry, error) {
64 | 	var elements []*gmaps.Entry
65 | 
66 | 	isSlice, ok := t.([]any)
67 | 	if ok {
68 | 		elements = make([]*gmaps.Entry, len(isSlice)) // assign, don't shadow, so the slice is actually returned
69 | 		for i, v := range isSlice {
70 | 			elements[i], ok = v.(*gmaps.Entry)
71 | 			if !ok {
72 | 				return nil, fmt.Errorf("cannot cast %T to *gmaps.Entry", v)
73 | 			}
74 | 		}
75 | 	} else {
76 | 		element, ok := t.(*gmaps.Entry)
77 | 		if !ok {
78 | 			return nil, fmt.Errorf("cannot cast %T to *gmaps.Entry", t)
79 | 		}
80 | 
81 | 		elements = append(elements, element)
82 | 	}
83 | 
84 | 	return elements, nil
85 | }
86 | 
--------------------------------------------------------------------------------
/exiter/exiter.go:
--------------------------------------------------------------------------------
1 | package exiter
2 | 
3 | import (
4 | 	"context"
5 | 	"sync"
6 | 	"time"
7 | )
8 | 
9 | type Exiter interface {
10 | 	SetSeedCount(int)
11 | 	SetCancelFunc(context.CancelFunc)
12 | 	IncrSeedCompleted(int)
13 | 	IncrPlacesFound(int)
14 | 	IncrPlacesCompleted(int)
15 | 	Run(context.Context)
16 | }
17 | 
18 | type exiter struct {
19 | 	seedCount       int
20 | 	seedCompleted   int
21 | 	placesFound     int
22 | 	placesCompleted int
23 | 
24 | 	mu         *sync.Mutex
25 | 	cancelFunc context.CancelFunc
26 | }
27 | 
28 | func New() Exiter {
29 | 	return &exiter{
30 | 		mu: &sync.Mutex{},
31 | 	}
32 | }
33 | 
34 | func (e *exiter) SetSeedCount(val int) {
35 | 	e.mu.Lock()
36 | 	defer e.mu.Unlock()
37 | 
38 | 	e.seedCount = val
39 | }
40 | 
41 | func (e *exiter) SetCancelFunc(fn context.CancelFunc) {
42 | 	e.mu.Lock()
43 | 	defer e.mu.Unlock()
44 | 
45 | 	e.cancelFunc = fn
46 | }
47 | 
48 | func (e *exiter) IncrSeedCompleted(val int) {
49 | 	e.mu.Lock()
50 | 	defer e.mu.Unlock()
51 | 
52 | 	e.seedCompleted += val
53 | }
54 | 
55 | func (e *exiter) IncrPlacesFound(val int) {
56 | 	e.mu.Lock()
57 | 	defer e.mu.Unlock()
58 | 
59 | 
e.placesFound += val 60 | } 61 | 62 | func (e *exiter) IncrPlacesCompleted(val int) { 63 | e.mu.Lock() 64 | defer e.mu.Unlock() 65 | 66 | e.placesCompleted += val 67 | } 68 | 69 | func (e *exiter) Run(ctx context.Context) { 70 | ticker := time.NewTicker(time.Second * 5) 71 | defer ticker.Stop() 72 | 73 | for { 74 | select { 75 | case <-ctx.Done(): 76 | return 77 | case <-ticker.C: 78 | if e.isDone() { 79 | e.cancelFunc() 80 | 81 | return 82 | } 83 | } 84 | } 85 | } 86 | 87 | func (e *exiter) isDone() bool { 88 | e.mu.Lock() 89 | defer e.mu.Unlock() 90 | 91 | if e.seedCompleted != e.seedCount { 92 | return false 93 | } 94 | 95 | if e.placesFound != e.placesCompleted { 96 | return false 97 | } 98 | 99 | return true 100 | } 101 | -------------------------------------------------------------------------------- /gmaps-extractor.md: -------------------------------------------------------------------------------- 1 | **G Maps Extractor** 2 | A no-code Google Maps scraper that pulls business leads from Google Maps in one click. 3 | 4 | - 📇 **Includes** emails, social profiles, phone numbers, addresses, reviews, images and more. 5 | - 📥 **Export** to CSV · Excel · JSON 6 | - 🎁 **Free**: Get your first **1,000 leads** today 7 | [Get Started for Free](https://gmapsextractor.com?utm_source=github&utm_medium=banner&utm_campaign=gosom) 8 | -------------------------------------------------------------------------------- /gmaps/emailjob.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | 7 | "github.com/PuerkitoBio/goquery" 8 | "github.com/google/uuid" 9 | "github.com/gosom/google-maps-scraper/exiter" 10 | "github.com/gosom/scrapemate" 11 | "github.com/mcnijman/go-emailaddress" 12 | ) 13 | 14 | type EmailExtractJobOptions func(*EmailExtractJob) 15 | 16 | type EmailExtractJob struct { 17 | scrapemate.Job 18 | 19 | Entry *Entry 20 | ExitMonitor exiter.Exiter 21 | } 22 | 23 | func NewEmailJob(parentID string, entry *Entry, opts ...EmailExtractJobOptions) *EmailExtractJob { 24 | const ( 25 | defaultPrio = scrapemate.PriorityHigh 26 | defaultMaxRetries = 0 27 | ) 28 | 29 | job := EmailExtractJob{ 30 | Job: scrapemate.Job{ 31 | ID: uuid.New().String(), 32 | ParentID: parentID, 33 | Method: "GET", 34 | URL: entry.WebSite, 35 | MaxRetries: defaultMaxRetries, 36 | Priority: defaultPrio, 37 | }, 38 | } 39 | 40 | job.Entry = entry 41 | 42 | for _, opt := range opts { 43 | opt(&job) 44 | } 45 | 46 | return &job 47 | } 48 | 49 | func WithEmailJobExitMonitor(exitMonitor exiter.Exiter) EmailExtractJobOptions { 50 | return func(j *EmailExtractJob) { 51 | j.ExitMonitor = exitMonitor 52 | } 53 | } 54 | 55 | func (j *EmailExtractJob) Process(ctx context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error) { 56 | defer func() { 57 | resp.Document = nil 58 | resp.Body = nil 59 | }() 60 | 61 | defer func() { 62 | if j.ExitMonitor != nil { 63 | j.ExitMonitor.IncrPlacesCompleted(1) 64 | } 65 | }() 66 | 67 | log := scrapemate.GetLoggerFromContext(ctx) 68 | 69 | log.Info("Processing email job", "url", j.URL) 70 | 71 | // if html fetch failed just return 72 | if resp.Error != nil { 73 | return j.Entry, nil, nil 74 | } 75 | 76 | doc, ok := resp.Document.(*goquery.Document) 77 | if !ok { 78 | return j.Entry, nil, nil 79 | } 80 | 81 | emails := docEmailExtractor(doc) 82 | if len(emails) == 0 { 83 | emails = regexEmailExtractor(resp.Body) 84 | } 85 | 86 | j.Entry.Emails = emails 87 | 88 | return j.Entry, nil, nil 89 | } 90 
| 
91 | func (j *EmailExtractJob) ProcessOnFetchError() bool {
92 | 	return true
93 | }
94 | 
95 | func docEmailExtractor(doc *goquery.Document) []string {
96 | 	seen := map[string]bool{}
97 | 
98 | 	var emails []string
99 | 
100 | 	doc.Find("a[href^='mailto:']").Each(func(_ int, s *goquery.Selection) {
101 | 		mailto, exists := s.Attr("href")
102 | 		if exists {
103 | 			value := strings.TrimPrefix(mailto, "mailto:")
104 | 			if email, err := getValidEmail(value); err == nil {
105 | 				if !seen[email] {
106 | 					emails = append(emails, email)
107 | 					seen[email] = true
108 | 				}
109 | 			}
110 | 		}
111 | 	})
112 | 
113 | 	return emails
114 | }
115 | 
116 | func regexEmailExtractor(body []byte) []string {
117 | 	seen := map[string]bool{}
118 | 
119 | 	var emails []string
120 | 
121 | 	addresses := emailaddress.Find(body, false)
122 | 	for i := range addresses {
123 | 		if !seen[addresses[i].String()] {
124 | 			emails = append(emails, addresses[i].String())
125 | 			seen[addresses[i].String()] = true
126 | 		}
127 | 	}
128 | 
129 | 	return emails
130 | }
131 | 
132 | func getValidEmail(s string) (string, error) {
133 | 	email, err := emailaddress.Parse(strings.TrimSpace(s))
134 | 	if err != nil {
135 | 		return "", err
136 | 	}
137 | 
138 | 	return email.String(), nil
139 | }
140 | 
--------------------------------------------------------------------------------
/gmaps/entry.go:
--------------------------------------------------------------------------------
1 | package gmaps
2 | 
3 | import (
4 | 	"encoding/json"
5 | 	"fmt"
6 | 	"iter"
7 | 	"math"
8 | 	"runtime/debug"
9 | 	"slices"
10 | 	"strconv"
11 | 	"strings"
12 | )
13 | 
14 | type Image struct {
15 | 	Title string `json:"title"`
16 | 	Image string `json:"image"`
17 | }
18 | 
19 | type LinkSource struct {
20 | 	Link   string `json:"link"`
21 | 	Source string `json:"source"`
22 | }
23 | 
24 | type Owner struct {
25 | 	ID   string `json:"id"`
26 | 	Name string `json:"name"`
27 | 	Link string `json:"link"`
28 | }
29 | 
30 | type Address struct {
31 | 	Borough    string `json:"borough"`
32 | 	Street     string `json:"street"`
33 | 	City       string `json:"city"`
34 | 	PostalCode string `json:"postal_code"`
35 | 	State      string `json:"state"`
36 | 	Country    string `json:"country"`
37 | }
38 | 
39 | type Option struct {
40 | 	Name    string `json:"name"`
41 | 	Enabled bool   `json:"enabled"`
42 | }
43 | 
44 | type About struct {
45 | 	ID      string   `json:"id"`
46 | 	Name    string   `json:"name"`
47 | 	Options []Option `json:"options"`
48 | }
49 | 
50 | type Review struct {
51 | 	Name           string
52 | 	ProfilePicture string
53 | 	Rating         int
54 | 	Description    string
55 | 	Images         []string
56 | 	When           string
57 | }
58 | 
59 | type Entry struct {
60 | 	ID         string              `json:"input_id"`
61 | 	Link       string              `json:"link"`
62 | 	Cid        string              `json:"cid"`
63 | 	Title      string              `json:"title"`
64 | 	Categories []string            `json:"categories"`
65 | 	Category   string              `json:"category"`
66 | 	Address    string              `json:"address"`
67 | 	OpenHours  map[string][]string `json:"open_hours"`
68 | 	// PopularTimes is a map with keys the days of the week
69 | 	// and value is a map with key the hour and value the traffic in that time
70 | 	PopularTimes        map[string]map[int]int `json:"popular_times"`
71 | 	WebSite             string                 `json:"web_site"`
72 | 	Phone               string                 `json:"phone"`
73 | 	PlusCode            string                 `json:"plus_code"`
74 | 	ReviewCount         int                    `json:"review_count"`
75 | 	ReviewRating        float64                `json:"review_rating"`
76 | 	ReviewsPerRating    map[int]int            `json:"reviews_per_rating"`
77 | 	Latitude            float64                `json:"latitude"`
78 | 	Longtitude          float64                `json:"longtitude"`
79 | 	Status              string                 `json:"status"`
80 | 	Description         string                 `json:"description"`
81 | 	ReviewsLink         string                 `json:"reviews_link"`
82 | 	Thumbnail           string                 `json:"thumbnail"`
83 | 	Timezone            string                 `json:"timezone"`
84 | 	PriceRange          string                 `json:"price_range"`
85 | 	DataID              string                 `json:"data_id"`
86 | 	Images              []Image                `json:"images"`
87 | 	Reservations        []LinkSource           `json:"reservations"`
88 | 	OrderOnline         []LinkSource           `json:"order_online"`
89 | 	Menu                LinkSource             `json:"menu"`
90 | 	Owner               Owner                  `json:"owner"`
91 | 	CompleteAddress     Address                `json:"complete_address"`
92 | 	About               []About                `json:"about"`
93 | 	UserReviews         []Review               `json:"user_reviews"`
94 | 	UserReviewsExtended []Review               `json:"user_reviews_extended"`
95 | 	Emails              []string               `json:"emails"`
96 | }
97 | 
98 | func (e *Entry) haversineDistance(lat, lon float64) float64 {
99 | 	const R = 6371e3 // earth radius in meters
100 | 
101 | 	clat := lat * math.Pi / 180
102 | 	clon := lon * math.Pi / 180
103 | 
104 | 	elat := e.Latitude * math.Pi / 180
105 | 	elon := e.Longtitude * math.Pi / 180
106 | 
107 | 	dlat := elat - clat
108 | 	dlon := elon - clon
109 | 
110 | 	a := math.Sin(dlat/2)*math.Sin(dlat/2) +
111 | 		math.Cos(clat)*math.Cos(elat)*
112 | 			math.Sin(dlon/2)*math.Sin(dlon/2)
113 | 
114 | 	c := 2 * math.Atan2(math.Sqrt(a), math.Sqrt(1-a))
115 | 
116 | 	return R * c
117 | }
118 | 
119 | func (e *Entry) isWithinRadius(lat, lon, radius float64) bool {
120 | 	distance := e.haversineDistance(lat, lon)
121 | 
122 | 	return distance <= radius
123 | }
124 | 
125 | func (e *Entry) IsWebsiteValidForEmail() bool {
126 | 	if e.WebSite == "" {
127 | 		return false
128 | 	}
129 | 
130 | 	needles := []string{
131 | 		"facebook",
132 | 		"instagram",
133 | 		"twitter",
134 | 	}
135 | 
136 | 	for i := range needles {
137 | 		if strings.Contains(e.WebSite, needles[i]) {
138 | 			return false
139 | 		}
140 | 	}
141 | 
142 | 	return true
143 | }
144 | 
145 | func (e *Entry) Validate() error {
146 | 	if e.Title == "" {
147 | 		return fmt.Errorf("title is empty")
148 | 	}
149 | 
150 | 	if e.Category == "" {
151 | 		return fmt.Errorf("category is empty")
152 | 	}
153 | 
154 | 	return nil
155 | }
156 | 
157 | func (e *Entry) CsvHeaders() []string {
158 | 	return []string{
159 | 		"input_id",
160 | 		"link",
161 | 		"title",
162 | 		"category",
163 | 		"address",
164 | 		"open_hours",
165 | 		"popular_times",
166 | 		"website",
167 | 		"phone",
168 | 		"plus_code",
169 | 		"review_count",
170 | 		"review_rating",
171 | 		"reviews_per_rating",
172 | 		"latitude",
173 | 		"longitude",
174 | 		"cid",
175 | 		"status",
176 | 		"descriptions",
177 | 		"reviews_link",
178 | 		"thumbnail",
179 | 		"timezone",
180 | 		"price_range",
181 | 		"data_id",
182 | 		"images",
183 | 		"reservations",
184 | 		"order_online",
185 | 		"menu",
186 | 		"owner",
187 | 		"complete_address",
188 | 		"about",
189 | 		"user_reviews",
190 | 		"user_reviews_extended",
191 | 		"emails",
192 | 	}
193 | }
194 | 
195 | func (e *Entry) CsvRow() []string {
196 | 	return []string{
197 | 		e.ID,
198 | 		e.Link,
199 | 		e.Title,
200 | 		e.Category,
201 | 		e.Address,
202 | 		stringify(e.OpenHours),
203 | 		stringify(e.PopularTimes),
204 | 		e.WebSite,
205 | 		e.Phone,
206 | 		e.PlusCode,
207 | 		stringify(e.ReviewCount),
208 | 		stringify(e.ReviewRating),
209 | 		stringify(e.ReviewsPerRating),
210 | 		stringify(e.Latitude),
211 | 		stringify(e.Longtitude),
212 | 		e.Cid,
213 | 		e.Status,
214 | 		e.Description,
215 | 		e.ReviewsLink,
216 | 		e.Thumbnail,
217 | 		e.Timezone,
218 | 		e.PriceRange,
219 | 		e.DataID,
220 | 		stringify(e.Images),
221 | 		stringify(e.Reservations),
222 | 		stringify(e.OrderOnline),
223 | 		stringify(e.Menu),
224 | 		stringify(e.Owner),
225 | 		stringify(e.CompleteAddress),
226 | 		stringify(e.About),
227 | 		stringify(e.UserReviews),
228 | 		stringify(e.UserReviewsExtended),
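		// unlike the stringify()'d fields above, emails are flattened to a single comma-separated cell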
229 | stringSliceToString(e.Emails), 230 | } 231 | } 232 | 233 | func (e *Entry) AddExtraReviews(pages [][]byte) { 234 | if len(pages) == 0 { 235 | return 236 | } 237 | 238 | for _, page := range pages { 239 | reviews := extractReviews(page) 240 | if len(reviews) > 0 { 241 | e.UserReviewsExtended = append(e.UserReviewsExtended, reviews...) 242 | } 243 | } 244 | } 245 | 246 | func extractReviews(data []byte) []Review { 247 | if len(data) >= 4 && string(data[0:4]) == `)]}'` { 248 | data = data[4:] // Skip security prefix 249 | } 250 | 251 | var jd []any 252 | if err := json.Unmarshal(data, &jd); err != nil { 253 | fmt.Printf("Error unmarshalling JSON: %v\n", err) 254 | return nil 255 | } 256 | 257 | reviewsI := getNthElementAndCast[[]any](jd, 2) 258 | 259 | return parseReviews(reviewsI) 260 | } 261 | 262 | //nolint:gomnd // it's ok, I need the indexes 263 | func EntryFromJSON(raw []byte, reviewCountOnly ...bool) (entry Entry, err error) { 264 | defer func() { 265 | if r := recover(); r != nil { 266 | err = fmt.Errorf("recovered from panic: %v stack: %s", r, debug.Stack()) 267 | 268 | return 269 | } 270 | }() 271 | 272 | onlyReviewCount := false 273 | 274 | if len(reviewCountOnly) == 1 && reviewCountOnly[0] { 275 | onlyReviewCount = true 276 | } 277 | 278 | var jd []any 279 | if err := json.Unmarshal(raw, &jd); err != nil { 280 | return entry, err 281 | } 282 | 283 | if len(jd) < 7 { 284 | return entry, fmt.Errorf("invalid json") 285 | } 286 | 287 | darray, ok := jd[6].([]any) 288 | if !ok { 289 | return entry, fmt.Errorf("invalid json") 290 | } 291 | 292 | entry.ReviewCount = int(getNthElementAndCast[float64](darray, 4, 8)) 293 | 294 | if onlyReviewCount { 295 | return entry, nil 296 | } 297 | 298 | entry.Link = getNthElementAndCast[string](darray, 27) 299 | entry.Title = getNthElementAndCast[string](darray, 11) 300 | 301 | categoriesI := getNthElementAndCast[[]any](darray, 13) 302 | 303 | entry.Categories = make([]string, len(categoriesI)) 304 | for i := range categoriesI { 305 | entry.Categories[i], _ = categoriesI[i].(string) 306 | } 307 | 308 | if len(entry.Categories) > 0 { 309 | entry.Category = entry.Categories[0] 310 | } 311 | 312 | entry.Address = strings.TrimSpace( 313 | strings.TrimPrefix(getNthElementAndCast[string](darray, 18), entry.Title+","), 314 | ) 315 | entry.OpenHours = getHours(darray) 316 | entry.PopularTimes = getPopularTimes(darray) 317 | entry.WebSite = getNthElementAndCast[string](darray, 7, 0) 318 | entry.Phone = getNthElementAndCast[string](darray, 178, 0, 0) 319 | entry.PlusCode = getNthElementAndCast[string](darray, 183, 2, 2, 0) 320 | entry.ReviewRating = getNthElementAndCast[float64](darray, 4, 7) 321 | entry.Latitude = getNthElementAndCast[float64](darray, 9, 2) 322 | entry.Longtitude = getNthElementAndCast[float64](darray, 9, 3) 323 | entry.Cid = getNthElementAndCast[string](jd, 25, 3, 0, 13, 0, 0, 1) 324 | entry.Status = getNthElementAndCast[string](darray, 34, 4, 4) 325 | entry.Description = getNthElementAndCast[string](darray, 32, 1, 1) 326 | entry.ReviewsLink = getNthElementAndCast[string](darray, 4, 3, 0) 327 | entry.Thumbnail = getNthElementAndCast[string](darray, 72, 0, 1, 6, 0) 328 | entry.Timezone = getNthElementAndCast[string](darray, 30) 329 | entry.PriceRange = getNthElementAndCast[string](darray, 4, 2) 330 | entry.DataID = getNthElementAndCast[string](darray, 10) 331 | 332 | items := getLinkSource(getLinkSourceParams{ 333 | arr: getNthElementAndCast[[]any](darray, 171, 0), 334 | link: []int{3, 0, 6, 0}, 335 | source: []int{2}, 336 | }) 337 | 338 
| entry.Images = make([]Image, len(items)) 339 | 340 | for i := range items { 341 | entry.Images[i] = Image{ 342 | Title: items[i].Source, 343 | Image: items[i].Link, 344 | } 345 | } 346 | 347 | entry.Reservations = getLinkSource(getLinkSourceParams{ 348 | arr: getNthElementAndCast[[]any](darray, 46), 349 | link: []int{0}, 350 | source: []int{1}, 351 | }) 352 | 353 | orderOnlineI := getNthElementAndCast[[]any](darray, 75, 0, 1, 2) 354 | 355 | if len(orderOnlineI) == 0 { 356 | orderOnlineI = getNthElementAndCast[[]any](darray, 75, 0, 0, 2) 357 | } 358 | 359 | entry.OrderOnline = getLinkSource(getLinkSourceParams{ 360 | arr: orderOnlineI, 361 | link: []int{1, 2, 0}, 362 | source: []int{0, 0}, 363 | }) 364 | 365 | entry.Menu = LinkSource{ 366 | Link: getNthElementAndCast[string](darray, 38, 0), 367 | Source: getNthElementAndCast[string](darray, 38, 1), 368 | } 369 | 370 | entry.Owner = Owner{ 371 | ID: getNthElementAndCast[string](darray, 57, 2), 372 | Name: getNthElementAndCast[string](darray, 57, 1), 373 | } 374 | 375 | if entry.Owner.ID != "" { 376 | entry.Owner.Link = fmt.Sprintf("https://www.google.com/maps/contrib/%s", entry.Owner.ID) 377 | } 378 | 379 | entry.CompleteAddress = Address{ 380 | Borough: getNthElementAndCast[string](darray, 183, 1, 0), 381 | Street: getNthElementAndCast[string](darray, 183, 1, 1), 382 | City: getNthElementAndCast[string](darray, 183, 1, 3), 383 | PostalCode: getNthElementAndCast[string](darray, 183, 1, 4), 384 | State: getNthElementAndCast[string](darray, 183, 1, 5), 385 | Country: getNthElementAndCast[string](darray, 183, 1, 6), 386 | } 387 | 388 | aboutI := getNthElementAndCast[[]any](darray, 100, 1) 389 | 390 | for i := range aboutI { 391 | el := getNthElementAndCast[[]any](aboutI, i) 392 | about := About{ 393 | ID: getNthElementAndCast[string](el, 0), 394 | Name: getNthElementAndCast[string](el, 1), 395 | } 396 | 397 | optsI := getNthElementAndCast[[]any](el, 2) 398 | 399 | for j := range optsI { 400 | opt := Option{ 401 | Enabled: (getNthElementAndCast[float64](optsI, j, 2, 1, 0, 0)) == 1, 402 | Name: getNthElementAndCast[string](optsI, j, 1), 403 | } 404 | 405 | if opt.Name != "" { 406 | about.Options = append(about.Options, opt) 407 | } 408 | } 409 | 410 | entry.About = append(entry.About, about) 411 | } 412 | 413 | entry.ReviewsPerRating = map[int]int{ 414 | 1: int(getNthElementAndCast[float64](darray, 175, 3, 0)), 415 | 2: int(getNthElementAndCast[float64](darray, 175, 3, 1)), 416 | 3: int(getNthElementAndCast[float64](darray, 175, 3, 2)), 417 | 4: int(getNthElementAndCast[float64](darray, 175, 3, 3)), 418 | 5: int(getNthElementAndCast[float64](darray, 175, 3, 4)), 419 | } 420 | 421 | reviewsI := getNthElementAndCast[[]any](darray, 175, 9, 0, 0) 422 | entry.UserReviews = make([]Review, 0, len(reviewsI)) 423 | 424 | return entry, nil 425 | } 426 | 427 | func parseReviews(reviewsI []any) []Review { 428 | ans := make([]Review, 0, len(reviewsI)) 429 | 430 | for i := range reviewsI { 431 | el := getNthElementAndCast[[]any](reviewsI, i, 0) 432 | 433 | time := getNthElementAndCast[[]any](el, 2, 2, 0, 1, 21, 6, 8) 434 | 435 | profilePic, err := decodeURL(getNthElementAndCast[string](el, 1, 4, 5, 1)) 436 | if err != nil { 437 | profilePic = "" 438 | } 439 | 440 | review := Review{ 441 | Name: getNthElementAndCast[string](el, 1, 4, 5, 0), 442 | ProfilePicture: profilePic, 443 | When: func() string { 444 | if len(time) < 3 { 445 | return "" 446 | } 447 | 448 | return fmt.Sprintf("%v-%v-%v", time[0], time[1], time[2]) 449 | }(), 450 | Rating: 
int(getNthElementAndCast[float64](el, 2, 0, 0)), 451 | Description: getNthElementAndCast[string](el, 2, 15, 0, 0), 452 | } 453 | 454 | if review.Name == "" { 455 | continue 456 | } 457 | 458 | optsI := getNthElementAndCast[[]any](el, 2, 2, 0, 1, 21, 7) 459 | 460 | for j := range optsI { 461 | val := getNthElementAndCast[string](optsI, j) 462 | if val != "" { 463 | review.Images = append(review.Images, val[2:]) 464 | } 465 | } 466 | 467 | ans = append(ans, review) 468 | } 469 | 470 | return ans 471 | } 472 | 473 | type getLinkSourceParams struct { 474 | arr []any 475 | source []int 476 | link []int 477 | } 478 | 479 | func getLinkSource(params getLinkSourceParams) []LinkSource { 480 | var result []LinkSource 481 | 482 | for i := range params.arr { 483 | item := getNthElementAndCast[[]any](params.arr, i) 484 | 485 | el := LinkSource{ 486 | Source: getNthElementAndCast[string](item, params.source...), 487 | Link: getNthElementAndCast[string](item, params.link...), 488 | } 489 | if el.Link != "" && el.Source != "" { 490 | result = append(result, el) 491 | } 492 | } 493 | 494 | return result 495 | } 496 | 497 | //nolint:gomnd // it's ok, I need the indexes 498 | func getHours(darray []any) map[string][]string { 499 | items := getNthElementAndCast[[]any](darray, 34, 1) 500 | hours := make(map[string][]string, len(items)) 501 | 502 | for _, item := range items { 503 | //nolint:errcheck // it's ok, I'm "sure" the indexes are correct 504 | day := getNthElementAndCast[string](item.([]any), 0) 505 | //nolint:errcheck // it's ok, I'm "sure" the indexes are correct 506 | timesI := getNthElementAndCast[[]any](item.([]any), 1) 507 | times := make([]string, len(timesI)) 508 | 509 | for i := range timesI { 510 | times[i], _ = timesI[i].(string) 511 | } 512 | 513 | hours[day] = times 514 | } 515 | 516 | return hours 517 | } 518 | 519 | func getPopularTimes(darray []any) map[string]map[int]int { 520 | items := getNthElementAndCast[[]any](darray, 84, 0) //nolint:gomnd // it's ok, I need the indexes 521 | popularTimes := make(map[string]map[int]int, len(items)) 522 | 523 | dayOfWeek := map[int]string{ 524 | 1: "Monday", 525 | 2: "Tuesday", 526 | 3: "Wednesday", 527 | 4: "Thursday", 528 | 5: "Friday", 529 | 6: "Saturday", 530 | 7: "Sunday", 531 | } 532 | 533 | for ii := range items { 534 | item, ok := items[ii].([]any) 535 | if !ok { 536 | return nil 537 | } 538 | 539 | day := int(getNthElementAndCast[float64](item, 0)) 540 | 541 | timesI := getNthElementAndCast[[]any](item, 1) 542 | 543 | times := make(map[int]int, len(timesI)) 544 | 545 | for i := range timesI { 546 | t, ok := timesI[i].([]any) 547 | if !ok { 548 | return nil 549 | } 550 | 551 | v, ok := t[1].(float64) 552 | if !ok { 553 | return nil 554 | } 555 | 556 | h, ok := t[0].(float64) 557 | if !ok { 558 | return nil 559 | } 560 | 561 | times[int(h)] = int(v) 562 | } 563 | 564 | popularTimes[dayOfWeek[day]] = times 565 | } 566 | 567 | return popularTimes 568 | } 569 | 570 | func getNthElementAndCast[T any](arr []any, indexes ...int) T { 571 | var ( 572 | defaultVal T 573 | idx int 574 | ) 575 | 576 | if len(indexes) == 0 { 577 | return defaultVal 578 | } 579 | 580 | for len(indexes) > 1 { 581 | idx, indexes = indexes[0], indexes[1:] 582 | 583 | if idx >= len(arr) { 584 | return defaultVal 585 | } 586 | 587 | next := arr[idx] 588 | 589 | if next == nil { 590 | return defaultVal 591 | } 592 | 593 | var ok bool 594 | 595 | arr, ok = next.([]any) 596 | if !ok { 597 | return defaultVal 598 | } 599 | } 600 | 601 | if len(indexes) == 0 || len(arr) == 0 { 602 
| return defaultVal 603 | } 604 | 605 | ans, ok := arr[indexes[0]].(T) 606 | if !ok { 607 | return defaultVal 608 | } 609 | 610 | return ans 611 | } 612 | 613 | func stringSliceToString(s []string) string { 614 | return strings.Join(s, ", ") 615 | } 616 | 617 | func stringify(v any) string { 618 | switch val := v.(type) { 619 | case string: 620 | return val 621 | case float64: 622 | return fmt.Sprintf("%f", val) 623 | case nil: 624 | return "" 625 | default: 626 | d, _ := json.Marshal(v) 627 | return string(d) 628 | } 629 | } 630 | 631 | func decodeURL(url string) (string, error) { 632 | quoted := `"` + strings.ReplaceAll(url, `"`, `\"`) + `"` 633 | 634 | unquoted, err := strconv.Unquote(quoted) 635 | if err != nil { 636 | return "", fmt.Errorf("failed to decode URL: %v", err) 637 | } 638 | 639 | return unquoted, nil 640 | } 641 | 642 | type EntryWithDistance struct { 643 | Entry *Entry 644 | Distance float64 645 | } 646 | 647 | func filterAndSortEntriesWithinRadius(entries []*Entry, lat, lon, radius float64) []*Entry { 648 | withinRadiusIterator := func(yield func(EntryWithDistance) bool) { 649 | for _, entry := range entries { 650 | distance := entry.haversineDistance(lat, lon) 651 | if distance <= radius { 652 | if !yield(EntryWithDistance{Entry: entry, Distance: distance}) { 653 | return 654 | } 655 | } 656 | } 657 | } 658 | 659 | entriesWithDistance := slices.Collect(iter.Seq[EntryWithDistance](withinRadiusIterator)) 660 | 661 | slices.SortFunc(entriesWithDistance, func(a, b EntryWithDistance) int { 662 | switch { 663 | case a.Distance < b.Distance: 664 | return -1 665 | case a.Distance > b.Distance: 666 | return 1 667 | default: 668 | return 0 669 | } 670 | }) 671 | 672 | resultIterator := func(yield func(*Entry) bool) { 673 | for _, e := range entriesWithDistance { 674 | if !yield(e.Entry) { 675 | return 676 | } 677 | } 678 | } 679 | 680 | return slices.Collect(iter.Seq[*Entry](resultIterator)) 681 | } 682 | -------------------------------------------------------------------------------- /gmaps/entry_test.go: -------------------------------------------------------------------------------- 1 | package gmaps_test 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "testing" 7 | 8 | "github.com/PuerkitoBio/goquery" 9 | "github.com/stretchr/testify/require" 10 | 11 | "github.com/gosom/google-maps-scraper/gmaps" 12 | ) 13 | 14 | func createGoQueryFromFile(t *testing.T, path string) *goquery.Document { 15 | t.Helper() 16 | 17 | fd, err := os.Open(path) 18 | require.NoError(t, err) 19 | 20 | defer fd.Close() 21 | 22 | doc, err := goquery.NewDocumentFromReader(fd) 23 | require.NoError(t, err) 24 | 25 | return doc 26 | } 27 | 28 | func Test_EntryFromJSON(t *testing.T) { 29 | expected := gmaps.Entry{ 30 | Link: "https://www.google.com/maps/place/Kipriakon/data=!4m2!3m1!1s0x14e732fd76f0d90d:0xe5415928d6702b47!10m1!1e1", 31 | Title: "Kipriakon", 32 | Category: "Restaurant", 33 | Categories: []string{"Restaurant"}, 34 | Address: "Old port, Limassol 3042", 35 | OpenHours: map[string][]string{ 36 | "Monday": {"12:30–10 pm"}, 37 | "Tuesday": {"12:30–10 pm"}, 38 | "Wednesday": {"12:30–10 pm"}, 39 | "Thursday": {"12:30–10 pm"}, 40 | "Friday": {"12:30–10 pm"}, 41 | "Saturday": {"12:30–10 pm"}, 42 | "Sunday": {"12:30–10 pm"}, 43 | }, 44 | WebSite: "", 45 | Phone: "25 101555", 46 | PlusCode: "M2CR+6X Limassol", 47 | ReviewCount: 396, 48 | ReviewRating: 4.2, 49 | Latitude: 34.670595399999996, 50 | Longtitude: 33.042456699999995, 51 | Cid: "16519582940102929223", 52 | Status: "Closed ⋅ Opens 12:30\u202fpm Tue", 53 | 
ReviewsLink: "https://search.google.com/local/reviews?placeid=ChIJDdnwdv0y5xQRRytw1ihZQeU&q=Kipriakon&authuser=0&hl=en&gl=CY", 54 | Thumbnail: "https://lh5.googleusercontent.com/p/AF1QipP4Y7A8nYL3KKXznSl69pXSq9p2IXCYUjVvOh0F=w408-h408-k-no", 55 | Timezone: "Asia/Nicosia", 56 | PriceRange: "€€", 57 | DataID: "0x14e732fd76f0d90d:0xe5415928d6702b47", 58 | Images: []gmaps.Image{ 59 | { 60 | Title: "All", 61 | Image: "https://lh5.googleusercontent.com/p/AF1QipP4Y7A8nYL3KKXznSl69pXSq9p2IXCYUjVvOh0F=w298-h298-k-no", 62 | }, 63 | { 64 | Title: "Latest", 65 | Image: "https://lh5.googleusercontent.com/p/AF1QipNgMqyaQs2MqH1oiGC44eDcvudurxQfNb2RuDsd=w224-h298-k-no", 66 | }, 67 | { 68 | Title: "Videos", 69 | Image: "https://lh5.googleusercontent.com/p/AF1QipPZbq8v8K8RZfvL6gZ_4Dw6qwNJ_MUxxOOfBo7h=w224-h398-k-no", 70 | }, 71 | { 72 | Title: "Menu", 73 | Image: "https://lh5.googleusercontent.com/p/AF1QipNhoFtPcaLCIhdN3GhlJ6sQIvdhaESnRG8nyeC8=w397-h298-k-no", 74 | }, 75 | { 76 | Title: "Food & drink", 77 | Image: "https://lh5.googleusercontent.com/p/AF1QipMbu-iiWkE4DsXx3aI7nGaqyXJKbBYCrBXvzOnu=w298-h298-k-no", 78 | }, 79 | { 80 | Title: "Vibe", 81 | Image: "https://lh5.googleusercontent.com/p/AF1QipOGg_vrD4bzkOre5Ly6CFXuO3YCOGfFxQ-EiEkW=w224-h398-k-no", 82 | }, 83 | { 84 | Title: "Fried green tomatoes", 85 | Image: "https://lh5.googleusercontent.com/p/AF1QipOziHd2hqM1jnK9KfCGf1zVhcOrx8Bj7VdJXj0=w397-h298-k-no", 86 | }, 87 | { 88 | Title: "French fries", 89 | Image: "https://lh5.googleusercontent.com/p/AF1QipNJyq7nAlKtsxxbNy4PHUZOhJ0k7HPP8tTAlwcV=w397-h298-k-no", 90 | }, 91 | { 92 | Title: "By owner", 93 | Image: "https://lh5.googleusercontent.com/p/AF1QipNRE2R5k13zT-0WG4b6XOD_BES9-nMK04hlCMVV=w298-h298-k-no", 94 | }, 95 | { 96 | Title: "Street View & 360°", 97 | Image: "https://lh5.googleusercontent.com/p/AF1QipMwkHP8GmDCSuwnWS7pYVQvtDWdsdk-CUwxtsXL=w224-h298-k-no-pi-23.425545-ya289.20517-ro-8.658787-fo100", 98 | }, 99 | }, 100 | OrderOnline: []gmaps.LinkSource{ 101 | { 102 | Link: "https://foody.com.cy/delivery/lemesos/to-kypriakon?utm_source=google&utm_medium=organic&utm_campaign=google_reserve_place_order_action", 103 | Source: "foody.com.cy", 104 | }, 105 | { 106 | Link: "https://wolt.com/en/cyp/limassol/restaurant/kypriakon?utm_source=googlemapreserved&utm_campaign=kypriakon", 107 | Source: "wolt.com", 108 | }, 109 | }, 110 | Owner: gmaps.Owner{ 111 | ID: "102769814432182832009", 112 | Name: "Kipriakon (Owner)", 113 | Link: "https://www.google.com/maps/contrib/102769814432182832009", 114 | }, 115 | CompleteAddress: gmaps.Address{ 116 | Borough: "", 117 | Street: "Old port", 118 | City: "Limassol", 119 | PostalCode: "3042", 120 | State: "", 121 | Country: "CY", 122 | }, 123 | ReviewsPerRating: map[int]int{ 124 | 1: 37, 125 | 2: 16, 126 | 3: 27, 127 | 4: 60, 128 | 5: 256, 129 | }, 130 | } 131 | 132 | raw, err := os.ReadFile("../testdata/raw.json") 133 | require.NoError(t, err) 134 | require.NotEmpty(t, raw) 135 | 136 | entry, err := gmaps.EntryFromJSON(raw) 137 | require.NoError(t, err) 138 | 139 | require.Len(t, entry.About, 10) 140 | 141 | for _, about := range entry.About { 142 | require.NotEmpty(t, about.ID) 143 | require.NotEmpty(t, about.Name) 144 | require.NotEmpty(t, about.Options) 145 | } 146 | 147 | entry.About = nil 148 | 149 | require.Len(t, entry.PopularTimes, 7) 150 | 151 | for k, v := range entry.PopularTimes { 152 | require.Contains(t, 153 | []string{ 154 | "Monday", 155 | "Tuesday", 156 | "Wednesday", 157 | "Thursday", 158 | "Friday", 159 | "Saturday", 160 | "Sunday", 161 | }, k) 162 | 
163 | for _, traffic := range v { 164 | require.GreaterOrEqual(t, traffic, 0) 165 | require.LessOrEqual(t, traffic, 100) 166 | } 167 | } 168 | 169 | monday := entry.PopularTimes["Monday"] 170 | require.Equal(t, 100, monday[20]) 171 | 172 | entry.PopularTimes = nil 173 | entry.UserReviews = nil 174 | 175 | require.Equal(t, expected, entry) 176 | } 177 | 178 | func Test_EntryFromJSON2(t *testing.T) { 179 | fnames := []string{ 180 | "../testdata/panic.json", 181 | "../testdata/panic2.json", 182 | } 183 | for _, fname := range fnames { 184 | raw, err := os.ReadFile(fname) 185 | require.NoError(t, err) 186 | require.NotEmpty(t, raw) 187 | 188 | _, err = gmaps.EntryFromJSON(raw) 189 | require.NoError(t, err) 190 | } 191 | } 192 | 193 | func Test_EntryFromJSONRaw2(t *testing.T) { 194 | raw, err := os.ReadFile("../testdata/raw2.json") 195 | 196 | require.NoError(t, err) 197 | require.NotEmpty(t, raw) 198 | 199 | entry, err := gmaps.EntryFromJSON(raw) 200 | 201 | require.NoError(t, err) 202 | require.Greater(t, len(entry.About), 0) 203 | } 204 | 205 | func Test_EntryFromJsonC(t *testing.T) { 206 | raw, err := os.ReadFile("../testdata/output.json") 207 | 208 | require.NoError(t, err) 209 | require.NotEmpty(t, raw) 210 | 211 | entries, err := gmaps.ParseSearchResults(raw) 212 | 213 | require.NoError(t, err) 214 | 215 | for _, entry := range entries { 216 | fmt.Printf("%+v\n", entry) 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /gmaps/job.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "net/url" 8 | "strings" 9 | "time" 10 | 11 | "github.com/PuerkitoBio/goquery" 12 | "github.com/google/uuid" 13 | "github.com/gosom/google-maps-scraper/deduper" 14 | "github.com/gosom/google-maps-scraper/exiter" 15 | "github.com/gosom/scrapemate" 16 | "github.com/playwright-community/playwright-go" 17 | ) 18 | 19 | type GmapJobOptions func(*GmapJob) 20 | 21 | type GmapJob struct { 22 | scrapemate.Job 23 | 24 | MaxDepth int 25 | LangCode string 26 | ExtractEmail bool 27 | 28 | Deduper deduper.Deduper 29 | ExitMonitor exiter.Exiter 30 | ExtractExtraReviews bool 31 | } 32 | 33 | func NewGmapJob( 34 | id, langCode, query string, 35 | maxDepth int, 36 | extractEmail bool, 37 | geoCoordinates string, 38 | zoom int, 39 | opts ...GmapJobOptions, 40 | ) *GmapJob { 41 | query = url.QueryEscape(query) 42 | 43 | const ( 44 | maxRetries = 3 45 | prio = scrapemate.PriorityLow 46 | ) 47 | 48 | if id == "" { 49 | id = uuid.New().String() 50 | } 51 | 52 | mapURL := "" 53 | if geoCoordinates != "" && zoom > 0 { 54 | mapURL = fmt.Sprintf("https://www.google.com/maps/search/%s/@%s,%dz", query, strings.ReplaceAll(geoCoordinates, " ", ""), zoom) 55 | } else { 56 | //Warning: geo and zoom MUST be both set or not 57 | mapURL = fmt.Sprintf("https://www.google.com/maps/search/%s", query) 58 | } 59 | 60 | job := GmapJob{ 61 | Job: scrapemate.Job{ 62 | ID: id, 63 | Method: http.MethodGet, 64 | URL: mapURL, 65 | URLParams: map[string]string{"hl": langCode}, 66 | MaxRetries: maxRetries, 67 | Priority: prio, 68 | }, 69 | MaxDepth: maxDepth, 70 | LangCode: langCode, 71 | ExtractEmail: extractEmail, 72 | } 73 | 74 | for _, opt := range opts { 75 | opt(&job) 76 | } 77 | 78 | return &job 79 | } 80 | 81 | func WithDeduper(d deduper.Deduper) GmapJobOptions { 82 | return func(j *GmapJob) { 83 | j.Deduper = d 84 | } 85 | } 86 | 87 | func WithExitMonitor(e exiter.Exiter) GmapJobOptions { 88 
| return func(j *GmapJob) { 89 | j.ExitMonitor = e 90 | } 91 | } 92 | 93 | func WithExtraReviews() GmapJobOptions { 94 | return func(j *GmapJob) { 95 | j.ExtractExtraReviews = true 96 | } 97 | } 98 | 99 | func (j *GmapJob) UseInResults() bool { 100 | return false 101 | } 102 | 103 | func (j *GmapJob) Process(ctx context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error) { 104 | defer func() { 105 | resp.Document = nil 106 | resp.Body = nil 107 | }() 108 | 109 | log := scrapemate.GetLoggerFromContext(ctx) 110 | 111 | doc, ok := resp.Document.(*goquery.Document) 112 | if !ok { 113 | return nil, nil, fmt.Errorf("could not convert to goquery document") 114 | } 115 | 116 | var next []scrapemate.IJob 117 | 118 | if strings.Contains(resp.URL, "/maps/place/") { 119 | jopts := []PlaceJobOptions{} 120 | if j.ExitMonitor != nil { 121 | jopts = append(jopts, WithPlaceJobExitMonitor(j.ExitMonitor)) 122 | } 123 | 124 | placeJob := NewPlaceJob(j.ID, j.LangCode, resp.URL, j.ExtractEmail, j.ExtractExtraReviews, jopts...) 125 | 126 | next = append(next, placeJob) 127 | } else { 128 | doc.Find(`div[role=feed] div[jsaction]>a`).Each(func(_ int, s *goquery.Selection) { 129 | if href := s.AttrOr("href", ""); href != "" { 130 | jopts := []PlaceJobOptions{} 131 | if j.ExitMonitor != nil { 132 | jopts = append(jopts, WithPlaceJobExitMonitor(j.ExitMonitor)) 133 | } 134 | 135 | nextJob := NewPlaceJob(j.ID, j.LangCode, href, j.ExtractEmail, j.ExtractExtraReviews, jopts...) 136 | 137 | if j.Deduper == nil || j.Deduper.AddIfNotExists(ctx, href) { 138 | next = append(next, nextJob) 139 | } 140 | } 141 | }) 142 | } 143 | 144 | if j.ExitMonitor != nil { 145 | j.ExitMonitor.IncrPlacesFound(len(next)) 146 | j.ExitMonitor.IncrSeedCompleted(1) 147 | } 148 | 149 | log.Info(fmt.Sprintf("%d places found", len(next))) 150 | 151 | return nil, next, nil 152 | } 153 | 154 | func (j *GmapJob) BrowserActions(ctx context.Context, page playwright.Page) scrapemate.Response { 155 | var resp scrapemate.Response 156 | 157 | pageResponse, err := page.Goto(j.GetFullURL(), playwright.PageGotoOptions{ 158 | WaitUntil: playwright.WaitUntilStateDomcontentloaded, 159 | }) 160 | 161 | if err != nil { 162 | resp.Error = err 163 | 164 | return resp 165 | } 166 | 167 | if err = clickRejectCookiesIfRequired(page); err != nil { 168 | resp.Error = err 169 | 170 | return resp 171 | } 172 | 173 | const defaultTimeout = 5000 174 | 175 | err = page.WaitForURL(page.URL(), playwright.PageWaitForURLOptions{ 176 | WaitUntil: playwright.WaitUntilStateDomcontentloaded, 177 | Timeout: playwright.Float(defaultTimeout), 178 | }) 179 | 180 | if err != nil { 181 | resp.Error = err 182 | 183 | return resp 184 | } 185 | 186 | resp.URL = pageResponse.URL() 187 | resp.StatusCode = pageResponse.Status() 188 | resp.Headers = make(http.Header, len(pageResponse.Headers())) 189 | 190 | for k, v := range pageResponse.Headers() { 191 | resp.Headers.Add(k, v) 192 | } 193 | 194 | // When Google Maps finds only 1 place, it slowly redirects to that place's URL 195 | // check element scroll 196 | sel := `div[role='feed']` 197 | 198 | //nolint:staticcheck // TODO replace with the new playwright API 199 | _, err = page.WaitForSelector(sel, playwright.PageWaitForSelectorOptions{ 200 | Timeout: playwright.Float(700), 201 | }) 202 | 203 | var singlePlace bool 204 | 205 | if err != nil { 206 | waitCtx, waitCancel := context.WithTimeout(ctx, time.Second*5) 207 | defer waitCancel() 208 | 209 | singlePlace = waitUntilURLContains(waitCtx, page, "/maps/place/") 210 | 211 | 
waitCancel() 212 | } 213 | 214 | if singlePlace { 215 | resp.URL = page.URL() 216 | 217 | var body string 218 | 219 | body, err = page.Content() 220 | if err != nil { 221 | resp.Error = err 222 | return resp 223 | } 224 | 225 | resp.Body = []byte(body) 226 | 227 | return resp 228 | } 229 | 230 | scrollSelector := `div[role='feed']` 231 | 232 | _, err = scroll(ctx, page, j.MaxDepth, scrollSelector) 233 | if err != nil { 234 | resp.Error = err 235 | 236 | return resp 237 | } 238 | 239 | body, err := page.Content() 240 | if err != nil { 241 | resp.Error = err 242 | return resp 243 | } 244 | 245 | resp.Body = []byte(body) 246 | 247 | return resp 248 | } 249 | 250 | func waitUntilURLContains(ctx context.Context, page playwright.Page, s string) bool { 251 | ticker := time.NewTicker(time.Millisecond * 150) 252 | defer ticker.Stop() 253 | 254 | for { 255 | select { 256 | case <-ctx.Done(): 257 | return false 258 | case <-ticker.C: 259 | if strings.Contains(page.URL(), s) { 260 | return true 261 | } 262 | } 263 | } 264 | } 265 | 266 | func clickRejectCookiesIfRequired(page playwright.Page) error { 267 | // click the cookie reject button if it exists 268 | sel := `form[action="https://consent.google.com/save"]:first-of-type button:first-of-type` 269 | 270 | const timeout = 500 271 | 272 | //nolint:staticcheck // TODO replace with the new playwright API 273 | el, err := page.WaitForSelector(sel, playwright.PageWaitForSelectorOptions{ 274 | Timeout: playwright.Float(timeout), 275 | }) 276 | 277 | if err != nil { 278 | return nil // selector not found within the timeout: no consent banner to dismiss 279 | } 280 | 281 | if el == nil { 282 | return nil 283 | } 284 | 285 | //nolint:staticcheck // TODO replace with the new playwright API 286 | return el.Click() 287 | } 288 | 289 | func scroll(ctx context.Context, 290 | page playwright.Page, 291 | maxDepth int, 292 | scrollSelector string, 293 | ) (int, error) { 294 | expr := `async () => { 295 | const el = document.querySelector("` + scrollSelector + `"); 296 | el.scrollTop = el.scrollHeight; 297 | 298 | return new Promise((resolve, reject) => { 299 | setTimeout(() => { 300 | resolve(el.scrollHeight); 301 | }, %d); 302 | }); 303 | }` 304 | 305 | var currentScrollHeight int 306 | // waitTime is the pause between scroll attempts; it grows geometrically below. 307 | waitTime := 100. 308 | cnt := 0 309 | 310 | const ( 311 | timeout = 500 312 | maxWait2 = 2000 313 | ) 314 | 315 | for i := 0; i < maxDepth; i++ { 316 | cnt++ 317 | waitTime2 := timeout * cnt 318 | 319 | if waitTime2 > maxWait2 { 320 | waitTime2 = maxWait2 321 | } 322 | 323 | // Scroll to the bottom of the page.
324 | scrollHeight, err := page.Evaluate(fmt.Sprintf(expr, waitTime2)) 325 | if err != nil { 326 | return cnt, err 327 | } 328 | 329 | height, ok := scrollHeight.(int) 330 | if !ok { 331 | return cnt, fmt.Errorf("scrollHeight is not an int") 332 | } 333 | 334 | if height == currentScrollHeight { 335 | break 336 | } 337 | 338 | currentScrollHeight = height 339 | 340 | select { 341 | case <-ctx.Done(): 342 | return cnt, nil // context canceled: report the scrolls performed so far 343 | default: 344 | } 345 | 346 | waitTime *= 1.5 347 | 348 | if waitTime > maxWait2 { 349 | waitTime = maxWait2 350 | } 351 | 352 | //nolint:staticcheck // TODO replace with the new playwright API 353 | page.WaitForTimeout(waitTime) 354 | } 355 | 356 | return cnt, nil 357 | } 358 | -------------------------------------------------------------------------------- /gmaps/multiple.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strings" 7 | 8 | olc "github.com/google/open-location-code/go" 9 | ) 10 | 11 | func ParseSearchResults(raw []byte) ([]*Entry, error) { 12 | var data []any 13 | if err := json.Unmarshal(raw, &data); err != nil { 14 | return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) 15 | } 16 | 17 | if len(data) == 0 { 18 | return nil, fmt.Errorf("empty JSON data") 19 | } 20 | 21 | container, ok := data[0].([]any) 22 | if !ok || len(container) == 0 { 23 | return nil, fmt.Errorf("invalid business list structure") 24 | } 25 | 26 | items := getNthElementAndCast[[]any](container, 1) 27 | if len(items) < 2 { 28 | return nil, fmt.Errorf("empty business list") 29 | } 30 | 31 | entries := make([]*Entry, 0, len(items)-1) 32 | 33 | for i := 1; i < len(items); i++ { 34 | arr, ok := items[i].([]any) 35 | if !ok { 36 | continue 37 | } 38 | 39 | business := getNthElementAndCast[[]any](arr, 14) 40 | 41 | var entry Entry 42 | 43 | entry.ID = getNthElementAndCast[string](business, 0) 44 | entry.Title = getNthElementAndCast[string](business, 11) 45 | entry.Categories = toStringSlice(getNthElementAndCast[[]any](business, 13)) 46 | entry.WebSite = getNthElementAndCast[string](business, 7, 0) 47 | 48 | entry.ReviewRating = getNthElementAndCast[float64](business, 4, 7) 49 | entry.ReviewCount = int(getNthElementAndCast[float64](business, 4, 8)) 50 | 51 | fullAddress := getNthElementAndCast[[]any](business, 2) 52 | 53 | entry.Address = func() string { 54 | sb := strings.Builder{} 55 | 56 | for i, part := range fullAddress { 57 | if i > 0 { 58 | sb.WriteString(", ") 59 | } 60 | 61 | sb.WriteString(fmt.Sprintf("%v", part)) 62 | } 63 | 64 | return sb.String() 65 | }() 66 | 67 | entry.Latitude = getNthElementAndCast[float64](business, 9, 2) 68 | entry.Longtitude = getNthElementAndCast[float64](business, 9, 3) 69 | entry.Phone = strings.ReplaceAll(getNthElementAndCast[string](business, 178, 0, 0), " ", "") 70 | entry.OpenHours = getHours(business) 71 | entry.Status = getNthElementAndCast[string](business, 34, 4, 4) 72 | entry.Timezone = getNthElementAndCast[string](business, 30) 73 | entry.DataID = getNthElementAndCast[string](business, 10) 74 | 75 | entry.PlusCode = olc.Encode(entry.Latitude, entry.Longtitude, 10) 76 | 77 | entries = append(entries, &entry) 78 | } 79 | 80 | return entries, nil 81 | } 82 | 83 | func toStringSlice(arr []any) []string { 84 | ans := make([]string, 0, len(arr)) 85 | for _, v := range arr { 86 | ans = append(ans, fmt.Sprintf("%v", v)) 87 | } 88 | 89 | return ans 90 | } 91 |
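A minimal usage sketch for ParseSearchResults (illustrative only, not a file in the repository). It assumes a payload such as testdata/output.json from which the leading )]}' line has already been stripped, exactly as the ParseSearchResults test in entry_test.go does; the Entry fields printed (Title, PlusCode, Latitude, Longtitude) are the ones populated above:

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/gosom/google-maps-scraper/gmaps"
)

func main() {
	// raw search payload with the ")]}'" prefix already removed
	raw, err := os.ReadFile("testdata/output.json")
	if err != nil {
		log.Fatal(err)
	}

	entries, err := gmaps.ParseSearchResults(raw)
	if err != nil {
		log.Fatal(err)
	}

	// each entry carries the fields extracted by ParseSearchResults
	for _, e := range entries {
		fmt.Printf("%s [%s] (%.5f, %.5f)\n", e.Title, e.PlusCode, e.Latitude, e.Longtitude)
	}
}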
-------------------------------------------------------------------------------- /gmaps/place.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | "time" 9 | 10 | "github.com/google/uuid" 11 | "github.com/gosom/google-maps-scraper/exiter" 12 | "github.com/gosom/scrapemate" 13 | "github.com/playwright-community/playwright-go" 14 | ) 15 | 16 | type PlaceJobOptions func(*PlaceJob) 17 | 18 | type PlaceJob struct { 19 | scrapemate.Job 20 | 21 | UsageInResults bool // whether this job's own result should be written (see UseInResults) 22 | ExtractEmail bool 23 | ExitMonitor exiter.Exiter 24 | ExtractExtraReviews bool 25 | } 26 | 27 | func NewPlaceJob(parentID, langCode, u string, extractEmail, extractExtraReviews bool, opts ...PlaceJobOptions) *PlaceJob { 28 | const ( 29 | defaultPrio = scrapemate.PriorityMedium 30 | defaultMaxRetries = 3 31 | ) 32 | 33 | job := PlaceJob{ 34 | Job: scrapemate.Job{ 35 | ID: uuid.New().String(), 36 | ParentID: parentID, 37 | Method: http.MethodGet, 38 | URL: u, 39 | URLParams: map[string]string{"hl": langCode}, 40 | MaxRetries: defaultMaxRetries, 41 | Priority: defaultPrio, 42 | }, 43 | } 44 | 45 | job.UsageInResults = true 46 | job.ExtractEmail = extractEmail 47 | job.ExtractExtraReviews = extractExtraReviews 48 | 49 | for _, opt := range opts { 50 | opt(&job) 51 | } 52 | 53 | return &job 54 | } 55 | 56 | func WithPlaceJobExitMonitor(exitMonitor exiter.Exiter) PlaceJobOptions { 57 | return func(j *PlaceJob) { 58 | j.ExitMonitor = exitMonitor 59 | } 60 | } 61 | 62 | func (j *PlaceJob) Process(_ context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error) { 63 | defer func() { 64 | resp.Document = nil 65 | resp.Body = nil 66 | resp.Meta = nil 67 | }() 68 | 69 | raw, ok := resp.Meta["json"].([]byte) 70 | if !ok { 71 | return nil, nil, fmt.Errorf("could not convert to []byte") 72 | } 73 | 74 | entry, err := EntryFromJSON(raw) 75 | if err != nil { 76 | return nil, nil, err 77 | } 78 | 79 | entry.ID = j.ParentID 80 | 81 | if entry.Link == "" { 82 | entry.Link = j.GetURL() 83 | } 84 | 85 | allReviewsRaw, ok := resp.Meta["reviews_raw"].(fetchReviewsResponse) 86 | if ok && len(allReviewsRaw.pages) > 0 { 87 | entry.AddExtraReviews(allReviewsRaw.pages) 88 | } 89 | 90 | if j.ExtractEmail && entry.IsWebsiteValidForEmail() { 91 | opts := []EmailExtractJobOptions{} 92 | if j.ExitMonitor != nil { 93 | opts = append(opts, WithEmailJobExitMonitor(j.ExitMonitor)) 94 | } 95 | 96 | emailJob := NewEmailJob(j.ID, &entry, opts...)
97 | 98 | j.UsageInResults = false // the email job carries the entry to the results instead 99 | 100 | return nil, []scrapemate.IJob{emailJob}, nil 101 | } else if j.ExitMonitor != nil { 102 | j.ExitMonitor.IncrPlacesCompleted(1) 103 | } 104 | 105 | return &entry, nil, nil 106 | } 107 | 108 | func (j *PlaceJob) BrowserActions(ctx context.Context, page playwright.Page) scrapemate.Response { 109 | var resp scrapemate.Response 110 | 111 | pageResponse, err := page.Goto(j.GetURL(), playwright.PageGotoOptions{ 112 | WaitUntil: playwright.WaitUntilStateDomcontentloaded, 113 | }) 114 | 115 | if err != nil { 116 | resp.Error = err 117 | 118 | return resp 119 | } 120 | 121 | if err = clickRejectCookiesIfRequired(page); err != nil { 122 | resp.Error = err 123 | 124 | return resp 125 | } 126 | 127 | const defaultTimeout = 5000 128 | 129 | err = page.WaitForURL(page.URL(), playwright.PageWaitForURLOptions{ 130 | WaitUntil: playwright.WaitUntilStateDomcontentloaded, 131 | Timeout: playwright.Float(defaultTimeout), 132 | }) 133 | if err != nil { 134 | resp.Error = err 135 | 136 | return resp 137 | } 138 | 139 | resp.URL = pageResponse.URL() 140 | resp.StatusCode = pageResponse.Status() 141 | resp.Headers = make(http.Header, len(pageResponse.Headers())) 142 | 143 | for k, v := range pageResponse.Headers() { 144 | resp.Headers.Add(k, v) 145 | } 146 | 147 | raw, err := j.extractJSON(page) 148 | if err != nil { 149 | resp.Error = err 150 | 151 | return resp 152 | } 153 | 154 | if resp.Meta == nil { 155 | resp.Meta = make(map[string]any) 156 | } 157 | 158 | resp.Meta["json"] = raw 159 | 160 | if j.ExtractExtraReviews { 161 | reviewCount := j.getReviewCount(raw) 162 | if reviewCount > 8 { // more reviews exist than the handful embedded in the place page 163 | params := fetchReviewsParams{ 164 | page: page, 165 | mapURL: page.URL(), 166 | reviewCount: reviewCount, 167 | } 168 | 169 | reviewFetcher := newReviewFetcher(params) 170 | 171 | reviewData, err := reviewFetcher.fetch(ctx) 172 | if err != nil { 173 | return resp // best effort: keep the entry even if fetching extra reviews fails 174 | } 175 | 176 | resp.Meta["reviews_raw"] = reviewData 177 | } 178 | } 179 | 180 | return resp 181 | } 182 | 183 | func (j *PlaceJob) extractJSON(page playwright.Page) ([]byte, error) { 184 | rawI, err := page.Evaluate(js) 185 | if err != nil { 186 | return nil, err 187 | } 188 | 189 | raw, ok := rawI.(string) 190 | if !ok { 191 | return nil, fmt.Errorf("could not convert to string") 192 | } 193 | 194 | const prefix = `)]}'` 195 | 196 | raw = strings.TrimSpace(strings.TrimPrefix(raw, prefix)) 197 | 198 | return []byte(raw), nil 199 | } 200 | 201 | func (j *PlaceJob) getReviewCount(data []byte) int { 202 | tmpEntry, err := EntryFromJSON(data, true) 203 | if err != nil { 204 | return 0 205 | } 206 | 207 | return tmpEntry.ReviewCount 208 | } 209 | 210 | func (j *PlaceJob) UseInResults() bool { 211 | return j.UsageInResults 212 | } 213 | 214 | func ctxWait(ctx context.Context, dur time.Duration) { 215 | select { 216 | case <-ctx.Done(): 217 | case <-time.After(dur): 218 | } 219 | } 220 | 221 | const js = ` 222 | function parse() { 223 | const inputString = window.APP_INITIALIZATION_STATE[3][6] 224 | return inputString 225 | } 226 | ` 227 | -------------------------------------------------------------------------------- /gmaps/reviews.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "context" 5 | "crypto/rand" 6 | "encoding/base64" 7 | "encoding/json" 8 | "errors" 9 | "fmt" 10 | "net/url" 11 | "regexp" 12 | "strings" 13 | 14 | "github.com/gosom/scrapemate" 15 |
"github.com/gosom/scrapemate/adapters/fetchers/stealth" 16 | "github.com/playwright-community/playwright-go" 17 | ) 18 | 19 | type fetchReviewsParams struct { 20 | page playwright.Page 21 | mapURL string 22 | reviewCount int 23 | } 24 | 25 | type fetchReviewsResponse struct { 26 | pages [][]byte 27 | } 28 | 29 | type fetcher struct { 30 | httpClient scrapemate.HTTPFetcher 31 | params fetchReviewsParams 32 | } 33 | 34 | func newReviewFetcher(params fetchReviewsParams) *fetcher { 35 | netClient := stealth.New("firefox", nil) 36 | ans := fetcher{ 37 | params: params, 38 | httpClient: netClient, 39 | } 40 | 41 | return &ans 42 | } 43 | 44 | func (f *fetcher) fetch(ctx context.Context) (fetchReviewsResponse, error) { 45 | requestIDForSession, err := generateRandomID(21) 46 | if err != nil { 47 | return fetchReviewsResponse{}, fmt.Errorf("failed to generate session request ID: %v", err) 48 | } 49 | 50 | reviewURL, err := f.generateURL(f.params.mapURL, "", 20, requestIDForSession) 51 | if err != nil { 52 | return fetchReviewsResponse{}, fmt.Errorf("failed to generate initial URL: %v", err) 53 | } 54 | 55 | currentPageBody, err := f.fetchReviewPage(ctx, reviewURL) 56 | if err != nil { 57 | return fetchReviewsResponse{}, fmt.Errorf("failed to fetch initial review page: %v", err) 58 | } 59 | 60 | ans := fetchReviewsResponse{} 61 | ans.pages = append(ans.pages, currentPageBody) 62 | 63 | nextPageToken := extractNextPageToken(currentPageBody) 64 | 65 | for nextPageToken != "" { 66 | reviewURL, err = f.generateURL(f.params.mapURL, nextPageToken, 20, requestIDForSession) 67 | if err != nil { 68 | fmt.Printf("Error generating URL for token %s: %v\n", nextPageToken, err) 69 | break 70 | } 71 | 72 | currentPageBody, err = f.fetchReviewPage(ctx, reviewURL) 73 | if err != nil { 74 | fmt.Printf("Error fetching review page with token %s: %v (%s)\n", nextPageToken, err, reviewURL) 75 | break 76 | } 77 | 78 | ans.pages = append(ans.pages, currentPageBody) 79 | nextPageToken = extractNextPageToken(currentPageBody) 80 | } 81 | 82 | return ans, nil 83 | } 84 | 85 | // Note the added 'requestID' parameter 86 | func (f *fetcher) generateURL(mapURL, pageToken string, pageSize int, requestID string) (string, error) { 87 | placeIDRegex := regexp.MustCompile(`!1s([^!]+)`) 88 | 89 | placeIDMatch := placeIDRegex.FindStringSubmatch(mapURL) 90 | if len(placeIDMatch) < 2 { 91 | return "", fmt.Errorf("could not extract place ID from URL: %s", mapURL) 92 | } 93 | 94 | rawPlaceID, err := url.QueryUnescape(placeIDMatch[1]) 95 | if err != nil { 96 | rawPlaceID = placeIDMatch[1] 97 | } 98 | 99 | encodedPlaceID := url.QueryEscape(rawPlaceID) 100 | 101 | encodedPageToken := url.QueryEscape(pageToken) 102 | 103 | pbComponents := []string{ 104 | fmt.Sprintf("!1m6!1s%s", encodedPlaceID), 105 | "!6m4!4m1!1e1!4m1!1e3", 106 | fmt.Sprintf("!2m2!1i%d!2s%s", pageSize, encodedPageToken), 107 | fmt.Sprintf("!5m2!1s%s!7e81", requestID), 108 | "!8m9!2b1!3b1!5b1!7b1", 109 | "!12m4!1b1!2b1!4m1!1e1!11m0!13m1!1e1", 110 | } 111 | 112 | fullURL := fmt.Sprintf( 113 | "https://www.google.com/maps/rpc/listugcposts?authuser=0&hl=el&pb=%s", 114 | strings.Join(pbComponents, ""), 115 | ) 116 | 117 | return fullURL, nil 118 | } 119 | 120 | func (f *fetcher) fetchReviewPage(ctx context.Context, u string) ([]byte, error) { 121 | job := scrapemate.Job{ 122 | Method: "GET", 123 | URL: u, 124 | } 125 | 126 | resp := f.httpClient.Fetch(ctx, &job) 127 | if resp.Error != nil { 128 | return nil, fmt.Errorf("fetch error for %s: %w", u, resp.Error) 129 | } 130 | 131 | if 
resp.StatusCode != 200 { 132 | return nil, fmt.Errorf("%s: unexpected status code: %d", u, resp.StatusCode) 133 | } 134 | 135 | return resp.Body, nil 136 | } 137 | 138 | func extractNextPageToken(data []byte) string { 139 | text := string(data) 140 | prefix := ")]}'\n" 141 | text = strings.TrimPrefix(text, prefix) 142 | 143 | var result []interface{} 144 | 145 | err := json.Unmarshal([]byte(text), &result) 146 | if err != nil { 147 | return "" 148 | } 149 | 150 | if len(result) < 2 || result[1] == nil { 151 | return "" 152 | } 153 | 154 | token, ok := result[1].(string) 155 | if !ok { 156 | return "" 157 | } 158 | 159 | return token 160 | } 161 | 162 | func generateRandomID(length int) (string, error) { 163 | numBytes := (length*6 + 7) / 8 164 | if numBytes < 16 { 165 | numBytes = 16 166 | } 167 | 168 | b := make([]byte, numBytes) 169 | 170 | _, err := rand.Read(b) 171 | if err != nil { 172 | return "", err 173 | } 174 | 175 | encoded := base64.URLEncoding.WithPadding(base64.NoPadding).EncodeToString(b) 176 | if len(encoded) >= length { 177 | return encoded[:length], nil 178 | } 179 | 180 | return "", errors.New("generated ID is shorter than expected") 181 | } 182 | -------------------------------------------------------------------------------- /gmaps/searchjob.go: -------------------------------------------------------------------------------- 1 | package gmaps 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "net/http" 8 | 9 | "github.com/google/uuid" 10 | "github.com/gosom/google-maps-scraper/exiter" 11 | "github.com/gosom/scrapemate" 12 | ) 13 | 14 | type SearchJobOptions func(*SearchJob) 15 | 16 | type MapLocation struct { 17 | Lat float64 18 | Lon float64 19 | ZoomLvl float64 20 | Radius float64 21 | } 22 | 23 | type MapSearchParams struct { 24 | Location MapLocation 25 | Query string 26 | ViewportW int 27 | ViewportH int 28 | Hl string 29 | } 30 | 31 | type SearchJob struct { 32 | scrapemate.Job 33 | 34 | params *MapSearchParams 35 | ExitMonitor exiter.Exiter 36 | } 37 | 38 | func NewSearchJob(params *MapSearchParams, opts ...SearchJobOptions) *SearchJob { 39 | const ( 40 | defaultPrio = scrapemate.PriorityMedium 41 | defaultMaxRetries = 3 42 | baseURL = "https://maps.google.com/search" 43 | ) 44 | 45 | job := SearchJob{ 46 | Job: scrapemate.Job{ 47 | ID: uuid.New().String(), 48 | Method: http.MethodGet, 49 | URL: baseURL, 50 | URLParams: buildGoogleMapsParams(params), 51 | MaxRetries: defaultMaxRetries, 52 | Priority: defaultPrio, 53 | }, 54 | } 55 | 56 | job.params = params 57 | 58 | for _, opt := range opts { 59 | opt(&job) 60 | } 61 | 62 | return &job 63 | } 64 | 65 | func WithSearchJobExitMonitor(exitMonitor exiter.Exiter) SearchJobOptions { 66 | return func(j *SearchJob) { 67 | j.ExitMonitor = exitMonitor 68 | } 69 | } 70 | 71 | func (j *SearchJob) Process(_ context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error) { 72 | defer func() { 73 | resp.Document = nil 74 | resp.Body = nil 75 | resp.Meta = nil 76 | }() 77 | 78 | body := removeFirstLine(resp.Body) 79 | if len(body) == 0 { 80 | return nil, nil, fmt.Errorf("empty response body") 81 | } 82 | 83 | entries, err := ParseSearchResults(body) 84 | if err != nil { 85 | return nil, nil, fmt.Errorf("failed to parse search results: %w", err) 86 | } 87 | 88 | entries = filterAndSortEntriesWithinRadius(entries, 89 | j.params.Location.Lat, 90 | j.params.Location.Lon, 91 | j.params.Location.Radius, 92 | ) 93 | 94 | if j.ExitMonitor != nil { 95 | j.ExitMonitor.IncrSeedCompleted(1) 96 | 
j.ExitMonitor.IncrPlacesFound(len(entries)) 97 | j.ExitMonitor.IncrPlacesCompleted(len(entries)) 98 | } 99 | 100 | return entries, nil, nil 101 | } 102 | 103 | func removeFirstLine(data []byte) []byte { 104 | if len(data) == 0 { 105 | return data 106 | } 107 | 108 | index := bytes.IndexByte(data, '\n') 109 | if index == -1 { 110 | return []byte{} 111 | } 112 | 113 | return data[index+1:] 114 | } 115 | 116 | func buildGoogleMapsParams(params *MapSearchParams) map[string]string { 117 | params.ViewportH = 800 118 | params.ViewportW = 600 119 | 120 | ans := map[string]string{ 121 | "tbm": "map", 122 | "authuser": "0", 123 | "hl": params.Hl, 124 | "q": params.Query, 125 | } 126 | 127 | pb := fmt.Sprintf("!4m12!1m3!1d3826.902183192154!2d%.4f!3d%.4f!2m3!1f0!2f0!3f0!3m2!1i%d!2i%d!4f%.1f!7i20!8i0"+ 128 | "!10b1!12m22!1m3!18b1!30b1!34e1!2m3!5m1!6e2!20e3!4b0!10b1!12b1!13b1!16b1!17m1!3e1!20m3!5e2!6b1!14b1!46m1!1b0"+ 129 | "!96b1!19m4!2m3!1i360!2i120!4i8", 130 | params.Location.Lon, 131 | params.Location.Lat, 132 | params.ViewportW, 133 | params.ViewportH, 134 | params.Location.ZoomLvl, 135 | ) 136 | 137 | ans["pb"] = pb 138 | 139 | return ans 140 | } 141 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/gosom/google-maps-scraper 2 | 3 | go 1.24.3 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.10.3 7 | github.com/aws/aws-lambda-go v1.48.0 8 | github.com/aws/aws-sdk-go-v2 v1.36.3 9 | github.com/aws/aws-sdk-go-v2/config v1.29.14 10 | github.com/aws/aws-sdk-go-v2/credentials v1.17.67 11 | github.com/aws/aws-sdk-go-v2/service/lambda v1.71.2 12 | github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3 13 | github.com/golangci/golangci-lint v1.64.8 14 | github.com/google/open-location-code/go v0.0.0-20250415120251-fa6d7f9d4765 15 | github.com/google/uuid v1.6.0 16 | github.com/gosom/scrapemate v0.9.4 17 | github.com/jackc/pgx/v5 v5.7.4 18 | github.com/mattn/go-runewidth v0.0.16 19 | github.com/mcnijman/go-emailaddress v1.1.1 20 | github.com/playwright-community/playwright-go v0.5200.0 21 | github.com/posthog/posthog-go v1.5.2 22 | github.com/shirou/gopsutil/v4 v4.25.4 23 | github.com/stretchr/testify v1.10.0 24 | golang.org/x/sync v0.14.0 25 | golang.org/x/term v0.32.0 26 | modernc.org/sqlite v1.37.0 27 | ) 28 | 29 | require ( 30 | 4d63.com/gocheckcompilerdirectives v1.3.0 // indirect 31 | 4d63.com/gochecknoglobals v0.2.2 // indirect 32 | github.com/4meepo/tagalign v1.4.2 // indirect 33 | github.com/Abirdcfly/dupword v0.1.3 // indirect 34 | github.com/Antonboom/errname v1.0.0 // indirect 35 | github.com/Antonboom/nilnil v1.0.1 // indirect 36 | github.com/Antonboom/testifylint v1.5.2 // indirect 37 | github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect 38 | github.com/Crocmagnon/fatcontext v0.7.1 // indirect 39 | github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24 // indirect 40 | github.com/GaijinEntertainment/go-exhaustruct/v3 v3.3.1 // indirect 41 | github.com/Masterminds/semver/v3 v3.3.0 // indirect 42 | github.com/Noooste/azuretls-client v1.7.3 // indirect 43 | github.com/Noooste/fhttp v1.0.13 // indirect 44 | github.com/Noooste/utls v1.3.8 // indirect 45 | github.com/Noooste/websocket v1.0.3 // indirect 46 | github.com/OpenPeeDeeP/depguard/v2 v2.2.1 // indirect 47 | github.com/alecthomas/go-check-sumtype v0.3.1 // indirect 48 | github.com/alexkohler/nakedret/v2 v2.0.5 // indirect 49 | github.com/alexkohler/prealloc v1.0.0 // 
indirect 50 | github.com/alingse/asasalint v0.0.11 // indirect 51 | github.com/alingse/nilnesserr v0.1.2 // indirect 52 | github.com/andybalholm/brotli v1.1.1 // indirect 53 | github.com/andybalholm/cascadia v1.3.3 // indirect 54 | github.com/ashanbrown/forbidigo v1.6.0 // indirect 55 | github.com/ashanbrown/makezero v1.2.0 // indirect 56 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect 57 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect 58 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect 59 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect 60 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect 61 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect 62 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect 63 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.1 // indirect 64 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect 65 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect 66 | github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect 67 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect 68 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect 69 | github.com/aws/smithy-go v1.22.3 // indirect 70 | github.com/beorn7/perks v1.0.1 // indirect 71 | github.com/bkielbasa/cyclop v1.2.3 // indirect 72 | github.com/blizzy78/varnamelen v0.8.0 // indirect 73 | github.com/bombsimon/wsl/v4 v4.5.0 // indirect 74 | github.com/breml/bidichk v0.3.2 // indirect 75 | github.com/breml/errchkjson v0.4.0 // indirect 76 | github.com/butuzov/ireturn v0.3.1 // indirect 77 | github.com/butuzov/mirror v1.3.0 // indirect 78 | github.com/catenacyber/perfsprint v0.8.2 // indirect 79 | github.com/ccojocar/zxcvbn-go v1.0.2 // indirect 80 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 81 | github.com/charithe/durationcheck v0.0.10 // indirect 82 | github.com/chavacava/garif v0.1.0 // indirect 83 | github.com/ckaznocha/intrange v0.3.0 // indirect 84 | github.com/cloudflare/circl v1.6.1 // indirect 85 | github.com/curioswitch/go-reassign v0.3.0 // indirect 86 | github.com/daixiang0/gci v0.13.5 // indirect 87 | github.com/davecgh/go-spew v1.1.1 // indirect 88 | github.com/deckarep/golang-set/v2 v2.8.0 // indirect 89 | github.com/denis-tingaikin/go-header v0.5.0 // indirect 90 | github.com/dustin/go-humanize v1.0.1 // indirect 91 | github.com/ebitengine/purego v0.8.3-0.20250507171810-1638563e3615 // indirect 92 | github.com/ettle/strcase v0.2.0 // indirect 93 | github.com/fatih/color v1.18.0 // indirect 94 | github.com/fatih/structtag v1.2.0 // indirect 95 | github.com/firefart/nonamedreturns v1.0.5 // indirect 96 | github.com/fsnotify/fsnotify v1.5.4 // indirect 97 | github.com/fzipp/gocyclo v0.6.0 // indirect 98 | github.com/gabriel-vasile/mimetype v1.4.9 // indirect 99 | github.com/ghostiam/protogetter v0.3.9 // indirect 100 | github.com/go-critic/go-critic v0.12.0 // indirect 101 | github.com/go-jose/go-jose/v3 v3.0.4 // indirect 102 | github.com/go-ole/go-ole v1.3.0 // indirect 103 | github.com/go-playground/locales v0.14.1 // indirect 104 | github.com/go-playground/universal-translator v0.18.1 // indirect 105 | github.com/go-playground/validator/v10 v10.26.0 // indirect 106 | github.com/go-stack/stack v1.8.1 // indirect 107 | github.com/go-toolsmith/astcast v1.1.0 // indirect 108 | github.com/go-toolsmith/astcopy v1.1.0 // indirect 109 | github.com/go-toolsmith/astequal 
v1.2.0 // indirect 110 | github.com/go-toolsmith/astfmt v1.1.0 // indirect 111 | github.com/go-toolsmith/astp v1.1.0 // indirect 112 | github.com/go-toolsmith/strparse v1.1.0 // indirect 113 | github.com/go-toolsmith/typep v1.1.0 // indirect 114 | github.com/go-viper/mapstructure/v2 v2.2.1 // indirect 115 | github.com/go-xmlfmt/xmlfmt v1.1.3 // indirect 116 | github.com/gobwas/glob v0.2.3 // indirect 117 | github.com/gofrs/flock v0.12.1 // indirect 118 | github.com/golang/protobuf v1.5.3 // indirect 119 | github.com/golang/snappy v1.0.0 // indirect 120 | github.com/golangci/dupl v0.0.0-20250308024227-f665c8d69b32 // indirect 121 | github.com/golangci/go-printf-func-name v0.1.0 // indirect 122 | github.com/golangci/gofmt v0.0.0-20250106114630-d62b90e6713d // indirect 123 | github.com/golangci/misspell v0.6.0 // indirect 124 | github.com/golangci/plugin-module-register v0.1.1 // indirect 125 | github.com/golangci/revgrep v0.8.0 // indirect 126 | github.com/golangci/unconvert v0.0.0-20240309020433-c5143eacb3ed // indirect 127 | github.com/google/go-cmp v0.7.0 // indirect 128 | github.com/gordonklaus/ineffassign v0.1.0 // indirect 129 | github.com/gosom/kit v0.0.0-20230309082109-543b32ac686a // indirect 130 | github.com/gostaticanalysis/analysisutil v0.7.1 // indirect 131 | github.com/gostaticanalysis/comment v1.5.0 // indirect 132 | github.com/gostaticanalysis/forcetypeassert v0.2.0 // indirect 133 | github.com/gostaticanalysis/nilerr v0.1.1 // indirect 134 | github.com/hashicorp/go-immutable-radix/v2 v2.1.0 // indirect 135 | github.com/hashicorp/go-version v1.7.0 // indirect 136 | github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect 137 | github.com/hashicorp/hcl v1.0.0 // indirect 138 | github.com/hexops/gotextdiff v1.0.3 // indirect 139 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 140 | github.com/jackc/pgpassfile v1.0.0 // indirect 141 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect 142 | github.com/jackc/puddle/v2 v2.2.2 // indirect 143 | github.com/jgautheron/goconst v1.7.1 // indirect 144 | github.com/jingyugao/rowserrcheck v1.1.1 // indirect 145 | github.com/jjti/go-spancheck v0.6.4 // indirect 146 | github.com/julz/importas v0.2.0 // indirect 147 | github.com/karamaru-alpha/copyloopvar v1.2.1 // indirect 148 | github.com/kisielk/errcheck v1.9.0 // indirect 149 | github.com/kkHAIKE/contextcheck v1.1.6 // indirect 150 | github.com/klauspost/compress v1.18.0 // indirect 151 | github.com/kulti/thelper v0.6.3 // indirect 152 | github.com/kunwardeep/paralleltest v1.0.10 // indirect 153 | github.com/lasiar/canonicalheader v1.1.2 // indirect 154 | github.com/ldez/exptostd v0.4.2 // indirect 155 | github.com/ldez/gomoddirectives v0.6.1 // indirect 156 | github.com/ldez/grignotin v0.9.0 // indirect 157 | github.com/ldez/tagliatelle v0.7.1 // indirect 158 | github.com/ldez/usetesting v0.4.2 // indirect 159 | github.com/leodido/go-urn v1.4.0 // indirect 160 | github.com/leonklingele/grouper v1.1.2 // indirect 161 | github.com/lufia/plan9stats v0.0.0-20250317134145-8bc96cf8fc35 // indirect 162 | github.com/macabu/inamedparam v0.1.3 // indirect 163 | github.com/magiconair/properties v1.8.6 // indirect 164 | github.com/maratori/testableexamples v1.0.0 // indirect 165 | github.com/maratori/testpackage v1.1.1 // indirect 166 | github.com/matoous/godox v1.1.0 // indirect 167 | github.com/mattn/go-colorable v0.1.14 // indirect 168 | github.com/mattn/go-isatty v0.0.20 // indirect 169 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 170 | 
github.com/mgechev/revive v1.7.0 // indirect 171 | github.com/mitchellh/go-homedir v1.1.0 // indirect 172 | github.com/mitchellh/mapstructure v1.5.0 // indirect 173 | github.com/moricho/tparallel v0.3.2 // indirect 174 | github.com/nakabonne/nestif v0.3.1 // indirect 175 | github.com/ncruces/go-strftime v0.1.9 // indirect 176 | github.com/nishanths/exhaustive v0.12.0 // indirect 177 | github.com/nishanths/predeclared v0.2.2 // indirect 178 | github.com/nunnatsa/ginkgolinter v0.19.1 // indirect 179 | github.com/olekukonko/tablewriter v0.0.5 // indirect 180 | github.com/pelletier/go-toml v1.9.5 // indirect 181 | github.com/pelletier/go-toml/v2 v2.2.3 // indirect 182 | github.com/pmezard/go-difflib v1.0.0 // indirect 183 | github.com/polyfloyd/go-errorlint v1.7.1 // indirect 184 | github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect 185 | github.com/prometheus/client_golang v1.12.1 // indirect 186 | github.com/prometheus/client_model v0.2.0 // indirect 187 | github.com/prometheus/common v0.32.1 // indirect 188 | github.com/prometheus/procfs v0.7.3 // indirect 189 | github.com/quasilyte/go-ruleguard v0.4.3-0.20240823090925-0fe6f58b47b1 // indirect 190 | github.com/quasilyte/go-ruleguard/dsl v0.3.22 // indirect 191 | github.com/quasilyte/gogrep v0.5.0 // indirect 192 | github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 // indirect 193 | github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 // indirect 194 | github.com/raeperd/recvcheck v0.2.0 // indirect 195 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 196 | github.com/rivo/uniseg v0.4.7 // indirect 197 | github.com/rogpeppe/go-internal v1.14.1 // indirect 198 | github.com/rs/zerolog v1.34.0 // indirect 199 | github.com/ryancurrah/gomodguard v1.3.5 // indirect 200 | github.com/ryanrolds/sqlclosecheck v0.5.1 // indirect 201 | github.com/sanposhiho/wastedassign/v2 v2.1.0 // indirect 202 | github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 // indirect 203 | github.com/sashamelentyev/interfacebloat v1.1.0 // indirect 204 | github.com/sashamelentyev/usestdlibvars v1.28.0 // indirect 205 | github.com/securego/gosec/v2 v2.22.2 // indirect 206 | github.com/sirupsen/logrus v1.9.3 // indirect 207 | github.com/sivchari/containedctx v1.0.3 // indirect 208 | github.com/sivchari/tenv v1.12.1 // indirect 209 | github.com/sonatard/noctx v0.1.0 // indirect 210 | github.com/sourcegraph/go-diff v0.7.0 // indirect 211 | github.com/spf13/afero v1.12.0 // indirect 212 | github.com/spf13/cast v1.5.0 // indirect 213 | github.com/spf13/cobra v1.9.1 // indirect 214 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 215 | github.com/spf13/pflag v1.0.6 // indirect 216 | github.com/spf13/viper v1.12.0 // indirect 217 | github.com/ssgreg/nlreturn/v2 v2.2.1 // indirect 218 | github.com/stbenjam/no-sprintf-host-port v0.2.0 // indirect 219 | github.com/stretchr/objx v0.5.2 // indirect 220 | github.com/subosito/gotenv v1.4.1 // indirect 221 | github.com/syndtr/goleveldb v1.0.0 // indirect 222 | github.com/tdakkota/asciicheck v0.4.1 // indirect 223 | github.com/tetafro/godot v1.5.0 // indirect 224 | github.com/timakin/bodyclose v0.0.0-20241017074812-ed6a65f985e3 // indirect 225 | github.com/timonwong/loggercheck v0.10.1 // indirect 226 | github.com/tklauser/go-sysconf v0.3.15 // indirect 227 | github.com/tklauser/numcpus v0.10.0 // indirect 228 | github.com/tomarrell/wrapcheck/v2 v2.10.0 // indirect 229 | github.com/tommy-muehle/go-mnd/v2 v2.5.1 // indirect 230 | 
github.com/ultraware/funlen v0.2.0 // indirect 231 | github.com/ultraware/whitespace v0.2.0 // indirect 232 | github.com/uudashr/gocognit v1.2.0 // indirect 233 | github.com/uudashr/iface v1.3.1 // indirect 234 | github.com/xen0n/gosmopolitan v1.2.2 // indirect 235 | github.com/yagipy/maintidx v1.0.0 // indirect 236 | github.com/yeya24/promlinter v0.3.0 // indirect 237 | github.com/ykadowak/zerologlint v0.1.5 // indirect 238 | github.com/yusufpapurcu/wmi v1.2.4 // indirect 239 | gitlab.com/bosi/decorder v0.4.2 // indirect 240 | go-simpler.org/musttag v0.13.0 // indirect 241 | go-simpler.org/sloglint v0.9.0 // indirect 242 | go.uber.org/atomic v1.7.0 // indirect 243 | go.uber.org/automaxprocs v1.6.0 // indirect 244 | go.uber.org/multierr v1.11.0 // indirect 245 | go.uber.org/zap v1.24.0 // indirect 246 | golang.org/x/crypto v0.38.0 // indirect 247 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect 248 | golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac // indirect 249 | golang.org/x/mod v0.24.0 // indirect 250 | golang.org/x/net v0.40.0 // indirect 251 | golang.org/x/sys v0.33.0 // indirect 252 | golang.org/x/telemetry v0.0.0-20240522233618-39ace7a40ae7 // indirect 253 | golang.org/x/text v0.25.0 // indirect 254 | golang.org/x/tools v0.33.0 // indirect 255 | golang.org/x/vuln v1.1.4 // indirect 256 | google.golang.org/protobuf v1.36.5 // indirect 257 | gopkg.in/ini.v1 v1.67.0 // indirect 258 | gopkg.in/yaml.v2 v2.4.0 // indirect 259 | gopkg.in/yaml.v3 v3.0.1 // indirect 260 | honnef.co/go/tools v0.6.1 // indirect 261 | modernc.org/libc v1.65.3 // indirect 262 | modernc.org/mathutil v1.7.1 // indirect 263 | modernc.org/memory v1.10.0 // indirect 264 | mvdan.cc/gofumpt v0.7.0 // indirect 265 | mvdan.cc/unparam v0.0.0-20240528143540-8a5130ca722f // indirect 266 | ) 267 | 268 | tool ( 269 | github.com/golangci/golangci-lint/cmd/golangci-lint 270 | golang.org/x/vuln/cmd/govulncheck 271 | ) 272 | -------------------------------------------------------------------------------- /go.work: -------------------------------------------------------------------------------- 1 | go 1.24.3 2 | 3 | use . 
4 | -------------------------------------------------------------------------------- /img/SerpApi-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/SerpApi-banner.png -------------------------------------------------------------------------------- /img/SerpApi-logo-w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/SerpApi-logo-w.png -------------------------------------------------------------------------------- /img/capsolver-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/capsolver-banner.png -------------------------------------------------------------------------------- /img/example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/example.gif -------------------------------------------------------------------------------- /img/gmaps-extractor-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/gmaps-extractor-banner.png -------------------------------------------------------------------------------- /img/gmaps-extractor-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/gmaps-extractor-logo.png -------------------------------------------------------------------------------- /img/premium_scrap_io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/premium_scrap_io.png -------------------------------------------------------------------------------- /img/premium_scrap_io_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/premium_scrap_io_demo.gif -------------------------------------------------------------------------------- /img/scrapeless_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/scrapeless_dark.png -------------------------------------------------------------------------------- /img/scrapeless_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gosom/google-maps-scraper/e31bffd9776052628f781751cbcd8bf661d3f9d4/img/scrapeless_light.png -------------------------------------------------------------------------------- /lint.go: -------------------------------------------------------------------------------- 1 | //go:build lint 2 | // +build lint 3 | 4 | package main 5 | 6 | //go:generate go run "github.com/golangci/golangci-lint/cmd/golangci-lint" -v run 7 | 
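A short note on the lint wiring above (the exact commands are an assumption inferred from the lint build tag and the tool directives in go.mod, not taken from the repository's Makefile): because lint.go is excluded from normal builds by its build tag, the //go:generate directive only fires when that tag is passed explicitly, and it runs the module-pinned golangci-lint so local runs and CI agree on the version. A plausible manual invocation:

go generate -tags lint ./...

With the Go 1.24 tool directives declared in go.mod, the pinned tools can also be run directly:

go tool golangci-lint run
go tool govulncheck ./...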
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/gosom/google-maps-scraper/runner" 13 | "github.com/gosom/google-maps-scraper/runner/databaserunner" 14 | "github.com/gosom/google-maps-scraper/runner/filerunner" 15 | "github.com/gosom/google-maps-scraper/runner/installplaywright" 16 | "github.com/gosom/google-maps-scraper/runner/lambdaaws" 17 | "github.com/gosom/google-maps-scraper/runner/webrunner" 18 | ) 19 | 20 | func main() { 21 | ctx, cancel := context.WithCancel(context.Background()) 22 | 23 | runner.Banner() 24 | 25 | sigChan := make(chan os.Signal, 1) 26 | signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) 27 | 28 | go func() { 29 | <-sigChan 30 | 31 | log.Println("Received signal, shutting down...") 32 | 33 | cancel() 34 | }() 35 | 36 | cfg := runner.ParseConfig() 37 | 38 | runnerInstance, err := runnerFactory(cfg) 39 | if err != nil { 40 | cancel() 41 | os.Stderr.WriteString(err.Error() + "\n") 42 | 43 | runner.Telemetry().Close() 44 | 45 | os.Exit(1) 46 | } 47 | 48 | if err := runnerInstance.Run(ctx); err != nil && !errors.Is(err, context.Canceled) { 49 | os.Stderr.WriteString(err.Error() + "\n") 50 | 51 | _ = runnerInstance.Close(ctx) 52 | runner.Telemetry().Close() 53 | 54 | cancel() 55 | 56 | os.Exit(1) 57 | } 58 | 59 | _ = runnerInstance.Close(ctx) 60 | runner.Telemetry().Close() 61 | 62 | cancel() 63 | 64 | os.Exit(0) 65 | } 66 | 67 | func runnerFactory(cfg *runner.Config) (runner.Runner, error) { 68 | switch cfg.RunMode { 69 | case runner.RunModeFile: 70 | return filerunner.New(cfg) 71 | case runner.RunModeDatabase, runner.RunModeDatabaseProduce: 72 | return databaserunner.New(cfg) 73 | case runner.RunModeInstallPlaywright: 74 | return installplaywright.New(cfg) 75 | case runner.RunModeWeb: 76 | return webrunner.New(cfg) 77 | case runner.RunModeAwsLambda: 78 | return lambdaaws.New(cfg) 79 | case runner.RunModeAwsLambdaInvoker: 80 | return lambdaaws.NewInvoker(cfg) 81 | default: 82 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /postgres/provider.go: -------------------------------------------------------------------------------- 1 | package postgres 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "database/sql" 7 | "encoding/gob" 8 | "fmt" 9 | "sync" 10 | "time" 11 | 12 | "github.com/gosom/scrapemate" 13 | 14 | "github.com/gosom/google-maps-scraper/gmaps" 15 | ) 16 | 17 | const ( 18 | statusNew = "new" 19 | statusQueued = "queued" 20 | batchSize = 10 21 | ) 22 | 23 | var _ scrapemate.JobProvider = (*provider)(nil) 24 | 25 | type provider struct { 26 | db *sql.DB 27 | mu *sync.Mutex 28 | jobc chan scrapemate.IJob 29 | errc chan error 30 | started bool 31 | batchSize int 32 | } 33 | 34 | func NewProvider(db *sql.DB, opts ...ProviderOption) scrapemate.JobProvider { 35 | prov := provider{ 36 | db: db, 37 | mu: &sync.Mutex{}, 38 | errc: make(chan error, 1), 39 | batchSize: batchSize, 40 | } 41 | 42 | for _, opt := range opts { 43 | opt(&prov) 44 | } 45 | 46 | prov.jobc = make(chan scrapemate.IJob, 2*prov.batchSize) 47 | 48 | return &prov 49 | } 50 | 51 | // ProviderOption allows configuring the provider 52 | type ProviderOption func(*provider) 53 | 54 | // WithBatchSize sets custom batch 
size 55 | func WithBatchSize(size int) ProviderOption { 56 | return func(p *provider) { 57 | if size > 0 { 58 | p.batchSize = size 59 | } 60 | } 61 | } 62 | 63 | //nolint:gocritic // it contains about unnamed results 64 | func (p *provider) Jobs(ctx context.Context) (<-chan scrapemate.IJob, <-chan error) { 65 | outc := make(chan scrapemate.IJob) 66 | errc := make(chan error, 1) 67 | 68 | p.mu.Lock() 69 | if !p.started { 70 | go p.fetchJobs(ctx) 71 | 72 | p.started = true 73 | } 74 | p.mu.Unlock() 75 | 76 | go func() { 77 | for { 78 | select { 79 | case <-ctx.Done(): 80 | return 81 | case err := <-p.errc: 82 | errc <- err 83 | 84 | return 85 | case job, ok := <-p.jobc: 86 | if !ok { 87 | return 88 | } 89 | 90 | if job == nil || job.GetID() == "" { 91 | continue 92 | } 93 | 94 | select { 95 | case outc <- job: 96 | case <-ctx.Done(): 97 | return 98 | } 99 | } 100 | } 101 | }() 102 | 103 | return outc, errc 104 | } 105 | 106 | // Push pushes a job to the job provider 107 | func (p *provider) Push(ctx context.Context, job scrapemate.IJob) error { 108 | q := `INSERT INTO gmaps_jobs 109 | (id, priority, payload_type, payload, created_at, status) 110 | VALUES 111 | ($1, $2, $3, $4, $5, $6) ON CONFLICT DO NOTHING` 112 | 113 | var buf bytes.Buffer 114 | enc := gob.NewEncoder(&buf) 115 | 116 | var payloadType string 117 | 118 | switch j := job.(type) { 119 | case *gmaps.GmapJob: 120 | payloadType = "search" 121 | 122 | if err := enc.Encode(j); err != nil { 123 | return err 124 | } 125 | case *gmaps.PlaceJob: 126 | payloadType = "place" 127 | 128 | if err := enc.Encode(j); err != nil { 129 | return err 130 | } 131 | case *gmaps.EmailExtractJob: 132 | payloadType = "email" 133 | 134 | if err := enc.Encode(j); err != nil { 135 | return err 136 | } 137 | default: 138 | return fmt.Errorf("invalid job type %T", job) 139 | } 140 | 141 | _, err := p.db.ExecContext(ctx, q, 142 | job.GetID(), job.GetPriority(), payloadType, buf.Bytes(), time.Now().UTC(), statusNew, 143 | ) 144 | 145 | return err 146 | } 147 | 148 | func (p *provider) fetchJobs(ctx context.Context) { 149 | defer close(p.jobc) 150 | defer close(p.errc) 151 | 152 | q := ` 153 | WITH updated AS ( 154 | UPDATE gmaps_jobs 155 | SET status = $1 156 | WHERE id IN ( 157 | SELECT id from gmaps_jobs 158 | WHERE status = $2 159 | ORDER BY priority ASC, created_at ASC FOR UPDATE SKIP LOCKED 160 | LIMIT $3 161 | ) 162 | RETURNING * 163 | ) 164 | SELECT payload_type, payload from updated ORDER by priority ASC, created_at ASC 165 | ` 166 | 167 | baseDelay := time.Millisecond * 50 168 | maxDelay := time.Millisecond * 300 169 | factor := 2 170 | currentDelay := baseDelay 171 | 172 | jobs := make([]scrapemate.IJob, 0, p.batchSize) 173 | 174 | for { 175 | select { 176 | case <-ctx.Done(): 177 | return 178 | default: 179 | } 180 | 181 | rows, err := p.db.QueryContext(ctx, q, statusQueued, statusNew, p.batchSize) 182 | if err != nil { 183 | p.errc <- err 184 | 185 | return 186 | } 187 | 188 | for rows.Next() { 189 | var ( 190 | payloadType string 191 | payload []byte 192 | ) 193 | 194 | if err := rows.Scan(&payloadType, &payload); err != nil { 195 | p.errc <- err 196 | 197 | return 198 | } 199 | 200 | job, err := decodeJob(payloadType, payload) 201 | if err != nil { 202 | p.errc <- err 203 | 204 | return 205 | } 206 | 207 | jobs = append(jobs, job) 208 | } 209 | 210 | if err := rows.Err(); err != nil { 211 | p.errc <- err 212 | 213 | return 214 | } 215 | 216 | if err := rows.Close(); err != nil { 217 | p.errc <- err 218 | 219 | return 220 | } 221 | 222 | if 
len(jobs) > 0 { 223 | for _, job := range jobs { 224 | select { 225 | case p.jobc <- job: 226 | case <-ctx.Done(): 227 | return 228 | } 229 | } 230 | 231 | jobs = jobs[:0] 232 | } else { 233 | select { 234 | case <-time.After(currentDelay): 235 | currentDelay = time.Duration(float64(currentDelay) * float64(factor)) 236 | if currentDelay > maxDelay { 237 | currentDelay = maxDelay 238 | } 239 | case <-ctx.Done(): 240 | return 241 | } 242 | } 243 | } 244 | } 245 | 246 | func decodeJob(payloadType string, payload []byte) (scrapemate.IJob, error) { 247 | buf := bytes.NewBuffer(payload) 248 | dec := gob.NewDecoder(buf) 249 | 250 | switch payloadType { 251 | case "search": 252 | j := new(gmaps.GmapJob) 253 | if err := dec.Decode(j); err != nil { 254 | return nil, fmt.Errorf("failed to decode search job: %w", err) 255 | } 256 | 257 | return j, nil 258 | case "place": 259 | j := new(gmaps.PlaceJob) 260 | if err := dec.Decode(j); err != nil { 261 | return nil, fmt.Errorf("failed to decode place job: %w", err) 262 | } 263 | 264 | return j, nil 265 | case "email": 266 | j := new(gmaps.EmailExtractJob) 267 | if err := dec.Decode(j); err != nil { 268 | return nil, fmt.Errorf("failed to decode email job: %w", err) 269 | } 270 | 271 | return j, nil 272 | default: 273 | return nil, fmt.Errorf("invalid payload type: %s", payloadType) 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /postgres/resultwriter.go: -------------------------------------------------------------------------------- 1 | package postgres 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/json" 7 | "errors" 8 | "fmt" 9 | "strings" 10 | "time" 11 | 12 | "github.com/gosom/scrapemate" 13 | 14 | "github.com/gosom/google-maps-scraper/gmaps" 15 | ) 16 | 17 | func NewResultWriter(db *sql.DB) scrapemate.ResultWriter { 18 | return &resultWriter{db: db} 19 | } 20 | 21 | type resultWriter struct { 22 | db *sql.DB 23 | } 24 | 25 | func (r *resultWriter) Run(ctx context.Context, in <-chan scrapemate.Result) error { 26 | const maxBatchSize = 50 27 | 28 | buff := make([]*gmaps.Entry, 0, maxBatchSize) 29 | lastSave := time.Now().UTC() 30 | 31 | for result := range in { 32 | entry, ok := result.Data.(*gmaps.Entry) 33 | 34 | if !ok { 35 | return errors.New("invalid data type") 36 | } 37 | 38 | buff = append(buff, entry) 39 | 40 | if len(buff) >= maxBatchSize || time.Now().UTC().Sub(lastSave) >= time.Minute { 41 | err := r.batchSave(ctx, buff) 42 | if err != nil { 43 | return err 44 | } 45 | lastSave = time.Now().UTC() 46 | buff = buff[:0] 47 | } 48 | } 49 | 50 | if len(buff) > 0 { 51 | err := r.batchSave(ctx, buff) 52 | if err != nil { 53 | return err 54 | } 55 | } 56 | 57 | return nil 58 | } 59 | 60 | func (r *resultWriter) batchSave(ctx context.Context, entries []*gmaps.Entry) error { 61 | if len(entries) == 0 { 62 | return nil 63 | } 64 | 65 | q := `INSERT INTO results 66 | (data) 67 | VALUES 68 | ` 69 | elements := make([]string, 0, len(entries)) 70 | args := make([]interface{}, 0, len(entries)) 71 | 72 | for i, entry := range entries { 73 | data, err := json.Marshal(entry) 74 | if err != nil { 75 | return err 76 | } 77 | 78 | elements = append(elements, fmt.Sprintf("($%d)", i+1)) 79 | args = append(args, data) 80 | } 81 | 82 | q += strings.Join(elements, ", ") 83 | q += " ON CONFLICT DO NOTHING" 84 | 85 | tx, err := r.db.BeginTx(ctx, nil) 86 | if err != nil { 87 | return err 88 | } 89 | 90 | defer func() { 91 | _ =
tx.Rollback() 92 | }() 93 | 94 | _, err = tx.ExecContext(ctx, q, args...) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | err = tx.Commit() 100 | 101 | return err 102 | } 103 | -------------------------------------------------------------------------------- /runner/databaserunner/databaserunner.go: -------------------------------------------------------------------------------- 1 | package databaserunner 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "io" 8 | "os" 9 | 10 | // postgres driver 11 | _ "github.com/jackc/pgx/v5/stdlib" 12 | 13 | "github.com/gosom/google-maps-scraper/postgres" 14 | "github.com/gosom/google-maps-scraper/runner" 15 | "github.com/gosom/google-maps-scraper/tlmt" 16 | "github.com/gosom/scrapemate" 17 | "github.com/gosom/scrapemate/scrapemateapp" 18 | ) 19 | 20 | type dbrunner struct { 21 | cfg *runner.Config 22 | provider scrapemate.JobProvider 23 | produce bool 24 | app *scrapemateapp.ScrapemateApp 25 | conn *sql.DB 26 | } 27 | 28 | func New(cfg *runner.Config) (runner.Runner, error) { 29 | if cfg.RunMode != runner.RunModeDatabase && cfg.RunMode != runner.RunModeDatabaseProduce { 30 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 31 | } 32 | 33 | conn, err := openPsqlConn(cfg.Dsn) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | ans := dbrunner{ 39 | cfg: cfg, 40 | provider: postgres.NewProvider(conn), 41 | produce: cfg.ProduceOnly, 42 | conn: conn, 43 | } 44 | 45 | if ans.produce { 46 | return &ans, nil 47 | } 48 | 49 | psqlWriter := postgres.NewResultWriter(conn) 50 | 51 | writers := []scrapemate.ResultWriter{ 52 | psqlWriter, 53 | } 54 | 55 | opts := []func(*scrapemateapp.Config) error{ 56 | // scrapemateapp.WithCache("leveldb", "cache"), 57 | scrapemateapp.WithConcurrency(cfg.Concurrency), 58 | scrapemateapp.WithProvider(ans.provider), 59 | scrapemateapp.WithExitOnInactivity(cfg.ExitOnInactivityDuration), 60 | } 61 | 62 | if len(cfg.Proxies) > 0 { 63 | opts = append(opts, 64 | scrapemateapp.WithProxies(cfg.Proxies), 65 | ) 66 | } 67 | 68 | if !cfg.FastMode { 69 | if cfg.Debug { 70 | opts = append(opts, scrapemateapp.WithJS( 71 | scrapemateapp.Headfull(), 72 | scrapemateapp.DisableImages(), 73 | )) 74 | } else { 75 | opts = append(opts, scrapemateapp.WithJS(scrapemateapp.DisableImages())) 76 | } 77 | } else { 78 | opts = append(opts, scrapemateapp.WithStealth("firefox")) 79 | } 80 | 81 | if !cfg.DisablePageReuse { 82 | opts = append(opts, 83 | scrapemateapp.WithPageReuseLimit(2), 84 | scrapemateapp.WithBrowserReuseLimit(200), 85 | ) 86 | } 87 | 88 | matecfg, err := scrapemateapp.NewConfig( 89 | writers, 90 | opts..., 91 | ) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | ans.app, err = scrapemateapp.NewScrapeMateApp(matecfg) 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | return &ans, nil 102 | } 103 | 104 | func (d *dbrunner) Run(ctx context.Context) error { 105 | _ = runner.Telemetry().Send(ctx, tlmt.NewEvent("databaserunner.Run", nil)) 106 | 107 | if d.produce { 108 | return d.produceSeedJobs(ctx) 109 | } 110 | 111 | return d.app.Start(ctx) 112 | } 113 | 114 | func (d *dbrunner) Close(context.Context) error { 115 | if d.conn != nil { 116 | defer func() { _ = d.conn.Close() }() // close the connection after the app shuts down 117 | } 118 | 119 | if d.app != nil { 120 | return d.app.Close() 121 | } 122 | 123 | return nil 124 | } 125 | 126 | func (d *dbrunner) produceSeedJobs(ctx context.Context) error { 127 | var input io.Reader 128 | 129 | switch d.cfg.InputFile { 130 | case "stdin": 131 | input = os.Stdin 132 | default: 133 | f, err :=
os.Open(d.cfg.InputFile) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | defer f.Close() 139 | 140 | input = f 141 | } 142 | 143 | jobs, err := runner.CreateSeedJobs( 144 | d.cfg.FastMode, 145 | d.cfg.LangCode, 146 | input, 147 | d.cfg.MaxDepth, 148 | d.cfg.Email, 149 | d.cfg.GeoCoordinates, 150 | d.cfg.Zoom, 151 | d.cfg.Radius, 152 | nil, 153 | nil, 154 | d.cfg.ExtraReviews, 155 | ) 156 | if err != nil { 157 | return err 158 | } 159 | 160 | for i := range jobs { 161 | if err := d.provider.Push(ctx, jobs[i]); err != nil { 162 | return err 163 | } 164 | } 165 | 166 | _ = runner.Telemetry().Send(ctx, tlmt.NewEvent("databaserunner.produceSeedJobs", map[string]any{ 167 | "job_count": len(jobs), 168 | })) 169 | 170 | return nil 171 | } 172 | 173 | func openPsqlConn(dsn string) (conn *sql.DB, err error) { 174 | conn, err = sql.Open("pgx", dsn) 175 | if err != nil { 176 | return 177 | } 178 | 179 | err = conn.Ping() 180 | if err != nil { 181 | return 182 | } 183 | 184 | conn.SetMaxOpenConns(10) 185 | 186 | return 187 | } 188 | -------------------------------------------------------------------------------- /runner/filerunner/filerunner.go: -------------------------------------------------------------------------------- 1 | package filerunner 2 | 3 | import ( 4 | "context" 5 | "encoding/csv" 6 | "fmt" 7 | "io" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/gosom/google-maps-scraper/deduper" 13 | "github.com/gosom/google-maps-scraper/exiter" 14 | "github.com/gosom/google-maps-scraper/runner" 15 | "github.com/gosom/google-maps-scraper/tlmt" 16 | "github.com/gosom/scrapemate" 17 | "github.com/gosom/scrapemate/adapters/writers/csvwriter" 18 | "github.com/gosom/scrapemate/adapters/writers/jsonwriter" 19 | "github.com/gosom/scrapemate/scrapemateapp" 20 | ) 21 | 22 | type fileRunner struct { 23 | cfg *runner.Config 24 | input io.Reader 25 | writers []scrapemate.ResultWriter 26 | app *scrapemateapp.ScrapemateApp 27 | outfile *os.File 28 | } 29 | 30 | func New(cfg *runner.Config) (runner.Runner, error) { 31 | if cfg.RunMode != runner.RunModeFile { 32 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 33 | } 34 | 35 | ans := &fileRunner{ 36 | cfg: cfg, 37 | } 38 | 39 | if err := ans.setInput(); err != nil { 40 | return nil, err 41 | } 42 | 43 | if err := ans.setWriters(); err != nil { 44 | return nil, err 45 | } 46 | 47 | if err := ans.setApp(); err != nil { 48 | return nil, err 49 | } 50 | 51 | return ans, nil 52 | } 53 | 54 | func (r *fileRunner) Run(ctx context.Context) (err error) { 55 | var seedJobs []scrapemate.IJob 56 | 57 | t0 := time.Now().UTC() 58 | 59 | defer func() { 60 | elapsed := time.Now().UTC().Sub(t0) 61 | params := map[string]any{ 62 | "job_count": len(seedJobs), 63 | "duration": elapsed.String(), 64 | } 65 | 66 | if err != nil { 67 | params["error"] = err.Error() 68 | } 69 | 70 | evt := tlmt.NewEvent("file_runner", params) 71 | 72 | _ = runner.Telemetry().Send(ctx, evt) 73 | }() 74 | 75 | dedup := deduper.New() 76 | exitMonitor := exiter.New() 77 | 78 | seedJobs, err = runner.CreateSeedJobs( 79 | r.cfg.FastMode, 80 | r.cfg.LangCode, 81 | r.input, 82 | r.cfg.MaxDepth, 83 | r.cfg.Email, 84 | r.cfg.GeoCoordinates, 85 | r.cfg.Zoom, 86 | r.cfg.Radius, 87 | dedup, 88 | exitMonitor, 89 | r.cfg.ExtraReviews, 90 | ) 91 | if err != nil { 92 | return err 93 | } 94 | 95 | exitMonitor.SetSeedCount(len(seedJobs)) 96 | 97 | ctx, cancel := context.WithCancel(ctx) 98 | defer cancel() 99 | 100 | exitMonitor.SetCancelFunc(cancel) 101 | 102 | go 
exitMonitor.Run(ctx) 103 | 104 | err = r.app.Start(ctx, seedJobs...) 105 | 106 | return err 107 | } 108 | 109 | func (r *fileRunner) Close(context.Context) error { 110 | if r.input != nil { 111 | if closer, ok := r.input.(io.Closer); ok { 112 | defer func() { _ = closer.Close() }() // runs after the app has flushed and closed 113 | } 114 | } 115 | 116 | if r.outfile != nil { 117 | defer func() { _ = r.outfile.Close() }() 118 | } 119 | 120 | if r.app != nil { 121 | return r.app.Close() 122 | } 123 | 124 | return nil 125 | } 126 | 127 | func (r *fileRunner) setInput() error { 128 | switch r.cfg.InputFile { 129 | case "stdin": 130 | r.input = os.Stdin 131 | default: 132 | f, err := os.Open(r.cfg.InputFile) 133 | if err != nil { 134 | return err 135 | } 136 | 137 | r.input = f 138 | } 139 | 140 | return nil 141 | } 142 | 143 | func (r *fileRunner) setWriters() error { 144 | if r.cfg.CustomWriter != "" { 145 | parts := strings.Split(r.cfg.CustomWriter, ":") 146 | if len(parts) != 2 { 147 | return fmt.Errorf("invalid custom writer format: %s", r.cfg.CustomWriter) 148 | } 149 | 150 | dir, pluginName := parts[0], parts[1] 151 | 152 | customWriter, err := runner.LoadCustomWriter(dir, pluginName) 153 | if err != nil { 154 | return err 155 | } 156 | 157 | r.writers = append(r.writers, customWriter) 158 | } else { 159 | var resultsWriter io.Writer 160 | 161 | switch r.cfg.ResultsFile { 162 | case "stdout": 163 | resultsWriter = os.Stdout 164 | default: 165 | f, err := os.Create(r.cfg.ResultsFile) 166 | if err != nil { 167 | return err 168 | } 169 | 170 | r.outfile = f 171 | 172 | resultsWriter = r.outfile 173 | } 174 | 175 | csvWriter := csvwriter.NewCsvWriter(csv.NewWriter(resultsWriter)) 176 | 177 | if r.cfg.JSON { 178 | r.writers = append(r.writers, jsonwriter.NewJSONWriter(resultsWriter)) 179 | } else { 180 | r.writers = append(r.writers, csvWriter) 181 | } 182 | } 183 | 184 | return nil 185 | } 186 | 187 | func (r *fileRunner) setApp() error { 188 | opts := []func(*scrapemateapp.Config) error{ 189 | // scrapemateapp.WithCache("leveldb", "cache"), 190 | scrapemateapp.WithConcurrency(r.cfg.Concurrency), 191 | scrapemateapp.WithExitOnInactivity(r.cfg.ExitOnInactivityDuration), 192 | } 193 | 194 | if len(r.cfg.Proxies) > 0 { 195 | opts = append(opts, 196 | scrapemateapp.WithProxies(r.cfg.Proxies), 197 | ) 198 | } 199 | 200 | if !r.cfg.FastMode { 201 | if r.cfg.Debug { 202 | opts = append(opts, scrapemateapp.WithJS( 203 | scrapemateapp.Headfull(), 204 | scrapemateapp.DisableImages(), 205 | ), 206 | ) 207 | } else { 208 | opts = append(opts, scrapemateapp.WithJS(scrapemateapp.DisableImages())) 209 | } 210 | } else { 211 | opts = append(opts, scrapemateapp.WithStealth("firefox")) 212 | } 213 | 214 | if !r.cfg.DisablePageReuse { 215 | opts = append(opts, 216 | scrapemateapp.WithPageReuseLimit(2), 217 | scrapemateapp.WithBrowserReuseLimit(200), 218 | ) 219 | } 220 | 221 | matecfg, err := scrapemateapp.NewConfig( 222 | r.writers, 223 | opts..., 224 | ) 225 | if err != nil { 226 | return err 227 | } 228 | 229 | r.app, err = scrapemateapp.NewScrapeMateApp(matecfg) 230 | if err != nil { 231 | return err 232 | } 233 | 234 | return nil 235 | } 236 | -------------------------------------------------------------------------------- /runner/installplaywright/installplaywright.go: -------------------------------------------------------------------------------- 1 | package installplaywright 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/gosom/google-maps-scraper/runner" 8 | "github.com/playwright-community/playwright-go" 9 | ) 10 | 11 | type installer struct { 12 | } 13 | 14 | func
New(cfg *runner.Config) (runner.Runner, error) { 15 | if cfg.RunMode != runner.RunModeInstallPlaywright { 16 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 17 | } 18 | 19 | return &installer{}, nil 20 | } 21 | 22 | func (i *installer) Run(context.Context) error { 23 | opts := []*playwright.RunOptions{ 24 | { 25 | Browsers: []string{"chromium"}, 26 | }, 27 | } 28 | 29 | return playwright.Install(opts...) 30 | } 31 | 32 | func (i *installer) Close(context.Context) error { 33 | return nil 34 | } 35 | -------------------------------------------------------------------------------- /runner/jobs.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "plugin" 10 | "strconv" 11 | "strings" 12 | 13 | "github.com/gosom/google-maps-scraper/deduper" 14 | "github.com/gosom/google-maps-scraper/exiter" 15 | "github.com/gosom/google-maps-scraper/gmaps" 16 | "github.com/gosom/scrapemate" 17 | ) 18 | 19 | func CreateSeedJobs( 20 | fastmode bool, 21 | langCode string, 22 | r io.Reader, 23 | maxDepth int, 24 | email bool, 25 | geoCoordinates string, 26 | zoom int, 27 | radius float64, 28 | dedup deduper.Deduper, 29 | exitMonitor exiter.Exiter, 30 | extraReviews bool, 31 | ) (jobs []scrapemate.IJob, err error) { 32 | var lat, lon float64 33 | 34 | if fastmode { 35 | if geoCoordinates == "" { 36 | return nil, fmt.Errorf("geo coordinates are required in fast mode") 37 | } 38 | 39 | parts := strings.Split(geoCoordinates, ",") 40 | if len(parts) != 2 { 41 | return nil, fmt.Errorf("invalid geo coordinates: %s", geoCoordinates) 42 | } 43 | 44 | lat, err = strconv.ParseFloat(parts[0], 64) 45 | if err != nil { 46 | return nil, fmt.Errorf("invalid latitude: %w", err) 47 | } 48 | 49 | lon, err = strconv.ParseFloat(parts[1], 64) 50 | if err != nil { 51 | return nil, fmt.Errorf("invalid longitude: %w", err) 52 | } 53 | 54 | if lat < -90 || lat > 90 { 55 | return nil, fmt.Errorf("invalid latitude: %f", lat) 56 | } 57 | 58 | if lon < -180 || lon > 180 { 59 | return nil, fmt.Errorf("invalid longitude: %f", lon) 60 | } 61 | 62 | if zoom < 1 || zoom > 21 { 63 | return nil, fmt.Errorf("invalid zoom level: %d", zoom) 64 | } 65 | 66 | if radius < 0 { 67 | return nil, fmt.Errorf("invalid radius: %f", radius) 68 | } 69 | } 70 | 71 | scanner := bufio.NewScanner(r) 72 | 73 | for scanner.Scan() { 74 | query := strings.TrimSpace(scanner.Text()) 75 | if query == "" { 76 | continue 77 | } 78 | 79 | var id string 80 | 81 | if before, after, ok := strings.Cut(query, "#!#"); ok { 82 | query = strings.TrimSpace(before) 83 | id = strings.TrimSpace(after) 84 | } 85 | 86 | var job scrapemate.IJob 87 | 88 | if !fastmode { 89 | opts := []gmaps.GmapJobOptions{} 90 | 91 | if dedup != nil { 92 | opts = append(opts, gmaps.WithDeduper(dedup)) 93 | } 94 | 95 | if exitMonitor != nil { 96 | opts = append(opts, gmaps.WithExitMonitor(exitMonitor)) 97 | } 98 | 99 | if extraReviews { 100 | opts = append(opts, gmaps.WithExtraReviews()) 101 | } 102 | 103 | job = gmaps.NewGmapJob(id, langCode, query, maxDepth, email, geoCoordinates, zoom, opts...) 
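// Example (illustrative): an input line such as "restaurants in athens #!# my-batch-1" is split on "#!#" above, so "restaurants in athens" becomes the query and "my-batch-1" becomes the ID passed to NewGmapJob.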
104 | } else { 105 | jparams := gmaps.MapSearchParams{ 106 | Location: gmaps.MapLocation{ 107 | Lat: lat, 108 | Lon: lon, 109 | ZoomLvl: float64(zoom), 110 | Radius: radius, 111 | }, 112 | Query: query, 113 | ViewportW: 1920, 114 | ViewportH: 450, 115 | Hl: langCode, 116 | } 117 | 118 | opts := []gmaps.SearchJobOptions{} 119 | 120 | if exitMonitor != nil { 121 | opts = append(opts, gmaps.WithSearchJobExitMonitor(exitMonitor)) 122 | } 123 | 124 | job = gmaps.NewSearchJob(&jparams, opts...) 125 | } 126 | 127 | jobs = append(jobs, job) 128 | } 129 | 130 | return jobs, scanner.Err() 131 | } 132 | 133 | func LoadCustomWriter(pluginDir, pluginName string) (scrapemate.ResultWriter, error) { 134 | files, err := os.ReadDir(pluginDir) 135 | if err != nil { 136 | return nil, fmt.Errorf("failed to read plugin directory: %w", err) 137 | } 138 | 139 | for _, file := range files { 140 | if file.IsDir() { 141 | continue 142 | } 143 | 144 | if filepath.Ext(file.Name()) != ".so" && filepath.Ext(file.Name()) != ".dll" { 145 | continue 146 | } 147 | 148 | pluginPath := filepath.Join(pluginDir, file.Name()) 149 | 150 | p, err := plugin.Open(pluginPath) 151 | if err != nil { 152 | return nil, fmt.Errorf("failed to open plugin %s: %w", file.Name(), err) 153 | } 154 | 155 | symWriter, err := p.Lookup(pluginName) 156 | if err != nil { 157 | return nil, fmt.Errorf("failed to lookup symbol %s: %w", pluginName, err) 158 | } 159 | 160 | writer, ok := symWriter.(*scrapemate.ResultWriter) 161 | if !ok { 162 | return nil, fmt.Errorf("unexpected type %T from writer symbol in plugin %s", symWriter, file.Name()) 163 | } 164 | 165 | return *writer, nil 166 | } 167 | 168 | return nil, fmt.Errorf("no plugin found in %s", pluginDir) 169 | } 170 | -------------------------------------------------------------------------------- /runner/lambdaaws/invoker.go: -------------------------------------------------------------------------------- 1 | package lambdaaws 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "log" 9 | "os" 10 | "strings" 11 | 12 | "github.com/aws/aws-sdk-go-v2/config" 13 | "github.com/aws/aws-sdk-go-v2/credentials" 14 | "github.com/aws/aws-sdk-go-v2/service/lambda" 15 | "github.com/aws/aws-sdk-go-v2/service/lambda/types" 16 | "github.com/google/uuid" 17 | "github.com/gosom/google-maps-scraper/runner" 18 | ) 19 | 20 | var _ runner.Runner = (*invoker)(nil) 21 | 22 | type invoker struct { 23 | lclient *lambda.Client 24 | payloads []lInput 25 | } 26 | 27 | func NewInvoker(cfg *runner.Config) (runner.Runner, error) { 28 | if cfg.RunMode != runner.RunModeAwsLambdaInvoker { 29 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 30 | } 31 | 32 | creds := credentials.NewStaticCredentialsProvider( 33 | cfg.AwsAccessKey, 34 | cfg.AwsSecretKey, 35 | "", 36 | ) 37 | 38 | awscfg, err := config.LoadDefaultConfig(context.TODO(), 39 | config.WithCredentialsProvider(creds), 40 | config.WithRegion(cfg.AwsRegion), 41 | ) 42 | if err != nil { 43 | return nil, fmt.Errorf("unable to load SDK config: %v", err) 44 | } 45 | 46 | ans := invoker{ 47 | lclient: lambda.NewFromConfig(awscfg), 48 | } 49 | 50 | if err := ans.setPayloads(cfg); err != nil { 51 | return nil, err 52 | } 53 | 54 | return &ans, nil 55 | } 56 | 57 | func (i *invoker) Run(ctx context.Context) error { 58 | for j := range i.payloads { 59 | if err := i.invoke(ctx, i.payloads[j]); err != nil { 60 | return err 61 | } 62 | } 63 | 64 | return nil 65 | } 66 | 67 | //nolint:gocritic // let's pass the input as is 68 | func (i 
*invoker) invoke(ctx context.Context, input lInput) error { 69 | payloadBytes, err := json.Marshal(input) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | finput := &lambda.InvokeInput{ 75 | FunctionName: &input.FunctionName, 76 | Payload: payloadBytes, 77 | InvocationType: types.InvocationTypeEvent, 78 | } 79 | 80 | result, err := i.lclient.Invoke(ctx, finput) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | log.Printf("Lambda function %s invoked with JobID %s, Part %d, StatusCode %d\n", 86 | input.FunctionName, input.JobID, input.Part, result.StatusCode) 87 | 88 | return nil 89 | } 90 | 91 | func (i *invoker) Close(context.Context) error { 92 | return nil 93 | } 94 | 95 | func (i *invoker) setPayloads(cfg *runner.Config) error { 96 | f, err := os.Open(cfg.InputFile) 97 | if err != nil { 98 | return err 99 | } 100 | 101 | defer f.Close() 102 | 103 | scanner := bufio.NewScanner(f) 104 | 105 | chunkSize := cfg.AwsLambdaChunkSize 106 | 107 | var currentChunk []string 108 | 109 | chunkNumber := 0 110 | jobID := uuid.New().String() 111 | 112 | for scanner.Scan() { 113 | keyword := strings.TrimSpace(scanner.Text()) 114 | if keyword == "" { 115 | continue 116 | } 117 | 118 | currentChunk = append(currentChunk, keyword) 119 | 120 | // When the current chunk reaches chunkSize, emit a payload; the final partial chunk is flushed after the loop 121 | if len(currentChunk) >= chunkSize { 122 | payload := lInput{ 123 | JobID: jobID, 124 | Part: chunkNumber, 125 | BucketName: cfg.S3Bucket, 126 | Keywords: currentChunk, 127 | Depth: cfg.MaxDepth, 128 | Concurrency: cfg.Concurrency, 129 | Language: cfg.LangCode, 130 | FunctionName: cfg.FunctionName, 131 | ExtraReviews: cfg.ExtraReviews, 132 | } 133 | i.payloads = append(i.payloads, payload) 134 | 135 | currentChunk = []string{} 136 | chunkNumber++ 137 | } 138 | } 139 | 140 | if len(currentChunk) > 0 { 141 | payload := lInput{ 142 | JobID: jobID, 143 | Part: chunkNumber, 144 | BucketName: cfg.S3Bucket, 145 | Keywords: currentChunk, 146 | Depth: cfg.MaxDepth, 147 | Concurrency: cfg.Concurrency, 148 | Language: cfg.LangCode, 149 | FunctionName: cfg.FunctionName, 150 | ExtraReviews: cfg.ExtraReviews, 151 | } 152 | i.payloads = append(i.payloads, payload) 153 | } 154 | 155 | if err := scanner.Err(); err != nil { 156 | return err 157 | } 158 | 159 | if len(i.payloads) == 0 { 160 | return fmt.Errorf("no keywords found in input file") 161 | } 162 | 163 | return nil 164 | } 165 | -------------------------------------------------------------------------------- /runner/lambdaaws/io.go: -------------------------------------------------------------------------------- 1 | package lambdaaws 2 | 3 | type lInput struct { 4 | JobID string `json:"job_id"` 5 | Part int `json:"part"` 6 | BucketName string `json:"bucket_name"` 7 | Keywords []string `json:"keywords"` 8 | Depth int `json:"depth"` 9 | Concurrency int `json:"concurrency"` 10 | Language string `json:"language"` 11 | FunctionName string `json:"function_name"` 12 | DisablePageReuse bool `json:"disable_page_reuse"` 13 | ExtraReviews bool `json:"extra_reviews"` 14 | } 15 | -------------------------------------------------------------------------------- /runner/lambdaaws/lambdaaws.go: -------------------------------------------------------------------------------- 1 | package lambdaaws 2 | 3 | import ( 4 | "context" 5 | "encoding/csv" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "os/exec" 12 | "path/filepath" 13 | "strings" 14 | "time" 15 | 16 | "github.com/aws/aws-lambda-go/lambda" 17 | 18 | "github.com/gosom/google-maps-scraper/exiter" 19 | "github.com/gosom/google-maps-scraper/runner" 20 | "github.com/gosom/scrapemate" 21 | "github.com/gosom/scrapemate/adapters/writers/csvwriter" 22 | "github.com/gosom/scrapemate/scrapemateapp" 23 | ) 24 | 25 | var _ runner.Runner = (*lambdaAwsRunner)(nil) 26 | 27 | type lambdaAwsRunner struct { 28 | uploader runner.S3Uploader 29 | } 30 | 31 | func New(cfg *runner.Config) (runner.Runner, error) { 32 | if cfg.RunMode != runner.RunModeAwsLambda { 33 | return nil, fmt.Errorf("%w: %d", runner.ErrInvalidRunMode, cfg.RunMode) 34 | } 35 | 36 | ans := lambdaAwsRunner{ 37 | uploader: cfg.S3Uploader, 38 | } 39 | 40 | return &ans, nil 41 | } 42 | 43 | func (l *lambdaAwsRunner) Run(context.Context) error { 44 | lambda.Start(l.handler) 45 | 46 | return nil 47 | } 48 | 49 | func (l *lambdaAwsRunner) Close(context.Context) error { 50 | return nil 51 | } 52 | 53 | //nolint:gocritic // we pass a value to the handler 54 | func (l *lambdaAwsRunner) handler(ctx context.Context, input lInput) error { 55 | tmpDir := "/tmp" 56 | browsersDst := filepath.Join(tmpDir, "browsers") 57 | driverDst := filepath.Join(tmpDir, "ms-playwright-go") 58 | 59 | if err := l.setupBrowsersAndDriver(browsersDst, driverDst); err != nil { 60 | return err 61 | } 62 | 63 | out, err := os.Create(filepath.Join(tmpDir, "output.csv")) 64 | if err != nil { 65 | return err 66 | } 67 | 68 | defer out.Close() 69 | 70 | app, err := l.getApp(ctx, input, out) 71 | if err != nil { 72 | return err 73 | } 74 | 75 | in := strings.NewReader(strings.Join(input.Keywords, "\n")) 76 | 77 | var seedJobs []scrapemate.IJob 78 | 79 | exitMonitor := exiter.New() 80 | 81 | seedJobs, err = runner.CreateSeedJobs( 82 | false, // TODO support fast mode 83 | input.Language, 84 | in, 85 | input.Depth, 86 | false, 87 | "", 88 | 0, 89 | 10000, // TODO support radius 90 | nil, 91 | exitMonitor, 92 | input.ExtraReviews, 93 | ) 94 | if err != nil { 95 | return err 96 | } 97 | 98 | exitMonitor.SetSeedCount(len(seedJobs)) 99 | 100 | bCtx, cancel := context.WithTimeout(ctx, time.Minute*10) 101 | defer cancel() 102 | 103 | exitMonitor.SetCancelFunc(cancel) 104 | 105 | go exitMonitor.Run(bCtx) 106 | 107 | err = app.Start(bCtx, seedJobs...)
108 | if err != nil && !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) { 109 | return err 110 | } 111 | 112 | out.Close() 113 | 114 | if l.uploader != nil { 115 | key := fmt.Sprintf("%s-%d.csv", input.JobID, input.Part) 116 | 117 | fd, err := os.Open(out.Name()) 118 | if err != nil { 119 | return err 120 | } 121 | defer func() { _ = fd.Close() }() 122 | err = l.uploader.Upload(ctx, input.BucketName, key, fd) 123 | if err != nil { 124 | return err 125 | } 126 | } else { 127 | log.Println("no uploader set; results are at", out.Name()) 128 | } 129 | 130 | return nil 131 | } 132 | 133 | //nolint:gocritic // we pass a value to the handler 134 | func (l *lambdaAwsRunner) getApp(_ context.Context, input lInput, out io.Writer) (*scrapemateapp.ScrapemateApp, error) { 135 | csvWriter := csvwriter.NewCsvWriter(csv.NewWriter(out)) 136 | 137 | writers := []scrapemate.ResultWriter{csvWriter} 138 | 139 | opts := []func(*scrapemateapp.Config) error{ 140 | scrapemateapp.WithConcurrency(max(1, input.Concurrency)), 141 | scrapemateapp.WithExitOnInactivity(time.Minute), 142 | scrapemateapp.WithJS( 143 | scrapemateapp.DisableImages(), 144 | ), 145 | } 146 | 147 | if !input.DisablePageReuse { 148 | opts = append(opts, scrapemateapp.WithPageReuseLimit(2)) 149 | opts = append(opts, scrapemateapp.WithBrowserReuseLimit(200)) 150 | } 151 | 152 | mateCfg, err := scrapemateapp.NewConfig(writers, opts...) 153 | if err != nil { 154 | return nil, err 155 | } 156 | 157 | app, err := scrapemateapp.NewScrapeMateApp(mateCfg) 158 | if err != nil { 159 | return nil, err 160 | } 161 | 162 | return app, nil 163 | } 164 | 165 | func (l *lambdaAwsRunner) setupBrowsersAndDriver(browsersDst, driverDst string) error { 166 | if err := copyDir("/opt/browsers", browsersDst); err != nil { 167 | return fmt.Errorf("failed to copy browsers: %w", err) 168 | } 169 | 170 | if err := copyDir("/opt/ms-playwright-go", driverDst); err != nil { 171 | return fmt.Errorf("failed to copy driver: %w", err) 172 | } 173 | 174 | return nil 175 | } 176 | 177 | func copyDir(src, dst string) error { 178 | cmd := exec.Command("cp", "-rf", src, dst) 179 | 180 | output, err := cmd.CombinedOutput() 181 | if err != nil { 182 | return fmt.Errorf("copy failed: %v, output: %s", err, string(output)) 183 | } 184 | 185 | return nil 186 | } 187 | -------------------------------------------------------------------------------- /runner/runner.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "os" 10 | "runtime" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/mattn/go-runewidth" 16 | "golang.org/x/term" 17 | 18 | "github.com/gosom/google-maps-scraper/s3uploader" 19 | "github.com/gosom/google-maps-scraper/tlmt" 20 | "github.com/gosom/google-maps-scraper/tlmt/gonoop" 21 | "github.com/gosom/google-maps-scraper/tlmt/goposthog" 22 | ) 23 | 24 | const ( 25 | RunModeFile = iota + 1 26 | RunModeDatabase 27 | RunModeDatabaseProduce 28 | RunModeInstallPlaywright 29 | RunModeWeb 30 | RunModeAwsLambda 31 | RunModeAwsLambdaInvoker 32 | ) 33 | 34 | var ( 35 | ErrInvalidRunMode = errors.New("invalid run mode") 36 | ) 37 | 38 | type Runner interface { 39 | Run(context.Context) error 40 | Close(context.Context) error 41 | } 42 | 43 | type S3Uploader interface { 44 | Upload(ctx context.Context, bucketName, key string, body io.Reader) error 45 | } 46 | 47 | type Config struct { 48 | Concurrency int 49 | CacheDir string 50 | MaxDepth int 51 | InputFile string
52 | ResultsFile string 53 | JSON bool 54 | LangCode string 55 | Debug bool 56 | Dsn string 57 | ProduceOnly bool 58 | ExitOnInactivityDuration time.Duration 59 | Email bool 60 | CustomWriter string 61 | GeoCoordinates string 62 | Zoom int 63 | RunMode int 64 | DisableTelemetry bool 65 | WebRunner bool 66 | AwsLambdaRunner bool 67 | DataFolder string 68 | Proxies []string 69 | AwsAccessKey string 70 | AwsSecretKey string 71 | AwsRegion string 72 | S3Uploader S3Uploader 73 | S3Bucket string 74 | AwsLambdaInvoker bool 75 | FunctionName string 76 | AwsLambdaChunkSize int 77 | FastMode bool 78 | Radius float64 79 | Addr string 80 | DisablePageReuse bool 81 | ExtraReviews bool 82 | } 83 | 84 | func ParseConfig() *Config { 85 | cfg := Config{} 86 | 87 | if os.Getenv("PLAYWRIGHT_INSTALL_ONLY") == "1" { 88 | cfg.RunMode = RunModeInstallPlaywright 89 | 90 | return &cfg 91 | } 92 | 93 | var ( 94 | proxies string 95 | ) 96 | 97 | flag.IntVar(&cfg.Concurrency, "c", max(runtime.NumCPU()/2, 1), "sets the concurrency [default: half of CPU cores]") 98 | flag.StringVar(&cfg.CacheDir, "cache", "cache", "sets the cache directory [no effect at the moment]") 99 | flag.IntVar(&cfg.MaxDepth, "depth", 10, "maximum scroll depth in search results [default: 10]") 100 | flag.StringVar(&cfg.ResultsFile, "results", "stdout", "path to the results file [default: stdout]") 101 | flag.StringVar(&cfg.InputFile, "input", "", "path to the input file with queries (one per line) [default: empty]") 102 | flag.StringVar(&cfg.LangCode, "lang", "en", "language code for Google (e.g., 'de' for German) [default: en]") 103 | flag.BoolVar(&cfg.Debug, "debug", false, "enable headful crawl (opens browser window) [default: false]") 104 | flag.StringVar(&cfg.Dsn, "dsn", "", "database connection string [only valid with database provider]") 105 | flag.BoolVar(&cfg.ProduceOnly, "produce", false, "produce seed jobs only (requires dsn)") 106 | flag.DurationVar(&cfg.ExitOnInactivityDuration, "exit-on-inactivity", 0, "exit after inactivity duration (e.g., '5m')") 107 | flag.BoolVar(&cfg.JSON, "json", false, "produce JSON output instead of CSV") 108 | flag.BoolVar(&cfg.Email, "email", false, "extract emails from websites") 109 | flag.StringVar(&cfg.CustomWriter, "writer", "", "use custom writer plugin (format: 'dir:pluginName')") 110 | flag.StringVar(&cfg.GeoCoordinates, "geo", "", "set geo coordinates for search (e.g., '37.7749,-122.4194')") 111 | flag.IntVar(&cfg.Zoom, "zoom", 15, "set zoom level (0-21) for search") 112 | flag.BoolVar(&cfg.WebRunner, "web", false, "run web server instead of crawling") 113 | flag.StringVar(&cfg.DataFolder, "data-folder", "webdata", "data folder for web runner") 114 | flag.StringVar(&proxies, "proxies", "", "comma separated list of proxies to use in the format protocol://user:pass@host:port example: socks5://localhost:9050 or http://user:pass@localhost:9050") 115 | flag.BoolVar(&cfg.AwsLambdaRunner, "aws-lambda", false, "run as AWS Lambda function") 116 | flag.BoolVar(&cfg.AwsLambdaInvoker, "aws-lambda-invoker", false, "run as AWS Lambda invoker") 117 | flag.StringVar(&cfg.FunctionName, "function-name", "", "AWS Lambda function name") 118 | flag.StringVar(&cfg.AwsAccessKey, "aws-access-key", "", "AWS access key") 119 | flag.StringVar(&cfg.AwsSecretKey, "aws-secret-key", "", "AWS secret key") 120 | flag.StringVar(&cfg.AwsRegion, "aws-region", "", "AWS region") 121 | flag.StringVar(&cfg.S3Bucket, "s3-bucket", "", "S3 bucket name") 122 | flag.IntVar(&cfg.AwsLambdaChunkSize, "aws-lambda-chunk-size", 100, "AWS Lambda chunk size")
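// Note: the AWS credential flags above also fall back to the MY_AWS_ACCESS_KEY, MY_AWS_SECRET_KEY and MY_AWS_REGION environment variables right after flag.Parse below.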
123 | flag.BoolVar(&cfg.FastMode, "fast-mode", false, "fast mode (reduced data collection)") 124 | flag.Float64Var(&cfg.Radius, "radius", 10000, "search radius in meters [default: 10000]") 125 | flag.StringVar(&cfg.Addr, "addr", ":8080", "address to listen on for web server") 126 | flag.BoolVar(&cfg.DisablePageReuse, "disable-page-reuse", false, "disable page reuse in playwright") 127 | flag.BoolVar(&cfg.ExtraReviews, "extra-reviews", false, "enable extra reviews collection") 128 | 129 | flag.Parse() 130 | 131 | if cfg.AwsAccessKey == "" { 132 | cfg.AwsAccessKey = os.Getenv("MY_AWS_ACCESS_KEY") 133 | } 134 | 135 | if cfg.AwsSecretKey == "" { 136 | cfg.AwsSecretKey = os.Getenv("MY_AWS_SECRET_KEY") 137 | } 138 | 139 | if cfg.AwsRegion == "" { 140 | cfg.AwsRegion = os.Getenv("MY_AWS_REGION") 141 | } 142 | 143 | if cfg.AwsLambdaInvoker && cfg.FunctionName == "" { 144 | panic("FunctionName must be provided when using AwsLambdaInvoker") 145 | } 146 | 147 | if cfg.AwsLambdaInvoker && cfg.S3Bucket == "" { 148 | panic("S3Bucket must be provided when using AwsLambdaInvoker") 149 | } 150 | 151 | if cfg.AwsLambdaInvoker && cfg.InputFile == "" { 152 | panic("InputFile must be provided when using AwsLambdaInvoker") 153 | } 154 | 155 | if cfg.Concurrency < 1 { 156 | panic("Concurrency must be greater than 0") 157 | } 158 | 159 | if cfg.MaxDepth < 1 { 160 | panic("MaxDepth must be greater than 0") 161 | } 162 | 163 | if cfg.Zoom < 0 || cfg.Zoom > 21 { 164 | panic("Zoom must be between 0 and 21") 165 | } 166 | 167 | if cfg.Dsn == "" && cfg.ProduceOnly { 168 | panic("Dsn must be provided when using ProduceOnly") 169 | } 170 | 171 | if proxies != "" { 172 | cfg.Proxies = strings.Split(proxies, ",") 173 | } 174 | 175 | if cfg.AwsAccessKey != "" && cfg.AwsSecretKey != "" && cfg.AwsRegion != "" { 176 | cfg.S3Uploader = s3uploader.New(cfg.AwsAccessKey, cfg.AwsSecretKey, cfg.AwsRegion) 177 | } 178 | 179 | switch { 180 | case cfg.AwsLambdaInvoker: 181 | cfg.RunMode = RunModeAwsLambdaInvoker 182 | case cfg.AwsLambdaRunner: 183 | cfg.RunMode = RunModeAwsLambda 184 | case cfg.WebRunner || (cfg.Dsn == "" && cfg.InputFile == ""): 185 | cfg.RunMode = RunModeWeb 186 | case cfg.Dsn == "": 187 | cfg.RunMode = RunModeFile 188 | case cfg.ProduceOnly: 189 | cfg.RunMode = RunModeDatabaseProduce 190 | case cfg.Dsn != "": 191 | cfg.RunMode = RunModeDatabase 192 | default: 193 | panic("Invalid configuration") 194 | } 195 | 196 | return &cfg 197 | } 198 | 199 | var ( 200 | telemetryOnce sync.Once 201 | telemetry tlmt.Telemetry 202 | ) 203 | 204 | func Telemetry() tlmt.Telemetry { 205 | telemetryOnce.Do(func() { 206 | disableTel := func() bool { 207 | return os.Getenv("DISABLE_TELEMETRY") == "1" 208 | }() 209 | 210 | if disableTel { 211 | telemetry = gonoop.New() 212 | 213 | return 214 | } 215 | 216 | val, err := goposthog.New("phc_CHYBGEd1eJZzDE7ZWhyiSFuXa9KMLRnaYN47aoIAY2A", "https://eu.i.posthog.com") 217 | if err != nil || val == nil { 218 | telemetry = gonoop.New() 219 | 220 | return 221 | } 222 | 223 | telemetry = val 224 | }) 225 | 226 | return telemetry 227 | } 228 | 229 | func wrapText(text string, width int) []string { 230 | var lines []string 231 | 232 | currentLine := "" 233 | currentWidth := 0 234 | 235 | for _, r := range text { 236 | runeWidth := runewidth.RuneWidth(r) 237 | if currentWidth+runeWidth > width { 238 | lines = append(lines, currentLine) 239 | currentLine = string(r) 240 | currentWidth = runeWidth 241 | } else { 242 | currentLine += string(r) 243 | currentWidth += runeWidth
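// Widths come from runewidth rather than len(), so double-width glyphs (CJK, emoji) advance the count by two and the box borders drawn in banner stay aligned.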
244 | } 245 | } 246 | 247 | if currentLine != "" { 248 | lines = append(lines, currentLine) 249 | } 250 | 251 | return lines 252 | } 253 | 254 | func banner(messages []string, width int) string { 255 | if width <= 0 { 256 | var err error 257 | 258 | width, _, err = term.GetSize(0) 259 | if err != nil { 260 | width = 80 261 | } 262 | } 263 | 264 | if width < 20 { 265 | width = 20 266 | } 267 | 268 | contentWidth := width - 4 269 | 270 | var wrappedLines []string 271 | for _, message := range messages { 272 | wrappedLines = append(wrappedLines, wrapText(message, contentWidth)...) 273 | } 274 | 275 | var builder strings.Builder 276 | 277 | builder.WriteString("╔" + strings.Repeat("═", width-2) + "╗\n") 278 | 279 | for _, line := range wrappedLines { 280 | lineWidth := runewidth.StringWidth(line) 281 | paddingRight := contentWidth - lineWidth 282 | 283 | if paddingRight < 0 { 284 | paddingRight = 0 285 | } 286 | 287 | builder.WriteString(fmt.Sprintf("║ %s%s ║\n", line, strings.Repeat(" ", paddingRight))) 288 | } 289 | 290 | builder.WriteString("╚" + strings.Repeat("═", width-2) + "╝\n") 291 | 292 | return builder.String() 293 | } 294 | 295 | func Banner() { 296 | message1 := "🌍 Google Maps Scraper" 297 | message2 := "⭐ If you find this project useful, please star it on GitHub: https://github.com/gosom/google-maps-scraper" 298 | message3 := "💖 Consider sponsoring to support development: https://github.com/sponsors/gosom" 299 | 300 | fmt.Fprintln(os.Stderr, banner([]string{message1, message2, message3}, 0)) 301 | } 302 | -------------------------------------------------------------------------------- /runner/webrunner/webrunner.go: -------------------------------------------------------------------------------- 1 | package webrunner 2 | 3 | import ( 4 | "context" 5 | "encoding/csv" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "strings" 13 | "time" 14 | 15 | "github.com/gosom/google-maps-scraper/deduper" 16 | "github.com/gosom/google-maps-scraper/exiter" 17 | "github.com/gosom/google-maps-scraper/runner" 18 | "github.com/gosom/google-maps-scraper/tlmt" 19 | "github.com/gosom/google-maps-scraper/web" 20 | "github.com/gosom/google-maps-scraper/web/sqlite" 21 | "github.com/gosom/scrapemate" 22 | "github.com/gosom/scrapemate/adapters/writers/csvwriter" 23 | "github.com/gosom/scrapemate/scrapemateapp" 24 | "golang.org/x/sync/errgroup" 25 | ) 26 | 27 | type webrunner struct { 28 | srv *web.Server 29 | svc *web.Service 30 | cfg *runner.Config 31 | } 32 | 33 | func New(cfg *runner.Config) (runner.Runner, error) { 34 | if cfg.DataFolder == "" { 35 | return nil, fmt.Errorf("data folder is required") 36 | } 37 | 38 | if err := os.MkdirAll(cfg.DataFolder, os.ModePerm); err != nil { 39 | return nil, err 40 | } 41 | 42 | const dbfname = "jobs.db" 43 | 44 | dbpath := filepath.Join(cfg.DataFolder, dbfname) 45 | 46 | repo, err := sqlite.New(dbpath) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | svc := web.NewService(repo, cfg.DataFolder) 52 | 53 | srv, err := web.New(svc, cfg.Addr) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | ans := webrunner{ 59 | srv: srv, 60 | svc: svc, 61 | cfg: cfg, 62 | } 63 | 64 | return &ans, nil 65 | } 66 | 67 | func (w *webrunner) Run(ctx context.Context) error { 68 | egroup, ctx := errgroup.WithContext(ctx) 69 | 70 | egroup.Go(func() error { 71 | return w.work(ctx) 72 | }) 73 | 74 | egroup.Go(func() error { 75 | return w.srv.Start(ctx) 76 | }) 77 | 78 | return egroup.Wait() 79 | } 80 | 81 | func (w *webrunner) 
Close(context.Context) error { 82 | return nil 83 | } 84 | 85 | func (w *webrunner) work(ctx context.Context) error { 86 | ticker := time.NewTicker(time.Second) 87 | defer ticker.Stop() 88 | 89 | for { 90 | select { 91 | case <-ctx.Done(): 92 | return nil 93 | case <-ticker.C: 94 | jobs, err := w.svc.SelectPending(ctx) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | for i := range jobs { 100 | select { 101 | case <-ctx.Done(): 102 | return nil 103 | default: 104 | t0 := time.Now().UTC() 105 | if err := w.scrapeJob(ctx, &jobs[i]); err != nil { 106 | params := map[string]any{ 107 | "job_count": len(jobs[i].Data.Keywords), 108 | "duration": time.Now().UTC().Sub(t0).String(), 109 | "error": err.Error(), 110 | } 111 | 112 | evt := tlmt.NewEvent("web_runner", params) 113 | 114 | _ = runner.Telemetry().Send(ctx, evt) 115 | 116 | log.Printf("error scraping job %s: %v", jobs[i].ID, err) 117 | } else { 118 | params := map[string]any{ 119 | "job_count": len(jobs[i].Data.Keywords), 120 | "duration": time.Now().UTC().Sub(t0).String(), 121 | } 122 | 123 | _ = runner.Telemetry().Send(ctx, tlmt.NewEvent("web_runner", params)) 124 | 125 | log.Printf("job %s scraped successfully", jobs[i].ID) 126 | } 127 | } 128 | } 129 | } 130 | } 131 | } 132 | 133 | func (w *webrunner) scrapeJob(ctx context.Context, job *web.Job) error { 134 | job.Status = web.StatusWorking 135 | 136 | err := w.svc.Update(ctx, job) 137 | if err != nil { 138 | return err 139 | } 140 | 141 | if len(job.Data.Keywords) == 0 { 142 | job.Status = web.StatusFailed 143 | 144 | return w.svc.Update(ctx, job) 145 | } 146 | 147 | outpath := filepath.Join(w.cfg.DataFolder, job.ID+".csv") 148 | 149 | outfile, err := os.Create(outpath) 150 | if err != nil { 151 | return err 152 | } 153 | 154 | defer func() { 155 | _ = outfile.Close() 156 | }() 157 | 158 | mate, err := w.setupMate(ctx, outfile, job) 159 | if err != nil { 160 | job.Status = web.StatusFailed 161 | 162 | err2 := w.svc.Update(ctx, job) 163 | if err2 != nil { 164 | log.Printf("failed to update job status: %v", err2) 165 | } 166 | 167 | return err 168 | } 169 | 170 | defer mate.Close() 171 | 172 | var coords string 173 | if job.Data.Lat != "" && job.Data.Lon != "" { 174 | coords = job.Data.Lat + "," + job.Data.Lon 175 | } 176 | 177 | dedup := deduper.New() 178 | exitMonitor := exiter.New() 179 | 180 | seedJobs, err := runner.CreateSeedJobs( 181 | job.Data.FastMode, 182 | job.Data.Lang, 183 | strings.NewReader(strings.Join(job.Data.Keywords, "\n")), 184 | job.Data.Depth, 185 | job.Data.Email, 186 | coords, 187 | job.Data.Zoom, 188 | func() float64 { 189 | if job.Data.Radius <= 0 { 190 | return 10000 // 10 km 191 | } 192 | 193 | return float64(job.Data.Radius) 194 | }(), 195 | dedup, 196 | exitMonitor, 197 | w.cfg.ExtraReviews, 198 | ) 199 | if err != nil { 200 | job.Status = web.StatusFailed 201 | err2 := w.svc.Update(ctx, job) 202 | if err2 != nil { 203 | log.Printf("failed to update job status: %v", err2) 204 | } 205 | return err 206 | } 207 | 208 | if len(seedJobs) > 0 { 209 | exitMonitor.SetSeedCount(len(seedJobs)) 210 | 211 | allowedSeconds := max(60, len(seedJobs)*10*job.Data.Depth/50+120) 212 | 213 | if job.Data.MaxTime > 0 { 214 | if job.Data.MaxTime.Seconds() < 180 { 215 | allowedSeconds = 180 216 | } else { 217 | allowedSeconds = int(job.Data.MaxTime.Seconds()) 218 | } 219 | } 220 | 221 | log.Printf("running job %s with %d seed jobs and %d allowed seconds", job.ID, len(seedJobs), allowedSeconds) 222 | 223 | mateCtx, cancel := context.WithTimeout(ctx, time.Duration(allowedSeconds)*time.Second) 224 | defer cancel()
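// The exit monitor receives the cancel func so it can stop the scrape as soon as all seed jobs (and the jobs they spawn) are accounted for, instead of always waiting out the full timeout.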
225 | 226 | exitMonitor.SetCancelFunc(cancel) 227 | 228 | go exitMonitor.Run(mateCtx) 229 | 230 | err = mate.Start(mateCtx, seedJobs...) 231 | if err != nil && !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) { 232 | cancel() 233 | job.Status = web.StatusFailed 234 | err2 := w.svc.Update(ctx, job) 235 | if err2 != nil { 236 | log.Printf("failed to update job status: %v", err2) 237 | } 238 | 239 | return err 240 | } 241 | 242 | cancel() 243 | } 244 | 245 | mate.Close() 246 | 247 | job.Status = web.StatusOK 248 | 249 | return w.svc.Update(ctx, job) 250 | } 251 | 252 | func (w *webrunner) setupMate(_ context.Context, writer io.Writer, job *web.Job) (*scrapemateapp.ScrapemateApp, error) { 253 | opts := []func(*scrapemateapp.Config) error{ 254 | scrapemateapp.WithConcurrency(w.cfg.Concurrency), 255 | scrapemateapp.WithExitOnInactivity(time.Minute * 3), 256 | } 257 | 258 | if !job.Data.FastMode { 259 | opts = append(opts, 260 | scrapemateapp.WithJS(scrapemateapp.DisableImages()), 261 | ) 262 | } else { 263 | opts = append(opts, 264 | scrapemateapp.WithStealth("firefox"), 265 | ) 266 | } 267 | 268 | hasProxy := false 269 | 270 | if len(w.cfg.Proxies) > 0 { 271 | opts = append(opts, scrapemateapp.WithProxies(w.cfg.Proxies)) 272 | hasProxy = true 273 | } else if len(job.Data.Proxies) > 0 { 274 | opts = append(opts, 275 | scrapemateapp.WithProxies(job.Data.Proxies), 276 | ) 277 | hasProxy = true 278 | } 279 | 280 | if !w.cfg.DisablePageReuse { 281 | opts = append(opts, 282 | scrapemateapp.WithPageReuseLimit(2), 283 | scrapemateapp.WithBrowserReuseLimit(200), 284 | ) 285 | } 286 | 287 | log.Printf("job %s has proxy: %v", job.ID, hasProxy) 288 | 289 | csvWriter := csvwriter.NewCsvWriter(csv.NewWriter(writer)) 290 | 291 | writers := []scrapemate.ResultWriter{csvWriter} 292 | 293 | matecfg, err := scrapemateapp.NewConfig( 294 | writers, 295 | opts..., 296 | ) 297 | if err != nil { 298 | return nil, err 299 | } 300 | 301 | return scrapemateapp.NewScrapeMateApp(matecfg) 302 | } 303 | -------------------------------------------------------------------------------- /s3uploader/s3uploader.go: -------------------------------------------------------------------------------- 1 | package s3uploader 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "github.com/aws/aws-sdk-go-v2/aws" 8 | "github.com/aws/aws-sdk-go-v2/config" 9 | "github.com/aws/aws-sdk-go-v2/credentials" 10 | "github.com/aws/aws-sdk-go-v2/service/s3" 11 | ) 12 | 13 | type Uploader struct { 14 | client *s3.Client 15 | } 16 | 17 | func New(accessKey, secretKey, region string) *Uploader { 18 | creds := credentials.NewStaticCredentialsProvider(accessKey, secretKey, "") 19 | 20 | cfg, err := config.LoadDefaultConfig(context.Background(), 21 | config.WithCredentialsProvider(creds), 22 | config.WithRegion(region), 23 | ) 24 | if err != nil { 25 | return nil 26 | } 27 | 28 | client := s3.NewFromConfig(cfg) 29 | 30 | return &Uploader{ 31 | client: client, 32 | } 33 | } 34 | 35 | func (u *Uploader) Upload(ctx context.Context, bucketName, key string, body io.Reader) error { 36 | input := &s3.PutObjectInput{ 37 | Bucket: aws.String(bucketName), 38 | Key: aws.String(key), 39 | Body: body, 40 | } 41 | 42 | _, err := u.client.PutObject(ctx, input) 43 | if err != nil { 44 | return err 45 | } 46 | 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /scrap_io.md: -------------------------------------------------------------------------------- 1 | # Scrap.io: Google Maps Scraping Premium 2 | 3 | [![No time 
for code? Extract all Google Maps listings at country-scale in 2 clicks without keywords or limits](img/premium_scrap_io.png)](https://scrap.io?utm_medium=ads&utm_source=github_gosom_gmap_scraper) 4 | 5 | ## 🚀 Why Choose Scrap.io? 6 | 7 | ### Country-Wide Scraping Without The Hassle 8 | - Get **all businesses** in an entire country instantly 9 | - No keyword-by-keyword searches 10 | - No results limitations per search 11 | - Complete data without technical overhead 12 | 13 | ### Zero Setup Required 14 | - No code to install 15 | - No configuration needed 16 | - Start scraping immediately 17 | - Premium support included 18 | 19 | ## 🎯 Get Started 20 | Stop wasting time with technical setups and get your Google Maps data instantly. 21 | 22 | [Let's go →](https://scrap.io?utm_medium=ads&utm_source=github_gosom_gmap_scraper/) 23 | 24 | [![Scrape a full country in just 2 clicks](img/premium_scrap_io_demo.gif)](https://scrap.io?utm_medium=ads&utm_source=github_gosom_gmap_scraper) -------------------------------------------------------------------------------- /scripts/migrations/0001_create_tables.down.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | DROP TABLE gmaps_jobs; 3 | COMMIT; 4 | -------------------------------------------------------------------------------- /scripts/migrations/0001_create_tables.up.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | CREATE TABLE gmaps_jobs( 3 | id UUID PRIMARY KEY, 4 | priority SMALLINT NOT NULL, 5 | payload_type TEXT NOT NULL, 6 | payload BYTEA NOT NULL, 7 | created_at TIMESTAMP WITH TIME ZONE NOT NULL, 8 | status TEXT NOT NULL 9 | ); 10 | 11 | CREATE TABLE results( 12 | id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, 13 | title TEXT NOT NULL, 14 | category TEXT NOT NULL, 15 | address TEXT NOT NULL, 16 | openhours TEXT NOT NULL, 17 | website TEXT NOT NULL, 18 | phone TEXT NOT NULL, 19 | pluscode TEXT NOT NULL, 20 | review_count INT NOT NULL, 21 | rating NUMERIC NOT NULL 22 | ); 23 | 24 | COMMIT; 25 | -------------------------------------------------------------------------------- /scripts/migrations/0002_add_lat_lon_results.down.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | ALTER TABLE results DROP COLUMN latitude; 3 | ALTER TABLE results DROP COLUMN longitude; 4 | COMMIT; 5 | -------------------------------------------------------------------------------- /scripts/migrations/0002_add_lat_lon_results.up.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | ALTER TABLE results 3 | ADD COLUMN latitude DOUBLE PRECISION NOT NULL DEFAULT 0, 4 | ADD COLUMN longitude DOUBLE PRECISION NOT NULL DEFAULT 0; 5 | COMMIT; 6 | -------------------------------------------------------------------------------- /scripts/migrations/0003_results_jsonb.dow.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | ALTER TABLE results 3 | ADD COLUMN title TEXT NOT NULL, 4 | ADD COLUMN category TEXT NOT NULL, 5 | ADD COLUMN address TEXT NOT NULL, 6 | ADD COLUMN openhours TEXT NOT NULL, 7 | ADD COLUMN website TEXT NOT NULL, 8 | ADD COLUMN phone TEXT NOT NULL, 9 | ADD COLUMN pluscode TEXT NOT NULL, 10 | ADD COLUMN review_count INT NOT NULL, 11 | ADD COLUMN rating NUMERIC NOT NULL, 12 | ADD COLUMN latitude DOUBLE PRECISION NOT NULL DEFAULT 0, 13 | ADD COLUMN longitude DOUBLE PRECISION NOT NULL DEFAULT 0; 14 | COMMIT; 15 | 
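-- Caveat: this down migration re-adds the flat columns but never drops the jsonb "data" column introduced by the matching up migration below, and the re-added NOT NULL columns have no defaults, so it will fail on a non-empty results table unless values are backfilled first.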
-------------------------------------------------------------------------------- /scripts/migrations/0003_results_jsonb.up.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | ALTER TABLE results DROP COLUMN title; 3 | ALTER TABLE results DROP COLUMN category; 4 | ALTER TABLE results DROP COLUMN address; 5 | ALTER TABLE results DROP COLUMN openhours; 6 | ALTER TABLE results DROP COLUMN website; 7 | ALTER TABLE results DROP COLUMN phone; 8 | ALTER TABLE results DROP COLUMN pluscode; 9 | ALTER TABLE results DROP COLUMN review_count; 10 | ALTER TABLE results DROP COLUMN rating; 11 | ALTER TABLE results DROP COLUMN latitude; 12 | ALTER TABLE results DROP COLUMN longitude; 13 | 14 | ALTER TABLE results 15 | ADD COLUMN data JSONB NOT NULL; 16 | 17 | COMMIT; 18 | -------------------------------------------------------------------------------- /scripts/migrations/0004_add-index-gmaps_jobs.up.sql: -------------------------------------------------------------------------------- 1 | BEGIN; 2 | 3 | CREATE INDEX idx_gmaps_jobs_status_priority_created ON gmaps_jobs(status, priority ASC, created_at ASC); 4 | 5 | COMMIT; 6 | -------------------------------------------------------------------------------- /serpapi.md: -------------------------------------------------------------------------------- 1 | # SerpApi 2 | 3 | At [SerpApi](https://serpapi.com/?utm_source=google-maps-scraper), we scrape public data not only from Google but from all other top search engines, as well as Home Depot and Walmart product catalogs. You can check the full list of our APIs on the left panel under the following link: 4 | 5 | 6 | 7 | If you want to get a sense of what SerpApi can accomplish, I would suggest checking out our Playground, where you can try out our API and experiment with many of the search engines and parameters we support: 8 | 9 | 10 | 11 | We offer the best response times and success rates on the market. You can refer to it here: 12 | 13 | 14 | 15 | SerpApi manages the intricacies of scraping and returns structured JSON results. Search Engines constantly experiment with new layouts, new elements, and other changes. We do all the work to maintain all of our parsers and adapt them to respond to changes quickly. By taking care of this for you, we eliminate a lot of time and complexity from your workflow. 16 | 17 | We are a subscription-based company, and we offer a wide variety of plans. You can view all of our self-serve plans by registering a free account at serpapi.com and checking the following link: 18 | 19 | 20 | 21 | ​If you need a higher volume, custom features, payment by wire transfer, custom contracts, or have any questions please reach out to us at . We will be happy to help you get started. 22 | 23 | ### About our Google Maps API 24 | 25 | This API makes it easy to scrape information, photos, and reviews of businesses and locations on Google Maps. Whether you want to get data for a location by its name, or are doing a keyword search for a list of related businesses, you can scrape Google Maps data for any query with our Google Maps API. 26 | 27 | This can be a powerful tool for content aggregators, business owners, entrepreneurs, and others. 
To see this in action, head to the Playground: 28 | 29 | You can find the documentation for our Google Maps API here: 30 | 31 | ### Other Relevant APIs 32 | 33 | We offer a couple of APIs which allow you to scrape specific information from Google Maps: 34 | 35 | [Google Maps Autocomplete API](https://serpapi.com/google-maps-autocomplete-api): Allows you to get suggestions for a keyword in Google Maps. 36 | 37 | [Google Maps Contributor Review API](https://serpapi.com/google-maps-contributor-reviews-api): Allows you to scrape all the reviews from a Google Maps user. 38 | 39 | [Google Maps Directions API](https://serpapi.com/google-maps-directions-api): Allows you to scrape directions results from Google Maps. 40 | 41 | [Google Maps Local Results API](https://serpapi.com/maps-local-results): Allows you to scrape the results of a local Google Maps Search. 42 | 43 | [Google Maps Photo Meta API](https://serpapi.com/google-maps-photo-meta-api): Allows you to scrape information like location, user and date of photos available on Google Maps.  44 | 45 | [Google Maps Photos API](https://serpapi.com/google-maps-photos-api): Allows you to scrape photos from Google Maps. 46 | 47 | [Google Maps Place Results API](https://serpapi.com/maps-place-results): Allows you to scrape data about a particular place on Google Maps. 48 | 49 | [Google Maps Review API](https://serpapi.com/google-maps-reviews-api): Allows you to scrape reviews from Google Maps. 50 | 51 | We also offer additional APIs which allow you to scrape maps data from other search engines: 52 | 53 | [Bing Maps API](https://serpapi.com/bing-maps-api): Allows you to scrape results from Bing Maps. 54 | 55 | [DuckDuckGo Maps API](https://serpapi.com/duckduckgo-maps-api): Allows you to scrape results from the DuckDuckGo Maps search page. 56 | 57 | If you have any questions please reach out to us at . Our team will be happy to help you. 
-------------------------------------------------------------------------------- /tlmt/gonoop/gonoop.go: -------------------------------------------------------------------------------- 1 | package gonoop 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/gosom/google-maps-scraper/tlmt" 7 | ) 8 | 9 | type service struct { 10 | } 11 | 12 | func New() tlmt.Telemetry { 13 | return &service{} 14 | } 15 | 16 | func (s *service) Send(context.Context, tlmt.Event) error { 17 | return nil 18 | } 19 | 20 | func (s *service) Close() error { 21 | return nil 22 | } 23 | -------------------------------------------------------------------------------- /tlmt/goposthog/goposthog.go: -------------------------------------------------------------------------------- 1 | package goposthog 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/gosom/google-maps-scraper/tlmt" 7 | "github.com/posthog/posthog-go" 8 | ) 9 | 10 | type service struct { 11 | client posthog.Client 12 | } 13 | 14 | func New(publicAPIKEY, endpointURL string) (tlmt.Telemetry, error) { 15 | client, err := posthog.NewWithConfig(publicAPIKEY, posthog.Config{Endpoint: endpointURL}) 16 | if err != nil { 17 | return nil, err 18 | } 19 | 20 | ans := service{ 21 | client: client, 22 | } 23 | 24 | return &ans, nil 25 | } 26 | 27 | func (s *service) Send(_ context.Context, event tlmt.Event) error { 28 | capture := posthog.Capture{ 29 | DistinctId: event.AnonymousID, 30 | Event: event.Name, 31 | Properties: event.Properties, 32 | } 33 | 34 | if err := capture.Validate(); err != nil { 35 | return err 36 | } 37 | 38 | return s.client.Enqueue(capture) 39 | } 40 | 41 | func (s *service) Close() error { 42 | if s.client != nil { 43 | return s.client.Close() 44 | } 45 | 46 | return nil 47 | } 48 | -------------------------------------------------------------------------------- /tlmt/tlmt.go: -------------------------------------------------------------------------------- 1 | package tlmt 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "fmt" 7 | "io" 8 | "math/rand/v2" 9 | "net/http" 10 | "runtime" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/google/uuid" 16 | "github.com/shirou/gopsutil/v4/host" 17 | ) 18 | 19 | var ( 20 | once sync.Once 21 | identifier machineIdentifier 22 | ) 23 | 24 | type Event struct { 25 | AnonymousID string 26 | Name string 27 | Properties map[string]any 28 | } 29 | 30 | func NewEvent(name string, props map[string]any) Event { 31 | ev := Event{ 32 | AnonymousID: generateMachineID().id, 33 | Name: name, 34 | Properties: generateMachineID().meta, 35 | } 36 | 37 | for k, v := range props { 38 | ev.Properties[k] = v 39 | } 40 | 41 | return ev 42 | } 43 | 44 | type Telemetry interface { 45 | Send(ctx context.Context, event Event) error 46 | Close() error 47 | } 48 | 49 | type machineIdentifier struct { 50 | id string 51 | meta map[string]any 52 | } 53 | 54 | func generateMachineID() machineIdentifier { 55 | once.Do(func() { 56 | ip := fetchExternalIP() 57 | if ip == "" { 58 | ip = uuid.New().String() 59 | } 60 | 61 | hash := sha256.New() 62 | hash.Write([]byte(ip)) 63 | hash.Write([]byte(runtime.GOARCH)) 64 | hash.Write([]byte(runtime.GOOS)) 65 | hash.Write([]byte(runtime.Version())) 66 | 67 | id := fmt.Sprintf("%x", hash.Sum(nil)) 68 | 69 | meta := make(map[string]any) 70 | 71 | info, err := host.Info() 72 | if err == nil { 73 | meta["os"] = info.OS 74 | meta["platform"] = info.Platform 75 | meta["platform_family"] = info.PlatformFamily 76 | meta["platform_version"] = info.PlatformVersion 77 | } 78 | 79 | 
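// Cache the result: sync.Once guarantees the ID and host metadata are computed a single time, so subsequent events reuse the same anonymous ID without another network probe.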
identifier.id = id 80 | identifier.meta = meta 81 | }) 82 | 83 | return identifier 84 | } 85 | 86 | func fetchExternalIP() string { 87 | endpoints := []string{ 88 | "https://api.ipify.org", 89 | "https://ifconfig.me", 90 | "https://icanhazip.com", 91 | "https://ident.me", 92 | "https://ifconfig.co", 93 | } 94 | 95 | rand.Shuffle(len(endpoints), func(i, j int) { 96 | endpoints[i], endpoints[j] = endpoints[j], endpoints[i] 97 | }) 98 | 99 | client := http.Client{ 100 | Timeout: 5 * time.Second, 101 | } 102 | 103 | for _, endpoint := range endpoints { 104 | ip := func(u string) string { 105 | req, err := http.NewRequest(http.MethodGet, u, http.NoBody) 106 | if err != nil { 107 | return "" 108 | } 109 | 110 | resp, err := client.Do(req) 111 | if err != nil { 112 | return "" 113 | } 114 | 115 | defer func() { 116 | _, _ = io.Copy(io.Discard, resp.Body) 117 | resp.Body.Close() 118 | }() 119 | 120 | if resp.StatusCode != http.StatusOK { 121 | return "" 122 | } 123 | 124 | ip, err := io.ReadAll(resp.Body) 125 | if err != nil { 126 | return "" 127 | } 128 | 129 | return strings.TrimSpace(string(ip)) 130 | }(endpoint) 131 | 132 | if ip != "" { 133 | return ip 134 | } 135 | } 136 | 137 | return "" 138 | } 139 | -------------------------------------------------------------------------------- /web/errors.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrNotFound = errors.New("not found") 7 | ErrAlreadyExists = errors.New("already exists") 8 | ) 9 | -------------------------------------------------------------------------------- /web/job.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "time" 7 | ) 8 | 9 | var jobs []Job 10 | 11 | const ( 12 | StatusPending = "pending" 13 | StatusWorking = "working" 14 | StatusOK = "ok" 15 | StatusFailed = "failed" 16 | ) 17 | 18 | type SelectParams struct { 19 | Status string 20 | Limit int 21 | } 22 | 23 | type JobRepository interface { 24 | Get(context.Context, string) (Job, error) 25 | Create(context.Context, *Job) error 26 | Delete(context.Context, string) error 27 | Select(context.Context, SelectParams) ([]Job, error) 28 | Update(context.Context, *Job) error 29 | } 30 | 31 | type Job struct { 32 | ID string 33 | Name string 34 | Date time.Time 35 | Status string 36 | Data JobData 37 | } 38 | 39 | func (j *Job) Validate() error { 40 | if j.ID == "" { 41 | return errors.New("missing id") 42 | } 43 | 44 | if j.Name == "" { 45 | return errors.New("missing name") 46 | } 47 | 48 | if j.Status == "" { 49 | return errors.New("missing status") 50 | } 51 | 52 | if j.Date.IsZero() { 53 | return errors.New("missing date") 54 | } 55 | 56 | if err := j.Data.Validate(); err != nil { 57 | return err 58 | } 59 | 60 | return nil 61 | } 62 | 63 | type JobData struct { 64 | Keywords []string `json:"keywords"` 65 | Lang string `json:"lang"` 66 | Zoom int `json:"zoom"` 67 | Lat string `json:"lat"` 68 | Lon string `json:"lon"` 69 | FastMode bool `json:"fast_mode"` 70 | Radius int `json:"radius"` 71 | Depth int `json:"depth"` 72 | Email bool `json:"email"` 73 | MaxTime time.Duration `json:"max_time"` 74 | Proxies []string `json:"proxies"` 75 | } 76 | 77 | func (d *JobData) Validate() error { 78 | if len(d.Keywords) == 0 { 79 | return errors.New("missing keywords") 80 | } 81 | 82 | if d.Lang == "" { 83 | return errors.New("missing lang") 84 | } 85 | 86 | if len(d.Lang) != 2 { 87 | return 
errors.New("invalid lang") 88 | } 89 | 90 | if d.Depth == 0 { 91 | return errors.New("missing depth") 92 | } 93 | 94 | if d.MaxTime == 0 { 95 | return errors.New("missing max time") 96 | } 97 | 98 | if d.FastMode && (d.Lat == "" || d.Lon == "") { 99 | return errors.New("missing geo coordinates") 100 | } 101 | 102 | return nil 103 | } 104 | -------------------------------------------------------------------------------- /web/service.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | ) 10 | 11 | type Service struct { 12 | repo JobRepository 13 | dataFolder string 14 | } 15 | 16 | func NewService(repo JobRepository, dataFolder string) *Service { 17 | return &Service{ 18 | repo: repo, 19 | dataFolder: dataFolder, 20 | } 21 | } 22 | 23 | func (s *Service) Create(ctx context.Context, job *Job) error { 24 | return s.repo.Create(ctx, job) 25 | } 26 | 27 | func (s *Service) All(ctx context.Context) ([]Job, error) { 28 | return s.repo.Select(ctx, SelectParams{}) 29 | } 30 | 31 | func (s *Service) Get(ctx context.Context, id string) (Job, error) { 32 | return s.repo.Get(ctx, id) 33 | } 34 | 35 | func (s *Service) Delete(ctx context.Context, id string) error { 36 | if strings.Contains(id, "/") || strings.Contains(id, "\\") || strings.Contains(id, "..") { 37 | return fmt.Errorf("invalid file name") 38 | } 39 | 40 | datapath := filepath.Join(s.dataFolder, id+".csv") 41 | 42 | if _, err := os.Stat(datapath); err == nil { 43 | if err := os.Remove(datapath); err != nil { 44 | return err 45 | } 46 | } else if !os.IsNotExist(err) { 47 | return err 48 | } 49 | 50 | return s.repo.Delete(ctx, id) 51 | } 52 | 53 | func (s *Service) Update(ctx context.Context, job *Job) error { 54 | return s.repo.Update(ctx, job) 55 | } 56 | 57 | func (s *Service) SelectPending(ctx context.Context) ([]Job, error) { 58 | return s.repo.Select(ctx, SelectParams{Status: StatusPending, Limit: 1}) 59 | } 60 | 61 | func (s *Service) GetCSV(_ context.Context, id string) (string, error) { 62 | if strings.Contains(id, "/") || strings.Contains(id, "\\") || strings.Contains(id, "..") { 63 | return "", fmt.Errorf("invalid file name") 64 | } 65 | 66 | datapath := filepath.Join(s.dataFolder, id+".csv") 67 | 68 | if _, err := os.Stat(datapath); os.IsNotExist(err) { 69 | return "", fmt.Errorf("csv file not found for job %s", id) 70 | } 71 | 72 | return datapath, nil 73 | } 74 | -------------------------------------------------------------------------------- /web/sqlite/sqlite.go: -------------------------------------------------------------------------------- 1 | package sqlite 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/json" 7 | "time" 8 | 9 | _ "modernc.org/sqlite" // sqlite driver 10 | 11 | "github.com/gosom/google-maps-scraper/web" 12 | ) 13 | 14 | type repo struct { 15 | db *sql.DB 16 | } 17 | 18 | func New(path string) (web.JobRepository, error) { 19 | db, err := initDatabase(path) 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | return &repo{db: db}, nil 25 | } 26 | 27 | func (repo *repo) Get(ctx context.Context, id string) (web.Job, error) { 28 | const q = `SELECT * from jobs WHERE id = ?` 29 | 30 | row := repo.db.QueryRowContext(ctx, q, id) 31 | 32 | return rowToJob(row) 33 | } 34 | 35 | func (repo *repo) Create(ctx context.Context, job *web.Job) error { 36 | item, err := jobToRow(job) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | const q = `INSERT INTO jobs (id, 
name, status, data, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)` 42 | 43 | _, err = repo.db.ExecContext(ctx, q, item.ID, item.Name, item.Status, item.Data, item.CreatedAt, item.UpdatedAt) 44 | if err != nil { 45 | return err 46 | } 47 | 48 | return nil 49 | } 50 | 51 | func (repo *repo) Delete(ctx context.Context, id string) error { 52 | const q = `DELETE FROM jobs WHERE id = ?` 53 | 54 | _, err := repo.db.ExecContext(ctx, q, id) 55 | 56 | return err 57 | } 58 | 59 | func (repo *repo) Select(ctx context.Context, params web.SelectParams) ([]web.Job, error) { 60 | q := `SELECT * from jobs` 61 | 62 | var args []any 63 | 64 | if params.Status != "" { 65 | q += ` WHERE status = ?` 66 | 67 | args = append(args, params.Status) 68 | } 69 | 70 | q += " ORDER BY created_at DESC" 71 | 72 | if params.Limit > 0 { 73 | q += " LIMIT ?" 74 | 75 | args = append(args, params.Limit) 76 | } 77 | 78 | rows, err := repo.db.QueryContext(ctx, q, args...) 79 | if err != nil { 80 | return nil, err 81 | } 82 | 83 | defer rows.Close() 84 | 85 | var ans []web.Job 86 | 87 | for rows.Next() { 88 | job, err := rowToJob(rows) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | ans = append(ans, job) 94 | } 95 | 96 | if err := rows.Err(); err != nil { 97 | return nil, err 98 | } 99 | 100 | return ans, nil 101 | } 102 | 103 | func (repo *repo) Update(ctx context.Context, job *web.Job) error { 104 | item, err := jobToRow(job) 105 | if err != nil { 106 | return err 107 | } 108 | 109 | const q = `UPDATE jobs SET name = ?, status = ?, data = ?, updated_at = ? WHERE id = ?` 110 | 111 | _, err = repo.db.ExecContext(ctx, q, item.Name, item.Status, item.Data, item.UpdatedAt, item.ID) 112 | 113 | return err 114 | } 115 | 116 | type scannable interface { 117 | Scan(dest ...any) error 118 | } 119 | 120 | func rowToJob(row scannable) (web.Job, error) { 121 | var j job 122 | 123 | err := row.Scan(&j.ID, &j.Name, &j.Status, &j.Data, &j.CreatedAt, &j.UpdatedAt) 124 | if err != nil { 125 | return web.Job{}, err 126 | } 127 | 128 | ans := web.Job{ 129 | ID: j.ID, 130 | Name: j.Name, 131 | Status: j.Status, 132 | Date: time.Unix(j.CreatedAt, 0).UTC(), 133 | } 134 | 135 | err = json.Unmarshal([]byte(j.Data), &ans.Data) 136 | if err != nil { 137 | return web.Job{}, err 138 | } 139 | 140 | return ans, nil 141 | } 142 | 143 | func jobToRow(item *web.Job) (job, error) { 144 | data, err := json.Marshal(item.Data) 145 | if err != nil { 146 | return job{}, err 147 | } 148 | 149 | return job{ 150 | ID: item.ID, 151 | Name: item.Name, 152 | Status: item.Status, 153 | Data: string(data), 154 | CreatedAt: item.Date.Unix(), 155 | UpdatedAt: time.Now().UTC().Unix(), 156 | }, nil 157 | } 158 | 159 | type job struct { 160 | ID string 161 | Name string 162 | Status string 163 | Data string 164 | CreatedAt int64 165 | UpdatedAt int64 166 | } 167 | 168 | func initDatabase(path string) (*sql.DB, error) { 169 | db, err := sql.Open("sqlite", path) 170 | if err != nil { 171 | return nil, err 172 | } 173 | 174 | db.SetMaxOpenConns(1) 175 | db.SetMaxIdleConns(1) 176 | db.SetConnMaxLifetime(30 * time.Minute) 177 | 178 | _, err = db.Exec("PRAGMA busy_timeout = 5000") 179 | if err != nil { 180 | return nil, err 181 | } 182 | 183 | _, err = db.Exec("PRAGMA journal_mode=WAL") 184 | if err != nil { 185 | return nil, err 186 | } 187 | 188 | _, err = db.Exec("PRAGMA synchronous=NORMAL") 189 | if err != nil { 190 | return nil, err 191 | } 192 | 193 | _, err = db.Exec("PRAGMA cache_size=1000") 194 | if err != nil { 195 | return nil, err 196 | } 197 | 198 | err = 
db.Ping() 199 | if err != nil { 200 | return nil, err 201 | } 202 | 203 | return db, createSchema(db) 204 | } 205 | 206 | func createSchema(db *sql.DB) error { 207 | _, err := db.Exec(` 208 | CREATE TABLE IF NOT EXISTS jobs ( 209 | id TEXT PRIMARY KEY, 210 | name TEXT NOT NULL, 211 | status TEXT NOT NULL, 212 | data TEXT NOT NULL, 213 | created_at INT NOT NULL, 214 | updated_at INT NOT NULL 215 | ) 216 | `) 217 | 218 | return err 219 | } 220 | -------------------------------------------------------------------------------- /web/static/css/main.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --color-background: #f9f9f9; 3 | --color-surface: #ffffff; 4 | --color-text: #333333; 5 | --color-text-light: #666666; 6 | --color-border: #e0e0e0; 7 | --color-primary: #4a4a4a; 8 | --color-primary-light: #5a5a5a; 9 | --color-success: #43a047; 10 | --color-warning: #fdd835; 11 | --color-error: #e53935; 12 | --color-github-star: #f1e05a; 13 | --color-sponsor: #4a4a4a; 14 | --color-sponsor-hover: #5a5a5a; 15 | 16 | } 17 | 18 | body { 19 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', sans-serif; 20 | line-height: 1.5; 21 | color: var(--color-text); 22 | background-color: var(--color-background); 23 | margin: 0; 24 | padding: 0; 25 | height: 100vh; 26 | display: flex; 27 | flex-direction: column; 28 | } 29 | 30 | .app-container { 31 | flex: 1; 32 | display: flex; 33 | flex-direction: column; 34 | } 35 | 36 | header { 37 | background-color: var(--color-surface); 38 | padding: 24px 32px; 39 | box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); 40 | } 41 | 42 | h1 { 43 | font-size: 24px; 44 | font-weight: 500; 45 | color: var(--color-primary); 46 | margin: 0 0 16px 0; 47 | } 48 | 49 | .github-section { 50 | display: flex; 51 | align-items: center; 52 | margin-top: 16px; 53 | } 54 | 55 | .github-button { 56 | display: inline-flex; 57 | align-items: center; 58 | background-color: var(--color-sponsor); 59 | color: white; 60 | border: 1px solid var(--color-border); 61 | padding: 10px 16px; 62 | margin-left: 16px; 63 | border-radius: 4px; 64 | font-size: 14px; 65 | text-decoration: none; 66 | transition: background-color 0.2s, border-color 0.2s; 67 | } 68 | 69 | .github-button:hover { 70 | background-color: var(--color-sponsor-hover); 71 | border-color: var(--color-primary); 72 | } 73 | 74 | .github-button svg { 75 | margin-right: 8px; 76 | } 77 | 78 | .github-button svg path { 79 | fill: var(--color-github-star); 80 | } 81 | 82 | main { 83 | flex: 1; 84 | display: flex; 85 | overflow: hidden; 86 | } 87 | 88 | .sidebar { 89 | width: 320px; 90 | background-color: var(--color-surface); 91 | padding: 24px; 92 | overflow-y: auto; 93 | border-right: 1px solid var(--color-border); 94 | box-shadow: 2px 0 5px rgba(0, 0, 0, 0.05); 95 | } 96 | 97 | .content { 98 | flex: 1; 99 | padding: 24px 32px; 100 | overflow-y: auto; 101 | background-color: var(--color-background); 102 | } 103 | 104 | form { 105 | display: flex; 106 | flex-direction: column; 107 | gap: 24px; 108 | } 109 | 110 | fieldset { 111 | border: none; 112 | padding: 0; 113 | margin: 0; 114 | } 115 | 116 | legend { 117 | font-weight: 500; 118 | margin-bottom: 16px; 119 | color: var(--color-primary); 120 | } 121 | 122 | .form-group { 123 | margin-bottom: 20px; 124 | } 125 | 126 | label { 127 | display: block; 128 | margin-bottom: 8px; 129 | color: var(--color-text-light); 130 | } 131 | 132 | input[type="text"], 133 | 
input[type="number"], 134 | textarea { 135 | width: 100%; 136 | padding: 10px 12px; 137 | border: 1px solid var(--color-border); 138 | border-radius: 4px; 139 | font-size: 14px; 140 | box-sizing: border-box; 141 | } 142 | 143 | button { 144 | background-color: var(--color-primary); 145 | color: white; 146 | border: none; 147 | padding: 12px 20px; 148 | border-radius: 4px; 149 | cursor: pointer; 150 | font-size: 14px; 151 | transition: background-color 0.2s; 152 | } 153 | 154 | button:hover { 155 | background-color: var(--color-primary-light); 156 | } 157 | 158 | table { 159 | width: 100%; 160 | border-collapse: separate; 161 | border-spacing: 0; 162 | background-color: var(--color-surface); 163 | box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); 164 | border-radius: 4px; 165 | overflow: hidden; 166 | } 167 | 168 | th, td { 169 | text-align: left; 170 | padding: 16px; 171 | border-bottom: 1px solid var(--color-border); 172 | } 173 | 174 | th { 175 | font-weight: 500; 176 | color: var(--color-primary); 177 | background-color: var(--color-surface); 178 | } 179 | 180 | .status-indicator { 181 | display: inline-block; 182 | padding: 4px 10px; 183 | border-radius: 12px; 184 | font-size: 12px; 185 | font-weight: 500; 186 | } 187 | 188 | .status-ok { 189 | background-color: var(--color-success); 190 | color: white; 191 | } 192 | 193 | .status-pending { 194 | background-color: var(--color-warning); 195 | color: var(--color-text); 196 | } 197 | 198 | .status-working { 199 | background-color: var(--color-warning); 200 | color: var(--color-text); 201 | } 202 | 203 | .status-failed { 204 | background-color: var(--color-error); 205 | color: var(--color-text); 206 | } 207 | 208 | .status-error { 209 | background-color: var(--color-error); 210 | color: white; 211 | } 212 | 213 | .download-button, .delete-button { 214 | padding: 6px 12px; 215 | border-radius: 4px; 216 | font-size: 12px; 217 | text-decoration: none; 218 | color: white; 219 | } 220 | 221 | .download-button { 222 | background-color: var(--color-success); 223 | } 224 | 225 | .delete-button { 226 | background-color: var(--color-error); 227 | } 228 | 229 | .error-message { 230 | display: none; 231 | background-color: #ffebee; 232 | border: 1px solid var(--color-error); 233 | color: var(--color-error); 234 | padding: 12px 16px; 235 | border-radius: 4px; 236 | margin-bottom: 20px; 237 | font-size: 14px; 238 | } 239 | 240 | .error-message:not(:empty) { 241 | display: block; 242 | } 243 | 244 | .expandable-section summary { 245 | cursor: pointer; 246 | padding: 12px 16px; 247 | background-color: var(--color-background); 248 | border: 1px solid var(--color-border); 249 | border-radius: 4px; 250 | color: var(--color-text); 251 | } 252 | 253 | .expandable-section[open] summary { 254 | border-bottom: none; 255 | border-bottom-left-radius: 0; 256 | border-bottom-right-radius: 0; 257 | } 258 | 259 | @media (max-width: 768px) { 260 | main { 261 | flex-direction: column; 262 | } 263 | 264 | .sidebar { 265 | width: 100%; 266 | border-right: none; 267 | border-bottom: 1px solid var(--color-border); 268 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); 269 | } 270 | 271 | .content { 272 | padding: 20px; 273 | } 274 | } 275 | 276 | .sponsor-section { 277 | position: relative; 278 | margin-top: 24px; 279 | padding: 16px; 280 | background-color: var(--color-surface); 281 | border: 1px solid var(--color-border); 282 | border-radius: 4px; 283 | } 284 | 285 | .sponsor-close { 286 | position: absolute; 287 | top: 8px; 288 | right: 8px; 289 | width: 20px; 290 | height: 20px; 291 | 
border: none; 292 | background-color: transparent; 293 | color: var(--color-text-light); 294 | font-size: 16px; 295 | line-height: 1; 296 | cursor: pointer; 297 | display: flex; 298 | align-items: center; 299 | justify-content: center; 300 | transition: color 0.2s; 301 | } 302 | 303 | .sponsor-close:hover { 304 | color: var(--color-text); 305 | } 306 | 307 | .sponsor-text { 308 | margin: 0 0 12px 0; 309 | font-size: 14px; 310 | color: var(--color-text); 311 | padding-right: 20px; /* Make space for the close button */ 312 | } 313 | 314 | .sponsor-button { 315 | display: inline-flex; 316 | align-items: center; 317 | background-color: var(--color-sponsor); 318 | color: white; 319 | border: none; 320 | padding: 10px 16px; 321 | border-radius: 4px; 322 | font-size: 14px; 323 | text-decoration: none; 324 | transition: background-color 0.2s; 325 | } 326 | 327 | .sponsor-button:hover { 328 | background-color: var(--color-sponsor-hover); 329 | } 330 | 331 | .sponsor-button svg { 332 | margin-right: 8px; 333 | } 334 | 335 | nav { 336 | margin-bottom: 16px; 337 | padding: 8px 0; 338 | } 339 | 340 | nav a { 341 | color: white; 342 | text-decoration: none; 343 | font-size: 16px; 344 | font-weight: 500; 345 | padding: 8px 16px; 346 | background-color: var(--color-primary); 347 | border: 1px solid var(--color-primary); 348 | border-radius: 4px; 349 | transition: all 0.2s ease; 350 | display: inline-block; 351 | } 352 | 353 | nav a:hover { 354 | background-color: var(--color-primary-light); 355 | transform: translateY(-1px); 356 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); 357 | } 358 | -------------------------------------------------------------------------------- /web/static/spec/spec.yaml: -------------------------------------------------------------------------------- 1 | openapi: 3.0.3 2 | info: 3 | title: Google Maps Scraper API 4 | version: 1.0.0 5 | description: API for managing Google Maps scraping jobs 6 | 7 | paths: 8 | /api/v1/jobs: 9 | post: 10 | summary: Create a new scraping job 11 | x-code-samples: 12 | - lang: curl 13 | source: | 14 | curl -X POST "http://localhost:8080/api/v1/jobs" \ 15 | -H "Content-Type: application/json" \ 16 | -d '{ 17 | "name": "Coffee shops Ilion", 18 | "keywords": ["coffee in ilion"], 19 | "lang": "el", 20 | "zoom": 15, 21 | "depth": 1, 22 | "max_time": 3600 23 | }' 24 | requestBody: 25 | required: true 26 | content: 27 | application/json: 28 | schema: 29 | $ref: '#/components/schemas/ApiScrapeRequest' 30 | responses: 31 | '201': 32 | description: Job created successfully 33 | content: 34 | application/json: 35 | schema: 36 | $ref: '#/components/schemas/ApiScrapeResponse' 37 | '422': 38 | description: Unprocessable entity 39 | content: 40 | application/json: 41 | schema: 42 | $ref: '#/components/schemas/ApiError' 43 | '500': 44 | description: Internal server error 45 | content: 46 | application/json: 47 | schema: 48 | $ref: '#/components/schemas/ApiError' 49 | 50 | get: 51 | summary: Get all jobs 52 | x-code-samples: 53 | - lang: curl 54 | source: | 55 | curl -X GET "http://localhost:8080/api/v1/jobs" 56 | responses: 57 | '200': 58 | description: Successful response 59 | content: 60 | application/json: 61 | schema: 62 | type: array 63 | items: 64 | $ref: '#/components/schemas/Job' 65 | '500': 66 | description: Internal server error 67 | content: 68 | application/json: 69 | schema: 70 | $ref: '#/components/schemas/ApiError' 71 | 72 | /api/v1/jobs/{id}: 73 | get: 74 | summary: Get a specific job 75 | x-code-samples: 76 | - lang: curl 77 | source: | 78 | 
curl -X GET "http://localhost:8080/api/v1/jobs/6f0c1af8-3c4e-4742-84bb-590938ae8930" 79 | parameters: 80 | - name: id 81 | in: path 82 | required: true 83 | schema: 84 | type: string 85 | responses: 86 | '200': 87 | description: Successful response 88 | content: 89 | application/json: 90 | schema: 91 | $ref: '#/components/schemas/Job' 92 | '404': 93 | description: Job not found 94 | content: 95 | application/json: 96 | schema: 97 | $ref: '#/components/schemas/ApiError' 98 | '422': 99 | description: Invalid ID 100 | content: 101 | application/json: 102 | schema: 103 | $ref: '#/components/schemas/ApiError' 104 | 105 | delete: 106 | summary: Delete a specific job 107 | x-code-samples: 108 | - lang: curl 109 | source: | 110 | curl -X DELETE "http://localhost:8080/api/v1/jobs/455a6a00-cefb-4a9d-9e7d-791f01873700" 111 | parameters: 112 | - name: id 113 | in: path 114 | required: true 115 | schema: 116 | type: string 117 | responses: 118 | '200': 119 | description: Job deleted successfully 120 | '422': 121 | description: Invalid ID 122 | content: 123 | application/json: 124 | schema: 125 | $ref: '#/components/schemas/ApiError' 126 | '500': 127 | description: Internal server error 128 | content: 129 | application/json: 130 | schema: 131 | $ref: '#/components/schemas/ApiError' 132 | 133 | /api/v1/jobs/{id}/download: 134 | get: 135 | summary: Download job results as CSV 136 | x-code-samples: 137 | source: | 138 | curl -X GET "http://localhost:8080/api/v1/jobs/18eafda3-53a9-4970-ac96-8f8dfc7011c3/download" --output results.csv 139 | parameters: 140 | - name: id 141 | in: path 142 | required: true 143 | schema: 144 | type: string 145 | responses: 146 | '200': 147 | description: Successful response 148 | content: 149 | text/csv: 150 | schema: 151 | type: string 152 | format: binary 153 | '404': 154 | description: File not found 155 | '422': 156 | description: Invalid ID 157 | '500': 158 | description: Internal server error 159 | 160 | components: 161 | schemas: 162 | ApiError: 163 | type: object 164 | properties: 165 | code: 166 | type: integer 167 | message: 168 | type: string 169 | 170 | ApiScrapeRequest: 171 | type: object 172 | properties: 173 | name: 174 | type: string 175 | keywords: 176 | type: array 177 | items: 178 | type: string 179 | lang: 180 | type: string 181 | zoom: 182 | type: integer 183 | lat: 184 | type: string 185 | lon: 186 | type: string 187 | fast_mode: 188 | type: boolean 189 | radius: 190 | type: integer 191 | depth: 192 | type: integer 193 | email: 194 | type: boolean 195 | max_time: 196 | type: integer 197 | proxies: 198 | type: array 199 | items: 200 | type: string 201 | 202 | ApiScrapeResponse: 203 | type: object 204 | properties: 205 | id: 206 | type: string 207 | 208 | Job: 209 | type: object 210 | properties: 211 | id: 212 | type: string 213 | name: 214 | type: string 215 | date: 216 | type: string 217 | format: date-time 218 | status: 219 | type: string 220 | data: 221 | $ref: '#/components/schemas/JobData' 222 | 223 | JobData: 224 | type: object 225 | properties: 226 | keywords: 227 | type: array 228 | items: 229 | type: string 230 | lang: 231 | type: string 232 | zoom: 233 | type: integer 234 | lat: 235 | type: string 236 | lon: 237 | type: string 238 | fast_mode: 239 | type: boolean 240 | radius: 241 | type: integer 242 | depth: 243 | type: integer 244 | email: 245 | type: boolean 246 | max_time: 247 | type: integer 248 | proxies: 249 | type: array 250 | items: 251 | type: string 252 | 253 | 
-------------------------------------------------------------------------------- /web/static/templates/index.html: -------------------------------------------------------------------------------- [The HTML markup of this template was lost in extraction. Recoverable content: the page is titled "Google Maps Scraper"; the header shows the prompt "If you find this tool useful, please consider starring our repository:" with a "Star on GitHub" link; a sidebar form submits new scrape jobs (its fields correspond to the formData struct in web.go: name, maxtime, keywords, lang, zoom, fastmode, radius, latitude, longitude, depth, email, proxies); and the jobs table uses the column headers Job ID, Job Name, Job Date, Status, and Actions.]
-------------------------------------------------------------------------------- /web/static/templates/job_row.html: -------------------------------------------------------------------------------- [HTML markup lost in extraction. The template renders one table row showing {{.ID}}, {{.Name}}, {{.Date}} and a {{.Status}} badge, with a Download link guarded by {{ if eq .Status "ok" }} ... {{ end }} and a Delete action.] -------------------------------------------------------------------------------- /web/static/templates/job_rows.html: -------------------------------------------------------------------------------- [HTML markup lost in extraction. The same row markup as job_row.html, wrapped in {{range .}} ... {{end}} to emit one row per job.] -------------------------------------------------------------------------------- /web/static/templates/redoc.html: -------------------------------------------------------------------------------- [HTML markup lost in extraction. A page titled "API Documentation" that loads the Redoc viewer (hosted on cdn.redoc.ly, per the Content-Security-Policy in web.go) to render the OpenAPI spec.] -------------------------------------------------------------------------------- /web/web.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "context" 5 | "embed" 6 | "encoding/json" 7 | "fmt" 8 | "html/template" 9 | "io" 10 | "io/fs" 11 | "log" 12 | "net/http" 13 | "os" 14 | "path/filepath" 15 | "strconv" 16 | "strings" 17 | "time" 18 | 19 | "github.com/google/uuid" 20 | ) 21 | 22 | //go:embed static 23 | var static embed.FS 24 | 25 | type Server struct { 26 | tmpl map[string]*template.Template 27 | srv *http.Server 28 | svc *Service 29 | } 30 | 31 | func New(svc *Service, addr string) (*Server, error) { 32 | ans := Server{ 33 | svc: svc, 34 | tmpl: make(map[string]*template.Template), 35 | srv: &http.Server{ 36 | Addr: addr, 37 | ReadHeaderTimeout: 10 * time.Second, 38 | ReadTimeout: 60 * time.Second, 39 | WriteTimeout: 60 * time.Second, 40 | IdleTimeout: 120 * time.Second, 41 | MaxHeaderBytes: 1 << 20, 42 | }, 43 | } 44 | 45 | staticFS, err := fs.Sub(static, "static") 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | fileServer := http.FileServer(http.FS(staticFS)) 51 | mux := http.NewServeMux() 52 | 53 | mux.Handle("/static/", http.StripPrefix("/static/", fileServer)) 54 | mux.HandleFunc("/scrape", ans.scrape) 55 | mux.HandleFunc("/download", func(w http.ResponseWriter, r *http.Request) { 56 | r = requestWithID(r) 57 | 58 | ans.download(w, r) 59 | }) 60 | mux.HandleFunc("/delete", func(w http.ResponseWriter, r *http.Request) { 61 | r = requestWithID(r) 62 | 63 | ans.delete(w, r) 64 | }) 65 | mux.HandleFunc("/jobs", ans.getJobs) 66 | mux.HandleFunc("/", ans.index) 67 | 68 | // api routes 69 | mux.HandleFunc("/api/docs", ans.redocHandler) 70 | mux.HandleFunc("/api/v1/jobs", func(w http.ResponseWriter, r *http.Request) { 71 | switch r.Method { 72 | case http.MethodPost: 73 | ans.apiScrape(w, r) 74 | case http.MethodGet: 75 | ans.apiGetJobs(w, r) 76 | default: 77 | ans := apiError{ 78 | Code: http.StatusMethodNotAllowed, 79 | Message: "Method not allowed", 80 | } 81 | 82 | renderJSON(w, http.StatusMethodNotAllowed, ans) 83 | } 84 | }) 85 | 86 | mux.HandleFunc("/api/v1/jobs/{id}", func(w http.ResponseWriter, r *http.Request) { 87 | r = requestWithID(r) 88 | 89 | switch r.Method { 90 | case http.MethodGet: 91 | ans.apiGetJob(w, r) 92 | case http.MethodDelete: 93 | ans.apiDeleteJob(w, r) 94 | default: 95 | ans := apiError{ 96 | Code: http.StatusMethodNotAllowed, 97 | Message: "Method not allowed", 98 | 
} 99 | 100 | renderJSON(w, http.StatusMethodNotAllowed, ans) 101 | } 102 | }) 103 | 104 | mux.HandleFunc("/api/v1/jobs/{id}/download", func(w http.ResponseWriter, r *http.Request) { 105 | r = requestWithID(r) 106 | 107 | if r.Method != http.MethodGet { 108 | ans := apiError{ 109 | Code: http.StatusMethodNotAllowed, 110 | Message: "Method not allowed", 111 | } 112 | 113 | renderJSON(w, http.StatusMethodNotAllowed, ans) 114 | 115 | return 116 | } 117 | 118 | ans.download(w, r) 119 | }) 120 | 121 | handler := securityHeaders(mux) 122 | ans.srv.Handler = handler 123 | 124 | tmplsKeys := []string{ 125 | "static/templates/index.html", 126 | "static/templates/job_rows.html", 127 | "static/templates/job_row.html", 128 | "static/templates/redoc.html", 129 | } 130 | 131 | for _, key := range tmplsKeys { 132 | tmp, err := template.ParseFS(static, key) 133 | if err != nil { 134 | return nil, err 135 | } 136 | 137 | ans.tmpl[key] = tmp 138 | } 139 | 140 | return &ans, nil 141 | } 142 | 143 | func (s *Server) Start(ctx context.Context) error { 144 | go func() { 145 | <-ctx.Done() 146 | 147 | err := s.srv.Shutdown(context.Background()) 148 | if err != nil { 149 | log.Println(err) 150 | 151 | return 152 | } 153 | 154 | log.Println("server stopped") 155 | }() 156 | 157 | fmt.Fprintf(os.Stderr, "visit http://localhost%s\n", s.srv.Addr) 158 | 159 | err := s.srv.ListenAndServe() 160 | if err != nil && err != http.ErrServerClosed { 161 | return err 162 | } 163 | 164 | return nil 165 | } 166 | 167 | type formData struct { 168 | Name string 169 | MaxTime string 170 | Keywords []string 171 | Language string 172 | Zoom int 173 | FastMode bool 174 | Radius int 175 | Lat string 176 | Lon string 177 | Depth int 178 | Email bool 179 | Proxies []string 180 | } 181 | 182 | type ctxKey string 183 | 184 | const idCtxKey ctxKey = "id" 185 | 186 | func requestWithID(r *http.Request) *http.Request { 187 | id := r.PathValue("id") 188 | if id == "" { 189 | id = r.URL.Query().Get("id") 190 | } 191 | 192 | parsed, err := uuid.Parse(id) 193 | if err == nil { 194 | r = r.WithContext(context.WithValue(r.Context(), idCtxKey, parsed)) 195 | } 196 | 197 | return r 198 | } 199 | 200 | func getIDFromRequest(r *http.Request) (uuid.UUID, bool) { 201 | id, ok := r.Context().Value(idCtxKey).(uuid.UUID) 202 | 203 | return id, ok 204 | } 205 | 206 | //nolint:gocritic // this is used in template 207 | func (f formData) ProxiesString() string { 208 | return strings.Join(f.Proxies, "\n") 209 | } 210 | 211 | //nolint:gocritic // this is used in template 212 | func (f formData) KeywordsString() string { 213 | return strings.Join(f.Keywords, "\n") 214 | } 215 | 216 | func (s *Server) index(w http.ResponseWriter, r *http.Request) { 217 | if r.Method != http.MethodGet { 218 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 219 | 220 | return 221 | } 222 | 223 | tmpl, ok := s.tmpl["static/templates/index.html"] 224 | if !ok { 225 | http.Error(w, "missing tpl", http.StatusInternalServerError) 226 | 227 | return 228 | } 229 | 230 | data := formData{ 231 | Name: "", 232 | MaxTime: "10m", 233 | Keywords: []string{}, 234 | Language: "en", 235 | Zoom: 15, 236 | FastMode: false, 237 | Radius: 10000, 238 | Lat: "0", 239 | Lon: "0", 240 | Depth: 10, 241 | Email: false, 242 | } 243 | 244 | _ = tmpl.Execute(w, data) 245 | } 246 | 247 | func (s *Server) scrape(w http.ResponseWriter, r *http.Request) { 248 | if r.Method != http.MethodPost { 249 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 250 | 251 | return 252 | } 253 | 254 
| err := r.ParseForm() 255 | if err != nil { 256 | http.Error(w, err.Error(), http.StatusInternalServerError) 257 | 258 | return 259 | } 260 | 261 | newJob := Job{ 262 | ID: uuid.New().String(), 263 | Name: r.Form.Get("name"), 264 | Date: time.Now().UTC(), 265 | Status: StatusPending, 266 | Data: JobData{}, 267 | } 268 | 269 | maxTimeStr := r.Form.Get("maxtime") 270 | 271 | maxTime, err := time.ParseDuration(maxTimeStr) 272 | if err != nil { 273 | http.Error(w, "invalid max time", http.StatusUnprocessableEntity) 274 | 275 | return 276 | } 277 | 278 | if maxTime < time.Minute*3 { 279 | http.Error(w, "max time must be more than 3m", http.StatusUnprocessableEntity) 280 | 281 | return 282 | } 283 | 284 | newJob.Data.MaxTime = maxTime 285 | 286 | keywordsStr, ok := r.Form["keywords"] 287 | if !ok { 288 | http.Error(w, "missing keywords", http.StatusUnprocessableEntity) 289 | 290 | return 291 | } 292 | 293 | keywords := strings.Split(keywordsStr[0], "\n") 294 | for _, k := range keywords { 295 | k = strings.TrimSpace(k) 296 | if k == "" { 297 | continue 298 | } 299 | 300 | newJob.Data.Keywords = append(newJob.Data.Keywords, k) 301 | } 302 | 303 | newJob.Data.Lang = r.Form.Get("lang") 304 | 305 | newJob.Data.Zoom, err = strconv.Atoi(r.Form.Get("zoom")) 306 | if err != nil { 307 | http.Error(w, "invalid zoom", http.StatusUnprocessableEntity) 308 | 309 | return 310 | } 311 | 312 | if r.Form.Get("fastmode") == "on" { 313 | newJob.Data.FastMode = true 314 | } 315 | 316 | newJob.Data.Radius, err = strconv.Atoi(r.Form.Get("radius")) 317 | if err != nil { 318 | http.Error(w, "invalid radius", http.StatusUnprocessableEntity) 319 | 320 | return 321 | } 322 | 323 | newJob.Data.Lat = r.Form.Get("latitude") 324 | newJob.Data.Lon = r.Form.Get("longitude") 325 | 326 | newJob.Data.Depth, err = strconv.Atoi(r.Form.Get("depth")) 327 | if err != nil { 328 | http.Error(w, "invalid depth", http.StatusUnprocessableEntity) 329 | 330 | return 331 | } 332 | 333 | newJob.Data.Email = r.Form.Get("email") == "on" 334 | 335 | proxies := strings.Split(r.Form.Get("proxies"), "\n") 336 | if len(proxies) > 0 { 337 | for _, p := range proxies { 338 | p = strings.TrimSpace(p) 339 | if p == "" { 340 | continue 341 | } 342 | 343 | newJob.Data.Proxies = append(newJob.Data.Proxies, p) 344 | } 345 | } 346 | 347 | err = newJob.Validate() 348 | if err != nil { 349 | http.Error(w, err.Error(), http.StatusUnprocessableEntity) 350 | 351 | return 352 | } 353 | 354 | err = s.svc.Create(r.Context(), &newJob) 355 | if err != nil { 356 | http.Error(w, err.Error(), http.StatusInternalServerError) 357 | 358 | return 359 | } 360 | 361 | tmpl, ok := s.tmpl["static/templates/job_row.html"] 362 | if !ok { 363 | http.Error(w, "missing tpl", http.StatusInternalServerError) 364 | 365 | return 366 | } 367 | 368 | _ = tmpl.Execute(w, newJob) 369 | } 370 | 371 | func (s *Server) getJobs(w http.ResponseWriter, r *http.Request) { 372 | if r.Method != http.MethodGet { 373 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 374 | 375 | return 376 | } 377 | 378 | tmpl, ok := s.tmpl["static/templates/job_rows.html"] 379 | if !ok { 380 | http.Error(w, "missing tpl", http.StatusInternalServerError) 381 | return 382 | } 383 | 384 | jobs, err := s.svc.All(context.Background()) 385 | if err != nil { 386 | http.Error(w, err.Error(), http.StatusInternalServerError) 387 | 388 | return 389 | } 390 | 391 | _ = tmpl.Execute(w, jobs) 392 | } 393 | 394 | func (s *Server) download(w http.ResponseWriter, r *http.Request) { 395 | if r.Method != http.MethodGet { 
395 | if r.Method != http.MethodGet {
396 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 397 | 398 | return 399 | } 400 | 401 | ctx := r.Context() 402 | 403 | id, ok := getIDFromRequest(r) 404 | if !ok { 405 | http.Error(w, "Invalid ID", http.StatusUnprocessableEntity) 406 | 407 | return 408 | } 409 | 410 | filePath, err := s.svc.GetCSV(ctx, id.String()) 411 | if err != nil { 412 | http.Error(w, err.Error(), http.StatusNotFound) 413 | return 414 | } 415 | 416 | file, err := os.Open(filePath) 417 | if err != nil { 418 | http.Error(w, "Failed to open file", http.StatusInternalServerError) 419 | return 420 | } 421 | defer file.Close() 422 | 423 | fileName := filepath.Base(filePath) 424 | w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%s", fileName)) 425 | w.Header().Set("Content-Type", "text/csv") 426 | 427 | _, err = io.Copy(w, file) 428 | if err != nil { 429 | http.Error(w, "Failed to send file", http.StatusInternalServerError) 430 | return 431 | } 432 | } 433 | 434 | func (s *Server) delete(w http.ResponseWriter, r *http.Request) { 435 | if r.Method != http.MethodDelete { 436 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 437 | 438 | return 439 | } 440 | 441 | deleteID, ok := getIDFromRequest(r) 442 | if !ok { 443 | http.Error(w, "Invalid ID", http.StatusUnprocessableEntity) 444 | 445 | return 446 | } 447 | 448 | err := s.svc.Delete(r.Context(), deleteID.String()) 449 | if err != nil { 450 | http.Error(w, err.Error(), http.StatusInternalServerError) 451 | 452 | return 453 | } 454 | 455 | w.WriteHeader(http.StatusOK) 456 | } 457 | 458 | type apiError struct { 459 | Code int `json:"code"` 460 | Message string `json:"message"` 461 | } 462 | 463 | type apiScrapeRequest struct { 464 | Name string 465 | JobData 466 | } 467 | 468 | type apiScrapeResponse struct { 469 | ID string `json:"id"` 470 | } 471 | 472 | func (s *Server) redocHandler(w http.ResponseWriter, _ *http.Request) { 473 | tmpl, ok := s.tmpl["static/templates/redoc.html"] 474 | if !ok { 475 | http.Error(w, "missing tpl", http.StatusInternalServerError) 476 | 477 | return 478 | } 479 | 480 | _ = tmpl.Execute(w, nil) 481 | } 482 | 483 | func (s *Server) apiScrape(w http.ResponseWriter, r *http.Request) { 484 | var req apiScrapeRequest 485 | 486 | err := json.NewDecoder(r.Body).Decode(&req) 487 | if err != nil { 488 | ans := apiError{ 489 | Code: http.StatusUnprocessableEntity, 490 | Message: err.Error(), 491 | } 492 | 493 | renderJSON(w, http.StatusUnprocessableEntity, ans) 494 | 495 | return 496 | } 497 | 498 | newJob := Job{ 499 | ID: uuid.New().String(), 500 | Name: req.Name, 501 | Date: time.Now().UTC(), 502 | Status: StatusPending, 503 | Data: req.JobData, 504 | } 505 | 506 | // convert to seconds 507 | newJob.Data.MaxTime *= time.Second 508 | 509 | err = newJob.Validate() 510 | if err != nil { 511 | ans := apiError{ 512 | Code: http.StatusUnprocessableEntity, 513 | Message: err.Error(), 514 | } 515 | 516 | renderJSON(w, http.StatusUnprocessableEntity, ans) 517 | 518 | return 519 | } 520 | 521 | err = s.svc.Create(r.Context(), &newJob) 522 | if err != nil { 523 | ans := apiError{ 524 | Code: http.StatusInternalServerError, 525 | Message: err.Error(), 526 | } 527 | 528 | renderJSON(w, http.StatusInternalServerError, ans) 529 | 530 | return 531 | } 532 | 533 | ans := apiScrapeResponse{ 534 | ID: newJob.ID, 535 | } 536 | 537 | renderJSON(w, http.StatusCreated, ans) 538 | } 539 | 540 | func (s *Server) apiGetJobs(w http.ResponseWriter, r *http.Request) { 541 | jobs, err := s.svc.All(r.Context()) 542 | if 
err != nil { 543 | apiError := apiError{ 544 | Code: http.StatusInternalServerError, 545 | Message: err.Error(), 546 | } 547 | 548 | renderJSON(w, http.StatusInternalServerError, apiError) 549 | 550 | return 551 | } 552 | 553 | renderJSON(w, http.StatusOK, jobs) 554 | } 555 | 556 | func (s *Server) apiGetJob(w http.ResponseWriter, r *http.Request) { 557 | id, ok := getIDFromRequest(r) 558 | if !ok { 559 | apiError := apiError{ 560 | Code: http.StatusUnprocessableEntity, 561 | Message: "Invalid ID", 562 | } 563 | 564 | renderJSON(w, http.StatusUnprocessableEntity, apiError) 565 | 566 | return 567 | } 568 | 569 | job, err := s.svc.Get(r.Context(), id.String()) 570 | if err != nil { 571 | apiError := apiError{ 572 | Code: http.StatusNotFound, 573 | Message: http.StatusText(http.StatusNotFound), 574 | } 575 | 576 | renderJSON(w, http.StatusNotFound, apiError) 577 | 578 | return 579 | } 580 | 581 | renderJSON(w, http.StatusOK, job) 582 | } 583 | 584 | func (s *Server) apiDeleteJob(w http.ResponseWriter, r *http.Request) { 585 | id, ok := getIDFromRequest(r) 586 | if !ok { 587 | apiError := apiError{ 588 | Code: http.StatusUnprocessableEntity, 589 | Message: "Invalid ID", 590 | } 591 | 592 | renderJSON(w, http.StatusUnprocessableEntity, apiError) 593 | 594 | return 595 | } 596 | 597 | err := s.svc.Delete(r.Context(), id.String()) 598 | if err != nil { 599 | apiError := apiError{ 600 | Code: http.StatusInternalServerError, 601 | Message: err.Error(), 602 | } 603 | 604 | renderJSON(w, http.StatusInternalServerError, apiError) 605 | 606 | return 607 | } 608 | 609 | w.WriteHeader(http.StatusOK) 610 | } 611 | 612 | func renderJSON(w http.ResponseWriter, code int, data any) { 613 | w.Header().Set("Content-Type", "application/json") 614 | w.WriteHeader(code) 615 | 616 | _ = json.NewEncoder(w).Encode(data) 617 | } 618 | 619 | func formatDate(t time.Time) string { 620 | return t.Format("Jan 02, 2006 15:04:05") 621 | } 622 | 623 | func securityHeaders(next http.Handler) http.Handler { 624 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 625 | w.Header().Set("X-Content-Type-Options", "nosniff") 626 | w.Header().Set("X-Frame-Options", "DENY") 627 | w.Header().Set("X-XSS-Protection", "1; mode=block") 628 | w.Header().Set("Content-Security-Policy", 629 | "default-src 'self'; "+ 630 | "script-src 'self' cdn.redoc.ly cdnjs.cloudflare.com 'unsafe-inline' 'unsafe-eval'; "+ 631 | "worker-src 'self' blob:; "+ 632 | "style-src 'self' 'unsafe-inline' fonts.googleapis.com; "+ 633 | "img-src 'self' data: cdn.redoc.ly; "+ 634 | "font-src 'self' fonts.gstatic.com; "+ 635 | "connect-src 'self'") 636 | 637 | next.ServeHTTP(w, r) 638 | }) 639 | } 640 | --------------------------------------------------------------------------------
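Not part of the repository: for orientation, a minimal sketch of wiring this web package together. The import path is assumed from the repository name, and constructing the *web.Service (defined in service.go, outside this excerpt) is stubbed out, so the sketch compiles but is not a working dashboard on its own.

package main

import (
	"context"
	"log"
	"os/signal"
	"syscall"

	"github.com/gosom/google-maps-scraper/web" // import path assumed from the repo name
)

func main() {
	// Building the *web.Service is out of scope here; a real main wires it
	// up with its storage (e.g. the sqlite repo above) and job runner.
	var svc *web.Service

	// Server.Start shuts the server down once this context is cancelled.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	srv, err := web.New(svc, ":8080") // parses the embedded templates and registers all routes
	if err != nil {
		log.Fatal(err)
	}

	if err := srv.Start(ctx); err != nil { // blocks until shutdown or a listen error
		log.Fatal(err)
	}
}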