├── .gau.toml
├── .github
├── FUNDING.yml
└── workflows
│ ├── cicd-to-dockerhub.yml
│ └── release_build.yml
├── .gitignore
├── .goreleaser.yml
├── CONTRIBUTORS.md
├── Dockerfile
├── LICENSE
├── README.md
├── cmd
└── gau
│ └── main.go
├── go.mod
├── go.sum
├── pkg
├── httpclient
│ └── client.go
├── output
│ └── output.go
└── providers
│ ├── commoncrawl
│ ├── commoncrawl.go
│ └── types.go
│ ├── filters.go
│ ├── otx
│ └── otx.go
│ ├── providers.go
│ ├── urlscan
│ ├── types.go
│ └── urlscan.go
│ └── wayback
│ └── wayback.go
└── runner
├── flags
└── flags.go
└── runner.go
/.gau.toml:
--------------------------------------------------------------------------------
1 | threads = 2
2 | verbose = false
3 | retries = 15
4 | subdomains = false
5 | parameters = false
6 | providers = ["wayback","commoncrawl","otx","urlscan"]
7 | blacklist = ["ttf","woff","svg","png","jpg"]
8 | json = false
9 |
10 | [urlscan]
11 | apikey = ""
12 |
13 | [filters]
14 | from = ""
15 | to = ""
16 | matchstatuscodes = []
17 | matchmimetypes = []
18 | filterstatuscodes = []
19 | filtermimetypes = ["image/png", "image/jpg", "image/svg+xml"]
20 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: lc
2 |
--------------------------------------------------------------------------------
/.github/workflows/cicd-to-dockerhub.yml:
--------------------------------------------------------------------------------
1 | name: cicd-to-dockerhub
2 |
3 | on:
4 | push:
5 | tags:
6 | - "*"
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout repository
13 | uses: actions/checkout@v2
14 |
15 | - name: Login to Docker Hub
16 | uses: docker/login-action@v1
17 | with:
18 | username: ${{ secrets.DOCKER_HUB_USERNAME }}
19 | password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
20 |
21 | - name: Set up Docker Buildx
22 | id: buildx
23 | uses: docker/setup-buildx-action@v1
24 |
25 | - name: Build and push
26 | id: docker_build
27 | uses: docker/build-push-action@v2
28 | with:
29 | context: ./
30 | file: ./Dockerfile
31 | push: true
32 | tags: ${{ secrets.DOCKER_HUB_USERNAME }}/gau:latest
33 |
34 | - name: Image digest
35 | run: echo ${{ steps.docker_build.outputs.digest }}
36 |
--------------------------------------------------------------------------------
/.github/workflows/release_build.yml:
--------------------------------------------------------------------------------
1 | name: Release gau
2 |
3 | on:
4 | push:
5 | tags:
6 | - "*"
7 |
8 | jobs:
9 | build:
10 | name: GoReleaser build
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - name: Check out code into the Go module directory
15 | uses: actions/checkout@v2
16 | with:
17 | fetch-depth: 0 # See: https://goreleaser.com/ci/actions/
18 |
19 | - name: Set up Go 1.23.2
20 | uses: actions/setup-go@v2
21 | with:
22 | go-version: 1.23.2
23 | id: go
24 |
25 | - name: Import GPG key
26 | id: import_gpg
27 | uses: crazy-max/ghaction-import-gpg@v4
28 | with:
29 | gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
30 | passphrase: ${{ secrets.GPG_PASSPHRASE }}
31 |
32 | - name: Run GoReleaser
33 | uses: goreleaser/goreleaser-action@master
34 | with:
35 | version: latest
36 | args: release --clean
37 | env:
38 | GITHUB_TOKEN: ${{ secrets.GO_RELEASER_GITHUB_TOKEN }}
39 | GPG_FINGERPRINT: ${{ steps.import_gpg.outputs.fingerprint }}
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Go template
3 | # Binaries for programs and plugins
4 | *.exe
5 | *.exe~
6 | *.dll
7 | *.so
8 | *.dylib
9 |
10 | # Test binary, built with `go test -c`
11 | *.test
12 |
13 | # Output of the go coverage tool, specifically when used with LiteIDE
14 | *.out
15 |
16 | # Dependency directories (remove the comment below to include it)
17 | # vendor/
18 | .DS_Store
19 | .idea
20 | dist
21 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | before:
3 | hooks:
4 | - go mod download
5 | builds:
6 | - binary: gau
7 | goos:
8 | - linux
9 | - windows
10 | - darwin
11 | goarch:
12 | - amd64
13 | - 386
14 | - arm64
15 | ignore:
16 | - goos: darwin
17 | goarch: 386
18 | - goos: windows
19 | goarch: 'arm64'
20 | main: ./cmd/gau/
21 | archives:
22 | - id: tgz
23 | format: tar.gz
24 | format_overrides:
25 | - goos: windows
26 | format: zip
27 |
28 | signs:
29 | - artifacts: checksum
30 | args: [ "--batch", "-u", "{{ .Env.GPG_FINGERPRINT }}", "--output", "${signature}", "--detach-sign", "${artifact}" ]
31 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # Contributors
2 | * [lc](https://github.com/lc)
3 | * [shellbear](https://github.com/shellbear)
4 |
5 |
6 | Thanks to [tomnomnom](https://github.com/tomnomnom) for waybackurls!
7 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build image: golang:1.21.0-alpine3.17
2 | FROM golang:1.21.0-alpine3.17 as build
3 |
4 | WORKDIR /app
5 |
6 | COPY . .
7 | RUN go mod download && go build -o ./build/gau ./cmd/gau
8 |
9 | ENTRYPOINT ["/app/gau/build/gau"]
10 |
11 | # Release image: alpine:3.17
12 | FROM alpine:3.17
13 |
14 | RUN apk -U upgrade --no-cache
15 | COPY --from=build /app/build/gau /usr/local/bin/gau
16 |
17 | RUN adduser \
18 | --gecos "" \
19 | --disabled-password \
20 | gau
21 |
22 | USER gau
23 | ENTRYPOINT ["gau"]
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Corben Leo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # getallurls (gau)
2 | [License: MIT](https://opensource.org/licenses/MIT)
3 |
4 | getallurls (gau) fetches known URLs from AlienVault's [Open Threat Exchange](https://otx.alienvault.com), the Wayback Machine, Common Crawl, and URLScan for any given domain. Inspired by Tomnomnom's [waybackurls](https://github.com/tomnomnom/waybackurls).
5 |
6 | # Resources
7 | - [Usage](#usage)
8 | - [Installation](#installation)
9 | - [ohmyzsh note](#ohmyzsh-note)
10 |
11 | ## Usage:
12 | Examples:
13 |
14 | ```bash
15 | $ printf example.com | gau
16 | $ cat domains.txt | gau --threads 5
17 | $ gau example.com google.com
18 | $ gau --o example-urls.txt example.com
19 | $ gau --blacklist png,jpg,gif example.com
20 | ```
21 |
22 | To display the help for the tool use the `-h` flag:
23 |
24 | ```bash
25 | $ gau -h
26 | ```
27 |
28 | | Flag | Description | Example |
29 | |------|-------------|---------|
30 | |`--blacklist`| list of extensions to skip | gau --blacklist ttf,woff,svg,png|
31 | |`--config` | Use alternate configuration file (default `$HOME/.gau.toml` or `%USERPROFILE%\.gau.toml`) | gau --config $HOME/.config/gau.toml|
32 | |`--fc`| list of status codes to filter | gau --fc 404,302 |
33 | |`--from`| fetch urls from date (format: YYYYMM) | gau --from 202101 |
34 | |`--ft`| list of mime-types to filter | gau --ft text/plain|
35 | |`--fp`| remove different parameters of the same endpoint | gau --fp|
36 | |`--json`| output as json | gau --json |
37 | |`--mc`| list of status codes to match | gau --mc 200,500 |
38 | |`--mt`| list of mime-types to match |gau --mt text/html,application/json|
39 | |`--o`| filename to write results to | gau --o out.txt |
40 | |`--providers`| list of providers to use (wayback,commoncrawl,otx,urlscan) | gau --providers wayback|
41 | |`--proxy`| http proxy to use (socks5:// or http://) | gau --proxy http://proxy.example.com:8080 |
42 | |`--retries`| retries for HTTP client | gau --retries 10 |
43 | |`--timeout`| timeout (in seconds) for HTTP client | gau --timeout 60 |
44 | |`--subs`| include subdomains of target domain | gau example.com --subs |
45 | |`--threads`| number of workers to spawn | gau example.com --threads 5 |
46 | |`--to`| fetch urls to date (format: YYYYMM) | gau example.com --to 202101 |
47 | |`--verbose`| show verbose output | gau --verbose example.com |
48 | |`--version`| show gau version | gau --version|
49 |
50 |
51 | ## Configuration Files
52 | gau automatically looks for a configuration file at `$HOME/.gau.toml` or `%USERPROFILE%\.gau.toml`. You can point to a different configuration file using the `--config` flag. **If the configuration file is not found, gau will still run with a default configuration, but will output a message to stderr**.
53 |
54 | You can specify options and they will be used for every subsequent run of gau. Any options provided via command line flags will override options set in the configuration file.
55 |
56 | An example configuration file can be found [here](https://github.com/lc/gau/blob/master/.gau.toml)
57 |
58 | ## Installation:
59 | ### From source:
60 | ```
61 | $ go install github.com/lc/gau/v2/cmd/gau@latest
62 | ```
63 | ### From github :
64 | ```
65 | git clone https://github.com/lc/gau.git; \
66 | cd gau/cmd/gau; \
67 | go build; \
68 | sudo mv gau /usr/local/bin/; \
69 | gau --version;
70 | ```
71 | ### From binary:
72 | You can download the pre-built binaries from the [releases](https://github.com/lc/gau/releases/) page and then move them into your $PATH.
73 |
74 | ```bash
75 | $ tar xvf gau_2.0.6_linux_amd64.tar.gz
76 | $ mv gau /usr/bin/gau
77 | ```
78 |
79 | ### From Docker:
80 | You can run gau via docker like so:
81 | ```bash
82 | docker run --rm sxcurity/gau:latest --help
83 | ```
84 |
85 |
86 | You can also build a docker image with the following command
87 | ```bash
88 | docker build -t gau .
89 | ```
90 | and then run it
91 | ```bash
92 | docker run gau example.com
93 | ```
94 | Bear in mind that piping a command (`echo "example.com" | gau`) will not work with the Docker container.
95 |
96 |
97 | ## ohmyzsh note:
98 | ohmyzsh's [git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) has an alias which maps `gau` to the `git add --update` command. This is problematic, causing a binary conflict between this tool "gau" and the zsh plugin alias "gau" (`git add --update`). There are currently a few workarounds which can be found in this Github [issue](https://github.com/lc/gau/issues/8).
99 |
100 |
101 | ## Useful?
102 |
103 |
104 |
105 | Donate to CommonCrawl
106 | Donate to the InternetArchive
107 |
--------------------------------------------------------------------------------
/cmd/gau/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "context"
6 | "io"
7 | "os"
8 | "sync"
9 |
10 | "github.com/lc/gau/v2/pkg/output"
11 | "github.com/lc/gau/v2/runner"
12 | "github.com/lc/gau/v2/runner/flags"
13 | log "github.com/sirupsen/logrus"
14 | )
15 |
16 | func main() {
17 | cfg, err := flags.New().ReadInConfig()
18 | if err != nil {
19 | log.Warnf("error reading config: %v", err)
20 | }
21 |
22 | config, err := cfg.ProviderConfig()
23 | if err != nil {
24 | log.Fatal(err)
25 | }
26 |
27 | gau := new(runner.Runner)
28 |
29 | if err = gau.Init(config, cfg.Providers, cfg.Filters); err != nil {
30 | log.Warn(err)
31 | }
32 |
33 | results := make(chan string)
34 |
35 | out := os.Stdout
36 | // Handle results in background
37 | if config.Output != "" {
38 | out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
39 | if err != nil {
40 | log.Fatalf("Could not open output file: %v\n", err)
41 | }
42 | defer out.Close()
43 | }
44 |
45 | var writeWg sync.WaitGroup
46 | writeWg.Add(1)
47 | go func(out io.Writer, JSON bool) {
48 | defer writeWg.Done()
49 | if JSON {
50 | output.WriteURLsJSON(out, results, config.Blacklist, config.RemoveParameters)
51 | } else if err = output.WriteURLs(out, results, config.Blacklist, config.RemoveParameters); err != nil {
52 | log.Fatalf("error writing results: %v\n", err)
53 | }
54 | }(out, config.JSON)
55 | ctx, cancel := context.WithCancel(context.Background())
56 | defer cancel()
57 | workChan := make(chan runner.Work)
58 | gau.Start(ctx, workChan, results)
59 | domains := flags.Args()
60 | if len(domains) > 0 {
61 | for _, provider := range gau.Providers {
62 | for _, domain := range domains {
63 | workChan <- runner.NewWork(domain, provider)
64 | }
65 | }
66 | } else {
67 | sc := bufio.NewScanner(os.Stdin)
68 | for sc.Scan() {
69 | domain := sc.Text()
70 | for _, provider := range gau.Providers {
71 | workChan <- runner.NewWork(domain, provider)
72 | }
73 | }
74 | if err := sc.Err(); err != nil {
75 | log.Fatal(err)
76 | }
77 | }
78 | close(workChan)
79 |
80 | // wait for providers to fetch URLS
81 | gau.Wait()
82 |
83 | // close results channel
84 | close(results)
85 |
86 | // wait for writer to finish output
87 | writeWg.Wait()
88 | }
89 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/lc/gau/v2
2 |
3 | go 1.20
4 |
5 | require (
6 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89
7 | github.com/deckarep/golang-set/v2 v2.3.0
8 | github.com/json-iterator/go v1.1.12
9 | github.com/lynxsecurity/pflag v1.1.3
10 | github.com/lynxsecurity/viper v1.10.0
11 | github.com/sirupsen/logrus v1.8.1
12 | github.com/valyala/bytebufferpool v1.0.0
13 | github.com/valyala/fasthttp v1.31.0
14 | )
15 |
16 | require (
17 | github.com/andybalholm/brotli v1.0.2 // indirect
18 | github.com/fsnotify/fsnotify v1.5.1 // indirect
19 | github.com/hashicorp/hcl v1.0.0 // indirect
20 | github.com/klauspost/compress v1.13.4 // indirect
21 | github.com/magiconair/properties v1.8.5 // indirect
22 | github.com/mitchellh/mapstructure v1.4.2 // indirect
23 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
24 | github.com/modern-go/reflect2 v1.0.2 // indirect
25 | github.com/pelletier/go-toml v1.9.4 // indirect
26 | github.com/spf13/afero v1.6.0 // indirect
27 | github.com/spf13/cast v1.4.1 // indirect
28 | github.com/spf13/jwalterweatherman v1.1.0 // indirect
29 | github.com/subosito/gotenv v1.2.0 // indirect
30 | golang.org/x/net v0.17.0 // indirect
31 | golang.org/x/sys v0.13.0 // indirect
32 | golang.org/x/text v0.13.0 // indirect
33 | gopkg.in/ini.v1 v1.64.0 // indirect
34 | gopkg.in/yaml.v2 v2.4.0 // indirect
35 | )
36 |
37 | retract (
38 | v2.0.7
39 | v2.0.3
40 | v2.0.2
41 | v2.0.1
42 | )
43 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/andybalholm/brotli v1.0.2 h1:JKnhI/XQ75uFBTiuzXpzFrUriDPiZjlOSzh6wXogP0E=
2 | github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
3 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89 h1:2pkAuIM8OF1fy4ToFpMnI4oE+VeUNRbGrpSLKshK0oQ=
4 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89/go.mod h1:/09nEjna1UMoasyyQDhOrIn8hi2v2kiJglPWed1idck=
5 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
6 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
7 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
8 | github.com/deckarep/golang-set/v2 v2.3.0 h1:qs18EKUfHm2X9fA50Mr/M5hccg2tNnVqsiBImnyDs0g=
9 | github.com/deckarep/golang-set/v2 v2.3.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
10 | github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI=
11 | github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
12 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
13 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
14 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
15 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
16 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
17 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
18 | github.com/klauspost/compress v1.13.4 h1:0zhec2I8zGnjWcKyLl6i3gPqKANCCn5e9xmviEEeX6s=
19 | github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
20 | github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
21 | github.com/lynxsecurity/pflag v1.1.3 h1:/5R9phe4nvKWXbcSWiWUsdWPa7CI+TKR3EyLhvUf0fQ=
22 | github.com/lynxsecurity/pflag v1.1.3/go.mod h1:Yz08toY61CsgZXC/AIHEbfHi45Vcsihen8PYE5vAfs0=
23 | github.com/lynxsecurity/viper v1.10.0 h1:4Y6fXjnid2CkrT2bjcI3nPjBkWpiLf+Z7PLNBQ29N/8=
24 | github.com/lynxsecurity/viper v1.10.0/go.mod h1:JdScMPWhCuBZ5pKBAEs9G2uioQVjsfGbkyIjrMnrIJo=
25 | github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls=
26 | github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
27 | github.com/mitchellh/mapstructure v1.4.2 h1:6h7AQ0yhTcIsmFmnAwQls75jp2Gzs4iB8W7pjMO+rqo=
28 | github.com/mitchellh/mapstructure v1.4.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
29 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
30 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
31 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
32 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
33 | github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM=
34 | github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
35 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
36 | github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
37 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
38 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
39 | github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
40 | github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
41 | github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY=
42 | github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
43 | github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA=
44 | github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
45 | github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
46 | github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
47 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
48 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
49 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
50 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
51 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
52 | github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
53 | github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
54 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
55 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
56 | github.com/valyala/fasthttp v1.31.0 h1:lrauRLII19afgCs2fnWRJ4M5IkV0lo2FqA61uGkNBfE=
57 | github.com/valyala/fasthttp v1.31.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus=
58 | github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
59 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
60 | golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
61 | golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
62 | golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
63 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
64 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
65 | golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
66 | golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
67 | golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
68 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
69 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
70 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
71 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
72 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
73 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
74 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
75 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
76 | golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
77 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
78 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
79 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
80 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
81 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
82 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
83 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
84 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
85 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
86 | gopkg.in/ini.v1 v1.64.0 h1:Mj2zXEXcNb5joEiSA0zc3HZpTst/iyjNiR4CN8tDzOg=
87 | gopkg.in/ini.v1 v1.64.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
88 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
89 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
90 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
91 |
--------------------------------------------------------------------------------
/pkg/httpclient/client.go:
--------------------------------------------------------------------------------
1 | package httpclient
2 |
3 | import (
4 | "errors"
5 | "math/rand"
6 | "time"
7 |
8 | "github.com/valyala/fasthttp"
9 | )
10 |
// Sentinel errors returned by the HTTP helpers in this package.
var (
	// ErrNilResponse is returned when a response carries a nil body.
	ErrNilResponse = errors.New("unexpected nil response")
	// ErrNon200Response is returned for any non-200 status other than 400.
	ErrNon200Response = errors.New("API responded with non-200 status code")
	// ErrBadRequest is returned when the status code is 400.
	ErrBadRequest = errors.New("API responded with 400 status code")
)
16 |
// Header is a single HTTP request header expressed as a key/value pair.
type Header struct {
	Key   string // header name; MakeRequest skips entries whose Key is empty
	Value string // header value
}
21 |
22 | func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint, headers ...Header) ([]byte, error) {
23 | var (
24 | req *fasthttp.Request
25 | respBody []byte
26 | err error
27 | )
28 | retries := int(maxRetries)
29 | for i := retries; i >= 0; i-- {
30 | req = fasthttp.AcquireRequest()
31 |
32 | req.Header.SetMethod(fasthttp.MethodGet)
33 | for _, header := range headers {
34 | if header.Key != "" {
35 | req.Header.Set(header.Key, header.Value)
36 | }
37 | }
38 | req.Header.Set(fasthttp.HeaderUserAgent, getUserAgent())
39 | req.Header.Set("Accept", "*/*")
40 | req.SetRequestURI(url)
41 | respBody, err = doReq(c, req, timeout)
42 | if err == nil {
43 | break
44 | }
45 | }
46 | if err != nil {
47 | return nil, err
48 | }
49 | return respBody, nil
50 | }
51 |
52 | // doReq handles http requests
53 | func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, error) {
54 | resp := fasthttp.AcquireResponse()
55 | defer fasthttp.ReleaseResponse(resp)
56 | defer fasthttp.ReleaseRequest(req)
57 | if err := c.DoTimeout(req, resp, time.Second*time.Duration(timeout)); err != nil {
58 | return nil, err
59 | }
60 | if resp.StatusCode() != 200 {
61 | if resp.StatusCode() == 400 {
62 | return nil, ErrBadRequest
63 | }
64 | return nil, ErrNon200Response
65 | }
66 | if resp.Body() == nil {
67 | return nil, ErrNilResponse
68 | }
69 |
70 | return resp.Body(), nil
71 | }
72 |
// getUserAgent returns one browser User-Agent string chosen at random from a
// fixed built-in list.
func getUserAgent() string {
	agents := []string{
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
		"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0",
		"Mozilla/5.0 (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
		"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
		"Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53",
		"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
	}
	return agents[rand.Intn(len(agents))]
}
93 |
--------------------------------------------------------------------------------
/pkg/output/output.go:
--------------------------------------------------------------------------------
1 | package output
2 |
3 | import (
4 | "io"
5 | "net/url"
6 | "path"
7 | "strings"
8 |
9 | mapset "github.com/deckarep/golang-set/v2"
10 | jsoniter "github.com/json-iterator/go"
11 | "github.com/valyala/bytebufferpool"
12 | )
13 |
// JSONResult is the shape of one record emitted by WriteURLsJSON; the URL is
// serialised under the "url" key.
type JSONResult struct {
	Url string `json:"url"`
}
17 |
18 | func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) error {
19 | lastURL := mapset.NewThreadUnsafeSet[string]()
20 | for result := range results {
21 | buf := bytebufferpool.Get()
22 | u, err := url.Parse(result)
23 | if err != nil {
24 | continue
25 | }
26 | if path.Ext(u.Path) != "" && blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
27 | continue
28 | }
29 |
30 | if RemoveParameters && !lastURL.Contains(u.Host+u.Path) {
31 | continue
32 | }
33 | lastURL.Add(u.Host + u.Path)
34 |
35 | buf.B = append(buf.B, []byte(result)...)
36 | buf.B = append(buf.B, "\n"...)
37 | _, err = writer.Write(buf.B)
38 | if err != nil {
39 | return err
40 | }
41 | bytebufferpool.Put(buf)
42 | }
43 | return nil
44 | }
45 |
46 | func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) {
47 | var jr JSONResult
48 | enc := jsoniter.NewEncoder(writer)
49 | for result := range results {
50 | u, err := url.Parse(result)
51 | if err != nil {
52 | continue
53 | }
54 | if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) {
55 | continue
56 | }
57 | jr.Url = result
58 | if err := enc.Encode(jr); err != nil {
59 | // todo: handle this error
60 | continue
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/pkg/providers/commoncrawl/commoncrawl.go:
--------------------------------------------------------------------------------
1 | package commoncrawl
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "context"
7 | "errors"
8 | "fmt"
9 |
10 | jsoniter "github.com/json-iterator/go"
11 | "github.com/lc/gau/v2/pkg/httpclient"
12 | "github.com/lc/gau/v2/pkg/providers"
13 | "github.com/sirupsen/logrus"
14 | )
15 |
// Name is the identifier of the commoncrawl provider; it is returned by
// Client.Name and attached to this provider's log entries.
const Name = "commoncrawl"
19 |
20 | // verify interface compliance
21 | var _ providers.Provider = (*Client)(nil)
22 |
23 | // Client is the structure that holds the Filters and the Client's configuration
24 | type Client struct {
25 | filters providers.Filters
26 | config *providers.Config
27 |
28 | apiURL string
29 | }
30 |
31 | func New(c *providers.Config, filters providers.Filters) (*Client, error) {
32 | // Fetch the list of available CommonCrawl Api URLs.
33 | resp, err := httpclient.MakeRequest(c.Client, "http://index.commoncrawl.org/collinfo.json", c.MaxRetries, c.Timeout)
34 | if err != nil {
35 | return nil, err
36 | }
37 |
38 | var r apiResult
39 | if err = jsoniter.Unmarshal(resp, &r); err != nil {
40 | return nil, err
41 | }
42 |
43 | if len(r) == 0 {
44 | return nil, errors.New("failed to grab latest commoncrawl index")
45 | }
46 |
47 | return &Client{config: c, filters: filters, apiURL: r[0].API}, nil
48 | }
49 |
50 | func (c *Client) Name() string {
51 | return Name
52 | }
53 |
54 | // Fetch fetches all urls for a given domain and sends them to a channel.
55 | // It returns an error should one occur.
56 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
57 | p, err := c.getPagination(domain)
58 | if err != nil {
59 | return err
60 | }
61 | // 0 pages means no results
62 | if p.Pages == 0 {
63 | logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain)
64 | return nil
65 | }
66 |
67 | for page := uint(0); page < p.Pages; page++ {
68 | select {
69 | case <-ctx.Done():
70 | return nil
71 | default:
72 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
73 | apiURL := c.formatURL(domain, page)
74 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
75 | if err != nil {
76 | return fmt.Errorf("failed to fetch commoncrawl(%d): %s", page, err)
77 | }
78 |
79 | sc := bufio.NewScanner(bytes.NewReader(resp))
80 | for sc.Scan() {
81 | var res apiResponse
82 | if err := jsoniter.Unmarshal(sc.Bytes(), &res); err != nil {
83 | return fmt.Errorf("failed to decode commoncrawl result: %s", err)
84 | }
85 | if res.Error != "" {
86 | return fmt.Errorf("received an error from commoncrawl: %s", res.Error)
87 | }
88 |
89 | results <- res.URL
90 | }
91 | }
92 | }
93 | return nil
94 | }
95 |
96 | func (c *Client) formatURL(domain string, page uint) string {
97 | if c.config.IncludeSubdomains {
98 | domain = "*." + domain
99 | }
100 |
101 | filterParams := c.filters.GetParameters(false)
102 |
103 | return fmt.Sprintf("%s?url=%s/*&output=json&fl=url&page=%d", c.apiURL, domain, page) + filterParams
104 | }
105 |
106 | // Fetch the number of pages.
107 | func (c *Client) getPagination(domain string) (r paginationResult, err error) {
108 | url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0))
109 | var resp []byte
110 |
111 | resp, err = httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
112 | if err != nil {
113 | return
114 | }
115 |
116 | err = jsoniter.Unmarshal(resp, &r)
117 | return
118 | }
119 |
--------------------------------------------------------------------------------
/pkg/providers/commoncrawl/types.go:
--------------------------------------------------------------------------------
1 | package commoncrawl
2 |
// apiResponse is one decoded line of a CDX result page: either a captured URL
// or an error message.
type apiResponse struct {
	URL   string `json:"url"`
	Error string `json:"error"`
}
7 |
// paginationResult is the decoded reply to a showNumPages=true query; Fetch
// uses Pages as the number of pages to request.
type paginationResult struct {
	Blocks   uint `json:"blocks"`
	PageSize uint `json:"pageSize"`
	Pages    uint `json:"pages"`
}
13 |
// apiResult is the decoded collinfo.json listing; each entry carries the
// "cdx-api" endpoint of one index.
type apiResult []struct {
	API string `json:"cdx-api"`
}
17 |
--------------------------------------------------------------------------------
/pkg/providers/filters.go:
--------------------------------------------------------------------------------
1 | package providers
2 |
3 | import "net/url"
4 |
// Filters holds the optional date range and the status-code / mime-type
// match and exclusion lists that are turned into provider query parameters.
type Filters struct {
	From              string   `mapstructure:"from"`
	To                string   `mapstructure:"to"`
	MatchStatusCodes  []string `mapstructure:"matchstatuscodes"`
	MatchMimeTypes    []string `mapstructure:"matchmimetypes"`
	FilterStatusCodes []string `mapstructure:"filterstatuscodes"`
	FilterMimeTypes   []string `mapstructure:"filtermimetypes"`
}

// GetParameters renders the filters as a URL-encoded query-string fragment.
//
// With forWayback set the filter expressions use the wayback field names
// ("mimetype", "statuscode", negated with "!"); otherwise they use the
// commoncrawl ones ("mime", "status", negated with "!="). The result starts
// with "&" so it can be appended to an existing query, and is empty when no
// filter is set.
func (f *Filters) GetParameters(forWayback bool) string {
	query := url.Values{}
	if f.From != "" {
		query.Add("from", f.From)
	}
	if f.To != "" {
		query.Add("to", f.To)
	}

	// addAll appends one "filter" entry per value, each carrying the prefix.
	addAll := func(prefix string, values []string) {
		for _, v := range values {
			query.Add("filter", prefix+v)
		}
	}

	if forWayback {
		addAll("mimetype:", f.MatchMimeTypes)
		addAll("statuscode:", f.MatchStatusCodes)
		addAll("!statuscode:", f.FilterStatusCodes)
		addAll("!mimetype:", f.FilterMimeTypes)
	} else {
		addAll("status:", f.MatchStatusCodes)
		addAll("mime:", f.MatchMimeTypes)
		addAll("!=status:", f.FilterStatusCodes)
		addAll("!=mime:", f.FilterMimeTypes)
	}

	encoded := query.Encode()
	if encoded == "" {
		return ""
	}
	return "&" + encoded
}
85 |
--------------------------------------------------------------------------------
/pkg/providers/otx/otx.go:
--------------------------------------------------------------------------------
1 | package otx
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/bobesa/go-domain-util/domainutil"
8 | jsoniter "github.com/json-iterator/go"
9 | "github.com/lc/gau/v2/pkg/httpclient"
10 | "github.com/lc/gau/v2/pkg/providers"
11 | "github.com/sirupsen/logrus"
12 | )
13 |
// Name is the provider name returned by Client.Name and attached to this
// package's log entries.
const Name = "otx"
17 |
// Client queries the OTX url_list API using the shared provider
// configuration (HTTP client, retries, timeout, subdomain handling).
type Client struct {
	config *providers.Config
}
21 |
// verify interface compliance: compilation fails if *Client stops
// implementing providers.Provider
var _ providers.Provider = (*Client)(nil)
23 |
24 | func New(c *providers.Config) *Client {
25 | if c.OTX != "" {
26 | setBaseURL(c.OTX)
27 | }
28 | return &Client{config: c}
29 | }
30 |
// otxResult is the decoded body of one url_list page. Fetch emits the URL of
// every URLList entry and stops once HasNext is false.
type otxResult struct {
	HasNext    bool `json:"has_next"`
	ActualSize int  `json:"actual_size"`
	URLList    []struct {
		Domain   string `json:"domain"`
		URL      string `json:"url"`
		Hostname string `json:"hostname"`
		HTTPCode int    `json:"httpcode"`
		PageNum  int    `json:"page_num"`
		FullSize int    `json:"full_size"`
		Paged    bool   `json:"paged"`
	} `json:"url_list"`
}
44 |
// Name returns the provider's name (the package-level Name constant).
func (c *Client) Name() string {
	return Name
}
48 |
49 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
50 | for page := uint(1); ; page++ {
51 | select {
52 | case <-ctx.Done():
53 | return nil
54 | default:
55 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain)
56 | apiURL := c.formatURL(domain, page)
57 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
58 | if err != nil {
59 | return fmt.Errorf("failed to fetch alienvault(%d): %s", page, err)
60 | }
61 | var result otxResult
62 | if err := jsoniter.Unmarshal(resp, &result); err != nil {
63 | return fmt.Errorf("failed to decode otx results for page %d: %s", page, err)
64 | }
65 |
66 | for _, entry := range result.URLList {
67 | results <- entry.URL
68 | }
69 |
70 | if !result.HasNext {
71 | return nil
72 | }
73 | }
74 | }
75 | }
76 |
77 | func (c *Client) formatURL(domain string, page uint) string {
78 | category := "hostname"
79 | if !domainutil.HasSubdomain(domain) {
80 | category = "domain"
81 | }
82 | if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains {
83 | domain = domainutil.Domain(domain)
84 | category = "domain"
85 | }
86 |
87 | return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page)
88 | }
89 |
// _BaseURL is the prefix of every OTX request. formatURL appends the API path
// directly to it, so it must end with "/".
var _BaseURL = "https://otx.alienvault.com/"

// setBaseURL replaces the package-wide base URL. A non-empty value lacking a
// trailing slash gets one appended, because formatURL concatenates the API
// path without a separator.
func setBaseURL(baseURL string) {
	if n := len(baseURL); n > 0 && baseURL[n-1] != '/' {
		baseURL += "/"
	}
	_BaseURL = baseURL
}
95 |
--------------------------------------------------------------------------------
/pkg/providers/providers.go:
--------------------------------------------------------------------------------
1 | package providers
2 |
3 | import (
4 | "context"
5 |
6 | mapset "github.com/deckarep/golang-set/v2"
7 | "github.com/valyala/fasthttp"
8 | )
9 |
// Version is the gau release number printed by the --version flag.
const Version = "2.2.4"
11 |
// Provider is a generic interface for all archive fetchers
type Provider interface {
	// Fetch sends the URLs found for domain to results and returns an error
	// if the lookup fails.
	Fetch(ctx context.Context, domain string, results chan string) error
	// Name returns the provider's identifier.
	Name() string
}
17 |
// URLScan holds the urlscan provider settings: an optional replacement base
// URL (Host) and an optional API key sent with each request.
type URLScan struct {
	Host   string
	APIKey string
}
22 |
// Config is the runtime configuration shared by the runner and all providers.
type Config struct {
	// Threads is the number of worker goroutines the runner starts.
	Threads uint
	// Timeout and MaxRetries are passed to every httpclient.MakeRequest call.
	Timeout    uint
	MaxRetries uint
	// IncludeSubdomains widens provider queries to subdomains of the target.
	IncludeSubdomains bool
	RemoveParameters  bool
	// Client is the HTTP client used for all provider requests.
	Client *fasthttp.Client
	// Providers lists the provider names to use.
	Providers []string
	// Blacklist is the set of extensions to skip.
	Blacklist mapset.Set[string]
	// Output is the file name results are written to, if any.
	Output  string
	JSON    bool
	URLScan URLScan
	// OTX, when non-empty, replaces the OTX base URL.
	OTX string
}
37 |
--------------------------------------------------------------------------------
/pkg/providers/urlscan/types.go:
--------------------------------------------------------------------------------
1 | package urlscan
2 |
3 | import (
4 | "strings"
5 | )
6 |
// _BaseURL is the prefix of every urlscan request; the API path is appended
// to it directly, so it is expected to end with "/".
var _BaseURL = "https://urlscan.io/"
8 |
// apiResponse is the decoded body of one search request. Status carries an
// HTTP-like status code (429 is treated as rate limiting) and HasMore tells
// whether another page can be requested.
type apiResponse struct {
	Status  int            `json:"status"`
	Results []searchResult `json:"results"`
	HasMore bool           `json:"has_more"`
}
14 |
// searchResult is one search hit. Sort holds the hit's sort values; those of
// the last hit on a page become the search_after cursor for the next request.
type searchResult struct {
	Page archivedPage
	Sort []interface{} `json:"sort"`
}
19 |
// archivedPage describes the page of a search hit; Fetch filters hits by
// Domain and emits URL.
type archivedPage struct {
	Domain   string `json:"domain"`
	MimeType string `json:"mimeType"`
	URL      string `json:"url"`
	Status   string `json:"status"`
}
26 |
// parseSort joins the sort values of a search hit with commas, producing the
// value of the search_after query parameter.
//
// Plain strings are used as they are. Values that expose a String() method
// are included too: the response is decoded with UseNumber, so numeric sort
// values arrive as a string-based number type rather than as string, and
// would otherwise be dropped from the cursor. Anything else (nil, bool, ...)
// is skipped. The result is empty when no usable value is present.
func parseSort(sort []interface{}) string {
	sortParam := make([]string, 0, len(sort))
	for _, v := range sort {
		switch s := v.(type) {
		case string:
			sortParam = append(sortParam, s)
		case interface{ String() string }:
			sortParam = append(sortParam, s.String())
		}
	}
	return strings.Join(sortParam, ",")
}
37 |
--------------------------------------------------------------------------------
/pkg/providers/urlscan/urlscan.go:
--------------------------------------------------------------------------------
1 | package urlscan
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "strings"
8 |
9 | jsoniter "github.com/json-iterator/go"
10 | "github.com/lc/gau/v2/pkg/httpclient"
11 | "github.com/lc/gau/v2/pkg/providers"
12 | "github.com/sirupsen/logrus"
13 | )
14 |
// Name is the provider name returned by Client.Name and attached to this
// package's log entries.
const Name = "urlscan"
18 |
// Client queries the urlscan search API using the shared provider
// configuration (HTTP client, retries, timeout, API key, subdomain handling).
type Client struct {
	config *providers.Config
}
22 |
23 | func New(c *providers.Config) *Client {
24 | if c.URLScan.Host != "" {
25 | setBaseURL(c.URLScan.Host)
26 | }
27 |
28 | return &Client{config: c}
29 | }
30 |
// Name returns the provider's name (the package-level Name constant).
func (c *Client) Name() string {
	return Name
}
34 |
35 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
36 | var searchAfter string
37 | var header httpclient.Header
38 |
39 | if c.config.URLScan.APIKey != "" {
40 | header.Key = "API-Key"
41 | header.Value = c.config.URLScan.APIKey
42 | }
43 |
44 | for page := uint(0); ; page++ {
45 | select {
46 | case <-ctx.Done():
47 | return nil
48 | default:
49 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
50 | apiURL := c.formatURL(domain, searchAfter)
51 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout, header)
52 | if err != nil {
53 | return fmt.Errorf("failed to fetch urlscan: %s", err)
54 | }
55 | var result apiResponse
56 | decoder := jsoniter.NewDecoder(bytes.NewReader(resp))
57 | decoder.UseNumber()
58 | if err = decoder.Decode(&result); err != nil {
59 | return fmt.Errorf("failed to decode urlscan result: %s", err)
60 | }
61 | // rate limited
62 | if result.Status == 429 {
63 | logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429, probably being rate limited")
64 | return nil
65 | }
66 |
67 | total := len(result.Results)
68 | for i, res := range result.Results {
69 | if res.Page.Domain == domain || (c.config.IncludeSubdomains && strings.HasSuffix(res.Page.Domain, domain)) {
70 | results <- res.Page.URL
71 | }
72 |
73 | if i == total-1 {
74 | sortParam := parseSort(res.Sort)
75 | if sortParam == "" {
76 | return nil
77 | }
78 | searchAfter = sortParam
79 | }
80 | }
81 |
82 | if !result.HasMore {
83 | return nil
84 | }
85 | }
86 | }
87 | }
88 |
89 | func (c *Client) formatURL(domain string, after string) string {
90 | if after != "" {
91 | after = "&search_after=" + after
92 | }
93 |
94 | return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + after
95 | }
96 |
97 | func setBaseURL(baseURL string) {
98 | _BaseURL = baseURL
99 | }
100 |
--------------------------------------------------------------------------------
/pkg/providers/wayback/wayback.go:
--------------------------------------------------------------------------------
1 | package wayback
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 |
8 | jsoniter "github.com/json-iterator/go"
9 | "github.com/lc/gau/v2/pkg/httpclient"
10 | "github.com/lc/gau/v2/pkg/providers"
11 | "github.com/sirupsen/logrus"
12 | )
13 |
// Name is the provider name returned by Client.Name and attached to this
// package's log entries.
const Name = "wayback"
17 |
// verify interface compliance: compilation fails if *Client stops
// implementing providers.Provider
var _ providers.Provider = (*Client)(nil)
20 |
// Client is the structure that holds the WaybackFilters and the Client's configuration
type Client struct {
	// filters are rendered into extra query parameters by formatURL.
	filters providers.Filters
	// config supplies the HTTP client, retry count, timeout and the
	// IncludeSubdomains switch used when querying.
	config *providers.Config
}
26 |
27 | func New(config *providers.Config, filters providers.Filters) *Client {
28 | return &Client{filters, config}
29 | }
30 |
// Name returns the provider's name (the package-level Name constant).
func (c *Client) Name() string {
	return Name
}
34 |
// waybackResult holds the response from the wayback API: a list of rows, each
// a list of column values. Fetch skips the first row and reads the first
// column of the others.
type waybackResult [][]string
37 |
38 | // Fetch fetches all urls for a given domain and sends them to a channel.
39 | // It returns an error should one occur.
40 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
41 | for page := uint(0); ; page++ {
42 | select {
43 | case <-ctx.Done():
44 | return nil
45 | default:
46 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain)
47 | apiURL := c.formatURL(domain, page)
48 | // make HTTP request
49 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
50 | if err != nil {
51 | if errors.Is(err, httpclient.ErrBadRequest) {
52 | return nil
53 | }
54 | return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err)
55 | }
56 | var result waybackResult
57 | if err = jsoniter.Unmarshal(resp, &result); err != nil {
58 | return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err)
59 | }
60 |
61 | // check if there's results, wayback's pagination response
62 | // is not always correct when using a filter
63 | if len(result) == 0 {
64 | break
65 | }
66 |
67 | // output results
68 | // Slicing as [1:] to skip first result by default
69 | for _, entry := range result[1:] {
70 | results <- entry[0]
71 | }
72 | }
73 | }
74 | }
75 |
76 | // formatUrl returns a formatted URL for the Wayback API
77 | func (c *Client) formatURL(domain string, page uint) string {
78 | if c.config.IncludeSubdomains {
79 | domain = "*." + domain
80 | }
81 | filterParams := c.filters.GetParameters(true)
82 | return fmt.Sprintf(
83 | "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d",
84 | domain, page,
85 | ) + filterParams
86 | }
87 |
--------------------------------------------------------------------------------
/runner/flags/flags.go:
--------------------------------------------------------------------------------
1 | package flags
2 |
3 | import (
4 | "crypto/tls"
5 | "errors"
6 | "flag"
7 | "fmt"
8 | "net/url"
9 | "os"
10 | "path/filepath"
11 | "strings"
12 | "time"
13 |
14 | mapset "github.com/deckarep/golang-set/v2"
15 | "github.com/lc/gau/v2/pkg/providers"
16 | "github.com/lynxsecurity/pflag"
17 | "github.com/lynxsecurity/viper"
18 | log "github.com/sirupsen/logrus"
19 | "github.com/valyala/fasthttp"
20 | "github.com/valyala/fasthttp/fasthttpproxy"
21 | )
22 |
// URLScanConfig is the [urlscan] section of the configuration: an optional
// replacement host and an optional API key.
type URLScanConfig struct {
	Host   string `mapstructure:"host"`
	APIKey string `mapstructure:"apikey"`
}
27 |
// Config is the user-facing configuration, filled from the config file (via
// the mapstructure keys) and overridden by command-line flags in
// getFlagValues. ProviderConfig converts it into a providers.Config.
type Config struct {
	Filters           providers.Filters `mapstructure:"filters"`
	Proxy             string            `mapstructure:"proxy"`
	Threads           uint              `mapstructure:"threads"`
	Timeout           uint              `mapstructure:"timeout"`
	Verbose           bool              `mapstructure:"verbose"`
	MaxRetries        uint              `mapstructure:"retries"`
	IncludeSubdomains bool              `mapstructure:"subdomains"`
	RemoveParameters  bool              `mapstructure:"parameters"`
	Providers         []string          `mapstructure:"providers"`
	Blacklist         []string          `mapstructure:"blacklist"`
	JSON              bool              `mapstructure:"json"`
	URLScan           URLScanConfig     `mapstructure:"urlscan"`
	OTX               string            `mapstructure:"otx"`
	Outfile           string            // output file to write to
}
44 |
45 | func (c *Config) ProviderConfig() (*providers.Config, error) {
46 | var dialer fasthttp.DialFunc
47 |
48 | if c.Proxy != "" {
49 | parse, err := url.Parse(c.Proxy)
50 | if err != nil {
51 | return nil, fmt.Errorf("proxy url: %v", err)
52 | }
53 | switch parse.Scheme {
54 | case "http":
55 | dialer = fasthttpproxy.FasthttpHTTPDialer(strings.ReplaceAll(c.Proxy, "http://", ""))
56 | case "socks5":
57 | dialer = fasthttpproxy.FasthttpSocksDialer(c.Proxy)
58 | default:
59 | return nil, fmt.Errorf("unsupported proxy scheme: %s", parse.Scheme)
60 | }
61 | }
62 |
63 | pc := &providers.Config{
64 | Threads: c.Threads,
65 | Timeout: c.Timeout,
66 | MaxRetries: c.MaxRetries,
67 | IncludeSubdomains: c.IncludeSubdomains,
68 | RemoveParameters: c.RemoveParameters,
69 | Client: &fasthttp.Client{
70 | TLSConfig: &tls.Config{
71 | InsecureSkipVerify: true,
72 | },
73 | Dial: dialer,
74 | },
75 | Providers: c.Providers,
76 | Output: c.Outfile,
77 | JSON: c.JSON,
78 | URLScan: providers.URLScan{
79 | Host: c.URLScan.Host,
80 | APIKey: c.URLScan.APIKey,
81 | },
82 | OTX: c.OTX,
83 | }
84 |
85 | log.SetLevel(log.ErrorLevel)
86 | if c.Verbose {
87 | log.SetLevel(log.InfoLevel)
88 | }
89 | pc.Blacklist = mapset.NewThreadUnsafeSet(c.Blacklist...)
90 | pc.Blacklist.Add("")
91 | return pc, nil
92 | }
93 |
// Options wraps the viper instance that holds the bound command-line flags
// and, once read, the config file values.
type Options struct {
	viper *viper.Viper
}
97 |
// New registers all command-line flags on the global pflag set, parses the
// command line and binds the flags to a fresh viper instance. It terminates
// the program via log.Fatal if binding fails.
func New() *Options {
	v := viper.New()

	pflag.String("o", "", "filename to write results to")
	pflag.String("config", "", "location of config file (default $HOME/.gau.toml or %USERPROFILE%\\.gau.toml)")
	pflag.Uint("threads", 1, "number of workers to spawn")
	pflag.Uint("timeout", 45, "timeout (in seconds) for HTTP client")
	pflag.Uint("retries", 0, "retries for HTTP client")
	pflag.String("proxy", "", "http proxy to use")
	pflag.StringSlice("blacklist", []string{}, "list of extensions to skip")
	pflag.StringSlice("providers", []string{}, "list of providers to use (wayback,commoncrawl,otx,urlscan)")
	pflag.Bool("subs", false, "include subdomains of target domain")
	pflag.Bool("fp", false, "remove different parameters of the same endpoint")
	pflag.Bool("verbose", false, "show verbose output")
	pflag.Bool("json", false, "output as json")

	// filter flags
	pflag.StringSlice("mc", []string{}, "list of status codes to match")
	pflag.StringSlice("fc", []string{}, "list of status codes to filter")
	pflag.StringSlice("mt", []string{}, "list of mime-types to match")
	pflag.StringSlice("ft", []string{}, "list of mime-types to filter")
	pflag.String("from", "", "fetch urls from date (format: YYYYMM)")
	pflag.String("to", "", "fetch urls to date (format: YYYYMM)")
	pflag.Bool("version", false, "show gau version")

	// include flags registered on the standard library's flag set as well
	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
	pflag.Parse()

	if err := v.BindPFlags(pflag.CommandLine); err != nil {
		log.Fatal(err)
	}

	return &Options{viper: v}
}
132 |
// Args returns the non-flag command-line arguments left after parsing.
func Args() []string {
	return pflag.Args()
}
136 |
137 | func (o *Options) ReadInConfig() (*Config, error) {
138 | confFile := o.viper.GetString("config")
139 |
140 | if confFile == "" {
141 | home, err := os.UserHomeDir()
142 | if err != nil {
143 | return o.DefaultConfig(), err
144 | }
145 |
146 | confFile = filepath.Join(home, ".gau.toml")
147 | }
148 |
149 | return o.ReadConfigFile(confFile)
150 | }
151 |
152 | func (o *Options) ReadConfigFile(name string) (*Config, error) {
153 | if _, err := os.Stat(name); errors.Is(err, os.ErrNotExist) {
154 | return o.DefaultConfig(), fmt.Errorf("Config file %s not found, using default config", name)
155 | }
156 |
157 | o.viper.SetConfigFile(name)
158 |
159 | if err := o.viper.ReadInConfig(); err != nil {
160 | return o.DefaultConfig(), err
161 | }
162 |
163 | var c Config
164 |
165 | if err := o.viper.Unmarshal(&c); err != nil {
166 | return o.DefaultConfig(), err
167 | }
168 |
169 | o.getFlagValues(&c)
170 |
171 | return &c, nil
172 | }
173 |
174 | func (o *Options) DefaultConfig() *Config {
175 | c := &Config{
176 | Filters: providers.Filters{},
177 | Proxy: "",
178 | Timeout: 45,
179 | Threads: 1,
180 | Verbose: false,
181 | MaxRetries: 5,
182 | IncludeSubdomains: false,
183 | RemoveParameters: false,
184 | Providers: []string{"wayback", "commoncrawl", "otx", "urlscan"},
185 | Blacklist: []string{},
186 | JSON: false,
187 | Outfile: "",
188 | }
189 |
190 | o.getFlagValues(c)
191 |
192 | return c
193 | }
194 |
195 | func (o *Options) getFlagValues(c *Config) {
196 | version := o.viper.GetBool("version")
197 | verbose := o.viper.GetBool("verbose")
198 | json := o.viper.GetBool("json")
199 | retries := o.viper.GetUint("retries")
200 | proxy := o.viper.GetString("proxy")
201 | outfile := o.viper.GetString("o")
202 | fetchers := o.viper.GetStringSlice("providers")
203 | threads := o.viper.GetUint("threads")
204 | blacklist := o.viper.GetStringSlice("blacklist")
205 | subs := o.viper.GetBool("subs")
206 | fp := o.viper.GetBool("fp")
207 |
208 | if version {
209 | fmt.Printf("gau version: %s\n", providers.Version)
210 | os.Exit(0)
211 | }
212 |
213 | if proxy != "" {
214 | c.Proxy = proxy
215 | }
216 |
217 | if outfile != "" {
218 | c.Outfile = outfile
219 | }
220 | // set if --threads flag is set, otherwise use default
221 | if threads > 1 {
222 | c.Threads = threads
223 | }
224 |
225 | // set if --blacklist flag is specified, otherwise use default
226 | if len(blacklist) > 0 {
227 | c.Blacklist = blacklist
228 | }
229 |
230 | // set if --providers flag is specified, otherwise use default
231 | if len(fetchers) > 0 {
232 | c.Providers = fetchers
233 | }
234 |
235 | if retries > 0 {
236 | c.MaxRetries = retries
237 | }
238 |
239 | if subs {
240 | c.IncludeSubdomains = subs
241 | }
242 |
243 | if fp {
244 | c.RemoveParameters = fp
245 | }
246 |
247 | c.JSON = json
248 | c.Verbose = verbose
249 |
250 | // get filter flags
251 | mc := o.viper.GetStringSlice("mc")
252 | fc := o.viper.GetStringSlice("fc")
253 | mt := o.viper.GetStringSlice("mt")
254 | ft := o.viper.GetStringSlice("ft")
255 | from := o.viper.GetString("from")
256 | to := o.viper.GetString("to")
257 |
258 | var seenFilterFlag bool
259 |
260 | var filters providers.Filters
261 | if len(mc) > 0 {
262 | seenFilterFlag = true
263 | filters.MatchStatusCodes = mc
264 | }
265 |
266 | if len(fc) > 0 {
267 | seenFilterFlag = true
268 | filters.FilterStatusCodes = fc
269 | }
270 |
271 | if len(mt) > 0 {
272 | seenFilterFlag = true
273 | filters.MatchMimeTypes = mt
274 | }
275 |
276 | if len(ft) > 0 {
277 | seenFilterFlag = true
278 | filters.FilterMimeTypes = ft
279 | }
280 |
281 | if from != "" {
282 | seenFilterFlag = true
283 | if _, err := time.Parse("200601", from); err == nil {
284 | filters.From = from
285 | }
286 | }
287 |
288 | if to != "" {
289 | seenFilterFlag = true
290 | if _, err := time.Parse("200601", to); err == nil {
291 | filters.To = to
292 | }
293 | }
294 |
295 | if seenFilterFlag {
296 | c.Filters = filters
297 | }
298 | }
299 |
--------------------------------------------------------------------------------
/runner/runner.go:
--------------------------------------------------------------------------------
1 | package runner
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "sync"
7 |
8 | "github.com/lc/gau/v2/pkg/providers"
9 | "github.com/lc/gau/v2/pkg/providers/commoncrawl"
10 | "github.com/lc/gau/v2/pkg/providers/otx"
11 | "github.com/lc/gau/v2/pkg/providers/urlscan"
12 | "github.com/lc/gau/v2/pkg/providers/wayback"
13 | "github.com/sirupsen/logrus"
14 | )
15 |
// Runner owns the configured providers and the pool of worker goroutines.
type Runner struct {
	// The embedded WaitGroup counts the workers launched by Start, so callers
	// can Wait on the Runner for them to finish.
	sync.WaitGroup

	// Providers are the fetchers instantiated by Init.
	Providers []providers.Provider
	// threads is the number of workers Start launches.
	threads uint
	ctx     context.Context
}
23 |
24 | // Init initializes the runner
25 | func (r *Runner) Init(c *providers.Config, providers []string, filters providers.Filters) error {
26 | r.threads = c.Threads
27 | for _, name := range providers {
28 | switch name {
29 | case "urlscan":
30 | r.Providers = append(r.Providers, urlscan.New(c))
31 | case "otx":
32 | r.Providers = append(r.Providers, otx.New(c))
33 | case "wayback":
34 | r.Providers = append(r.Providers, wayback.New(c, filters))
35 | case "commoncrawl":
36 | cc, err := commoncrawl.New(c, filters)
37 | if err != nil {
38 | return fmt.Errorf("error instantiating commoncrawl: %v\n", err)
39 | }
40 | r.Providers = append(r.Providers, cc)
41 | }
42 | }
43 |
44 | return nil
45 | }
46 |
47 | // Starts starts the worker
48 | func (r *Runner) Start(ctx context.Context, workChan chan Work, results chan string) {
49 | for i := uint(0); i < r.threads; i++ {
50 | r.Add(1)
51 | go func() {
52 | defer r.Done()
53 | r.worker(ctx, workChan, results)
54 | }()
55 | }
56 | }
57 |
// Work is one unit of work for a worker: fetch the URLs of a single domain
// from a single provider.
type Work struct {
	domain   string
	provider providers.Provider
}
62 |
63 | func NewWork(domain string, provider providers.Provider) Work {
64 | return Work{domain, provider}
65 | }
66 |
// Do runs the provider's Fetch for the work item's domain, sending found URLs
// to results, and returns Fetch's error.
func (w *Work) Do(ctx context.Context, results chan string) error {
	return w.provider.Fetch(ctx, w.domain, results)
}
70 |
71 | // worker checks to see if the context is finished and executes the fetching process for each provider
72 | func (r *Runner) worker(ctx context.Context, workChan chan Work, results chan string) {
73 | for {
74 | select {
75 | case <-ctx.Done():
76 | return
77 | case work, ok := <-workChan:
78 | if !ok {
79 | return
80 | }
81 | if err := work.Do(ctx, results); err != nil {
82 | logrus.WithField("provider", work.provider.Name()).Warnf("%s - %v", work.domain, err)
83 | }
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------