├── .gau.toml ├── .github ├── FUNDING.yml └── workflows │ ├── cicd-to-dockerhub.yml │ └── release_build.yml ├── .gitignore ├── .goreleaser.yml ├── CONTRIBUTORS.md ├── Dockerfile ├── LICENSE ├── README.md ├── cmd └── gau │ └── main.go ├── go.mod ├── go.sum ├── pkg ├── httpclient │ └── client.go ├── output │ └── output.go └── providers │ ├── commoncrawl │ ├── commoncrawl.go │ └── types.go │ ├── filters.go │ ├── otx │ └── otx.go │ ├── providers.go │ ├── urlscan │ ├── types.go │ └── urlscan.go │ └── wayback │ └── wayback.go └── runner ├── flags └── flags.go └── runner.go /.gau.toml: -------------------------------------------------------------------------------- 1 | threads = 2 2 | verbose = false 3 | retries = 15 4 | subdomains = false 5 | parameters = false 6 | providers = ["wayback","commoncrawl","otx","urlscan"] 7 | blacklist = ["ttf","woff","svg","png","jpg"] 8 | json = false 9 | 10 | [urlscan] 11 | apikey = "" 12 | 13 | [filters] 14 | from = "" 15 | to = "" 16 | matchstatuscodes = [] 17 | matchmimetypes = [] 18 | filterstatuscodes = [] 19 | filtermimetypes = ["image/png", "image/jpg", "image/svg+xml"] 20 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: lc 2 | -------------------------------------------------------------------------------- /.github/workflows/cicd-to-dockerhub.yml: -------------------------------------------------------------------------------- 1 | name: cicd-to-dockerhub 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v2 14 | 15 | - name: Login to Docker Hub 16 | uses: docker/login-action@v1 17 | with: 18 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 19 | password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} 20 | 21 | - name: Set up Docker Buildx 22 | id: buildx 23 | uses: 
docker/setup-buildx-action@v1 24 | 25 | - name: Build and push 26 | id: docker_build 27 | uses: docker/build-push-action@v2 28 | with: 29 | context: ./ 30 | file: ./Dockerfile 31 | push: true 32 | tags: ${{ secrets.DOCKER_HUB_USERNAME }}/gau:latest 33 | 34 | - name: Image digest 35 | run: echo ${{ steps.docker_build.outputs.digest }} 36 | -------------------------------------------------------------------------------- /.github/workflows/release_build.yml: -------------------------------------------------------------------------------- 1 | name: Release gau 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | build: 10 | name: GoReleaser build 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check out code into the Go module directory 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 # See: https://goreleaser.com/ci/actions/ 18 | 19 | - name: Set up Go 1.23.2 20 | uses: actions/setup-go@v2 21 | with: 22 | go-version: 1.23.2 23 | id: go 24 | 25 | - name: Import GPG key 26 | id: import_gpg 27 | uses: crazy-max/ghaction-import-gpg@v4 28 | with: 29 | gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }} 30 | passphrase: ${{ secrets.GPG_PASSPHRASE }} 31 | 32 | - name: Run GoReleaser 33 | uses: goreleaser/goreleaser-action@master 34 | with: 35 | version: latest 36 | args: release --clean 37 | env: 38 | GITHUB_TOKEN: ${{ secrets.GO_RELEASER_GITHUB_TOKEN }} 39 | GPG_FINGERPRINT: ${{ steps.import_gpg.outputs.fingerprint }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Go template 3 | # Binaries for programs and plugins 4 | *.exe 5 | *.exe~ 6 | *.dll 7 | *.so 8 | *.dylib 9 | 10 | # Test binary, built with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Dependency directories (remove the 
comment below to include it) 17 | # vendor/ 18 | .DS_Store 19 | .idea 20 | dist 21 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | before: 3 | hooks: 4 | - go mod download 5 | builds: 6 | - binary: gau 7 | goos: 8 | - linux 9 | - windows 10 | - darwin 11 | goarch: 12 | - amd64 13 | - 386 14 | - arm64 15 | ignore: 16 | - goos: darwin 17 | goarch: 386 18 | - goos: windows 19 | goarch: 'arm64' 20 | main: ./cmd/gau/ 21 | archives: 22 | - id: tgz 23 | format: tar.gz 24 | format_overrides: 25 | - goos: windows 26 | format: zip 27 | 28 | signs: 29 | - artifacts: checksum 30 | args: [ "--batch", "-u", "{{ .Env.GPG_FINGERPRINT }}", "--output", "${signature}", "--detach-sign", "${artifact}" ] 31 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | * [lc](https://github.com/lc) 3 | * [shellbear](https://github.com/shellbear) 4 | 5 | 6 | Thanks to [tomnomnom](https://github.com/tomnomnom) for waybackurls! 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build image: golang:1.21.0-alpine3.17 2 | FROM golang:1.21.0-alpine3.17 as build 3 | 4 | WORKDIR /app 5 | 6 | COPY . . 
7 | RUN go mod download && go build -o ./build/gau ./cmd/gau 8 | 9 | ENTRYPOINT ["/app/gau/build/gau"] 10 | 11 | # Release image: alpine:3.17 12 | FROM alpine:3.17 13 | 14 | RUN apk -U upgrade --no-cache 15 | COPY --from=build /app/build/gau /usr/local/bin/gau 16 | 17 | RUN adduser \ 18 | --gecos "" \ 19 | --disabled-password \ 20 | gau 21 | 22 | USER gau 23 | ENTRYPOINT ["gau"] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Corben Leo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # getallurls (gau) 2 | [![License](https://img.shields.io/badge/license-MIT-_red.svg)](https://opensource.org/licenses/MIT) 3 | 4 | getallurls (gau) fetches known URLs from AlienVault's [Open Threat Exchange](https://otx.alienvault.com), the Wayback Machine, Common Crawl, and URLScan for any given domain. Inspired by Tomnomnom's [waybackurls](https://github.com/tomnomnom/waybackurls). 5 | 6 | # Resources 7 | - [Usage](#usage) 8 | - [Installation](#installation) 9 | - [ohmyzsh note](#ohmyzsh-note) 10 | 11 | ## Usage: 12 | Examples: 13 | 14 | ```bash 15 | $ printf example.com | gau 16 | $ cat domains.txt | gau --threads 5 17 | $ gau example.com google.com 18 | $ gau --o example-urls.txt example.com 19 | $ gau --blacklist png,jpg,gif example.com 20 | ``` 21 | 22 | To display the help for the tool use the `-h` flag: 23 | 24 | ```bash 25 | $ gau -h 26 | ``` 27 | 28 | | Flag | Description | Example | 29 | |------|-------------|---------| 30 | |`--blacklist`| list of extensions to skip | gau --blacklist ttf,woff,svg,png| 31 | |`--config` | Use alternate configuration file (default `$HOME/.gau.toml` or `%USERPROFILE%\.gau.toml`) | gau --config $HOME/.config/gau.toml| 32 | |`--fc`| list of status codes to filter | gau --fc 404,302 | 33 | |`--from`| fetch urls from date (format: YYYYMM) | gau --from 202101 | 34 | |`--ft`| list of mime-types to filter | gau --ft text/plain| 35 | |`--fp`| remove different parameters of the same endpoint | gau --fp| 36 | |`--json`| output as json | gau --json | 37 | |`--mc`| list of status codes to match | gau --mc 200,500 | 38 | |`--mt`| list of mime-types to match |gau --mt text/html,application/json| 39 | |`--o`| filename to write results to | gau --o out.txt | 40 | |`--providers`| list of providers to use (wayback,commoncrawl,otx,urlscan) | gau --providers 
wayback| 41 | |`--proxy`| http proxy to use (socks5:// or http://) | gau --proxy http://proxy.example.com:8080 | 42 | |`--retries`| retries for HTTP client | gau --retries 10 | 43 | |`--timeout`| timeout (in seconds) for HTTP client | gau --timeout 60 | 44 | |`--subs`| include subdomains of target domain | gau example.com --subs | 45 | |`--threads`| number of workers to spawn | gau example.com --threads 5 | 46 | |`--to`| fetch urls to date (format: YYYYMM) | gau example.com --to 202101 | 47 | |`--verbose`| show verbose output | gau --verbose example.com | 48 | |`--version`| show gau version | gau --version| 49 | 50 | 51 | ## Configuration Files 52 | gau automatically looks for a configuration file at `$HOME/.gau.toml` or `%USERPROFILE%\.gau.toml`. You can point to a different configuration file using the `--config` flag. **If the configuration file is not found, gau will still run with a default configuration, but will output a message to stderr**. 53 | 54 | You can specify options and they will be used for every subsequent run of gau. Any options provided via command line flags will override options set in the configuration file. 55 | 56 | An example configuration file can be found [here](https://github.com/lc/gau/blob/master/.gau.toml) 57 | 58 | ## Installation: 59 | ### From source: 60 | ``` 61 | $ go install github.com/lc/gau/v2/cmd/gau@latest 62 | ``` 63 | ### From GitHub: 64 | ``` 65 | git clone https://github.com/lc/gau.git; \ 66 | cd gau/cmd/gau; \ 67 | go build; \ 68 | sudo mv gau /usr/local/bin/; \ 69 | gau --version; 70 | ``` 71 | ### From binary: 72 | You can download the pre-built binaries from the [releases](https://github.com/lc/gau/releases/) page and then move them into your $PATH. 
73 | 74 | ```bash 75 | $ tar xvf gau_2.0.6_linux_amd64.tar.gz 76 | $ mv gau /usr/bin/gau 77 | ``` 78 | 79 | ### From Docker: 80 | You can run gau via docker like so: 81 | ```bash 82 | docker run --rm sxcurity/gau:latest --help 83 | ``` 84 | 85 | 86 | You can also build a docker image with the following command: 87 | ```bash 88 | docker build -t gau . 89 | ``` 90 | and then run it: 91 | ```bash 92 | docker run gau example.com 93 | ``` 94 | Bear in mind that piping a command (echo "example.com" | gau) will not work with the docker container. 95 | 96 | 97 | ## ohmyzsh note: 98 | ohmyzsh's [git plugin](https://github.com/ohmyzsh/ohmyzsh/tree/master/plugins/git) has an alias which maps `gau` to the `git add --update` command. This is problematic, causing a binary conflict between this tool "gau" and the zsh plugin alias "gau" (`git add --update`). There are currently a few workarounds, which can be found in this GitHub [issue](https://github.com/lc/gau/issues/8). 99 | 100 | 101 | ## Useful? 102 | 103 | Buy Me A Coffee 104 | 105 | Donate to CommonCrawl
106 | Donate to the InternetArchive 107 | -------------------------------------------------------------------------------- /cmd/gau/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "io" 7 | "os" 8 | "sync" 9 | 10 | "github.com/lc/gau/v2/pkg/output" 11 | "github.com/lc/gau/v2/runner" 12 | "github.com/lc/gau/v2/runner/flags" 13 | log "github.com/sirupsen/logrus" 14 | ) 15 | 16 | func main() { 17 | cfg, err := flags.New().ReadInConfig() 18 | if err != nil { 19 | log.Warnf("error reading config: %v", err) 20 | } 21 | 22 | config, err := cfg.ProviderConfig() 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | 27 | gau := new(runner.Runner) 28 | 29 | if err = gau.Init(config, cfg.Providers, cfg.Filters); err != nil { 30 | log.Warn(err) 31 | } 32 | 33 | results := make(chan string) 34 | 35 | out := os.Stdout 36 | // Handle results in background 37 | if config.Output != "" { 38 | out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) 39 | if err != nil { 40 | log.Fatalf("Could not open output file: %v\n", err) 41 | } 42 | defer out.Close() 43 | } 44 | 45 | var writeWg sync.WaitGroup 46 | writeWg.Add(1) 47 | go func(out io.Writer, JSON bool) { 48 | defer writeWg.Done() 49 | if JSON { 50 | output.WriteURLsJSON(out, results, config.Blacklist, config.RemoveParameters) 51 | } else if err = output.WriteURLs(out, results, config.Blacklist, config.RemoveParameters); err != nil { 52 | log.Fatalf("error writing results: %v\n", err) 53 | } 54 | }(out, config.JSON) 55 | ctx, cancel := context.WithCancel(context.Background()) 56 | defer cancel() 57 | workChan := make(chan runner.Work) 58 | gau.Start(ctx, workChan, results) 59 | domains := flags.Args() 60 | if len(domains) > 0 { 61 | for _, provider := range gau.Providers { 62 | for _, domain := range domains { 63 | workChan <- runner.NewWork(domain, provider) 64 | } 65 | } 66 | } else { 67 | sc := 
bufio.NewScanner(os.Stdin) 68 | for sc.Scan() { 69 | domain := sc.Text() 70 | for _, provider := range gau.Providers { 71 | workChan <- runner.NewWork(domain, provider) 72 | } 73 | } 74 | if err := sc.Err(); err != nil { 75 | log.Fatal(err) 76 | } 77 | } 78 | close(workChan) 79 | 80 | // wait for providers to fetch URLS 81 | gau.Wait() 82 | 83 | // close results channel 84 | close(results) 85 | 86 | // wait for writer to finish output 87 | writeWg.Wait() 88 | } 89 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lc/gau/v2 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89 7 | github.com/deckarep/golang-set/v2 v2.3.0 8 | github.com/json-iterator/go v1.1.12 9 | github.com/lynxsecurity/pflag v1.1.3 10 | github.com/lynxsecurity/viper v1.10.0 11 | github.com/sirupsen/logrus v1.8.1 12 | github.com/valyala/bytebufferpool v1.0.0 13 | github.com/valyala/fasthttp v1.31.0 14 | ) 15 | 16 | require ( 17 | github.com/andybalholm/brotli v1.0.2 // indirect 18 | github.com/fsnotify/fsnotify v1.5.1 // indirect 19 | github.com/hashicorp/hcl v1.0.0 // indirect 20 | github.com/klauspost/compress v1.13.4 // indirect 21 | github.com/magiconair/properties v1.8.5 // indirect 22 | github.com/mitchellh/mapstructure v1.4.2 // indirect 23 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect 24 | github.com/modern-go/reflect2 v1.0.2 // indirect 25 | github.com/pelletier/go-toml v1.9.4 // indirect 26 | github.com/spf13/afero v1.6.0 // indirect 27 | github.com/spf13/cast v1.4.1 // indirect 28 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 29 | github.com/subosito/gotenv v1.2.0 // indirect 30 | golang.org/x/net v0.17.0 // indirect 31 | golang.org/x/sys v0.13.0 // indirect 32 | golang.org/x/text v0.13.0 // indirect 33 | gopkg.in/ini.v1 v1.64.0 // indirect 34 | 
gopkg.in/yaml.v2 v2.4.0 // indirect 35 | ) 36 | 37 | retract ( 38 | v2.0.7 39 | v2.0.3 40 | v2.0.2 41 | v2.0.1 42 | ) 43 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/andybalholm/brotli v1.0.2 h1:JKnhI/XQ75uFBTiuzXpzFrUriDPiZjlOSzh6wXogP0E= 2 | github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= 3 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89 h1:2pkAuIM8OF1fy4ToFpMnI4oE+VeUNRbGrpSLKshK0oQ= 4 | github.com/bobesa/go-domain-util v0.0.0-20190911083921-4033b5f7dd89/go.mod h1:/09nEjna1UMoasyyQDhOrIn8hi2v2kiJglPWed1idck= 5 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 7 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/deckarep/golang-set/v2 v2.3.0 h1:qs18EKUfHm2X9fA50Mr/M5hccg2tNnVqsiBImnyDs0g= 9 | github.com/deckarep/golang-set/v2 v2.3.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= 10 | github.com/fsnotify/fsnotify v1.5.1 h1:mZcQUHVQUQWoPXXtuf9yuEXKudkV2sx1E06UadKWpgI= 11 | github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= 12 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 13 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 14 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= 15 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 16 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 17 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 18 | github.com/klauspost/compress v1.13.4 
h1:0zhec2I8zGnjWcKyLl6i3gPqKANCCn5e9xmviEEeX6s= 19 | github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= 20 | github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= 21 | github.com/lynxsecurity/pflag v1.1.3 h1:/5R9phe4nvKWXbcSWiWUsdWPa7CI+TKR3EyLhvUf0fQ= 22 | github.com/lynxsecurity/pflag v1.1.3/go.mod h1:Yz08toY61CsgZXC/AIHEbfHi45Vcsihen8PYE5vAfs0= 23 | github.com/lynxsecurity/viper v1.10.0 h1:4Y6fXjnid2CkrT2bjcI3nPjBkWpiLf+Z7PLNBQ29N/8= 24 | github.com/lynxsecurity/viper v1.10.0/go.mod h1:JdScMPWhCuBZ5pKBAEs9G2uioQVjsfGbkyIjrMnrIJo= 25 | github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= 26 | github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= 27 | github.com/mitchellh/mapstructure v1.4.2 h1:6h7AQ0yhTcIsmFmnAwQls75jp2Gzs4iB8W7pjMO+rqo= 28 | github.com/mitchellh/mapstructure v1.4.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= 29 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= 30 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 31 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 32 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 33 | github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= 34 | github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= 35 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 36 | github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= 37 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 38 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 39 | 
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= 40 | github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 41 | github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= 42 | github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= 43 | github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA= 44 | github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 45 | github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= 46 | github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= 47 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 48 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 49 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 50 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 51 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 52 | github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= 53 | github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= 54 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= 55 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 56 | github.com/valyala/fasthttp v1.31.0 h1:lrauRLII19afgCs2fnWRJ4M5IkV0lo2FqA61uGkNBfE= 57 | github.com/valyala/fasthttp v1.31.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= 58 | github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= 59 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 60 | golang.org/x/crypto 
v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 61 | golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= 62 | golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 63 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 64 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 65 | golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 66 | golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= 67 | golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= 68 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 69 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 70 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 71 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 72 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 73 | golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 74 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 75 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= 76 | golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 77 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 78 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 79 | golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 80 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 81 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= 82 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= 83 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 84 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 85 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 86 | gopkg.in/ini.v1 v1.64.0 h1:Mj2zXEXcNb5joEiSA0zc3HZpTst/iyjNiR4CN8tDzOg= 87 | gopkg.in/ini.v1 v1.64.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= 88 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 89 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 90 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 91 | -------------------------------------------------------------------------------- /pkg/httpclient/client.go: -------------------------------------------------------------------------------- 1 | package httpclient 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "time" 7 | 8 | "github.com/valyala/fasthttp" 9 | ) 10 | 11 | var ( 12 | ErrNilResponse = errors.New("unexpected nil response") 13 | ErrNon200Response = errors.New("API responded with non-200 status code") 14 | ErrBadRequest = errors.New("API responded with 400 status code") 15 | ) 16 | 17 | type Header struct { 18 | Key string 19 | Value string 20 | } 21 | 22 | func MakeRequest(c *fasthttp.Client, url string, maxRetries uint, timeout uint, headers ...Header) ([]byte, error) { 23 | var ( 24 | req *fasthttp.Request 25 | respBody []byte 26 | err error 27 | ) 28 | retries := int(maxRetries) 29 | for i := retries; i >= 0; i-- { 30 | req = fasthttp.AcquireRequest() 31 | 32 | 
req.Header.SetMethod(fasthttp.MethodGet) 33 | for _, header := range headers { 34 | if header.Key != "" { 35 | req.Header.Set(header.Key, header.Value) 36 | } 37 | } 38 | req.Header.Set(fasthttp.HeaderUserAgent, getUserAgent()) 39 | req.Header.Set("Accept", "*/*") 40 | req.SetRequestURI(url) 41 | respBody, err = doReq(c, req, timeout) 42 | if err == nil { 43 | break 44 | } 45 | } 46 | if err != nil { 47 | return nil, err 48 | } 49 | return respBody, nil 50 | } 51 | 52 | // doReq handles http requests 53 | func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, error) { 54 | resp := fasthttp.AcquireResponse() 55 | defer fasthttp.ReleaseResponse(resp) 56 | defer fasthttp.ReleaseRequest(req) 57 | if err := c.DoTimeout(req, resp, time.Second*time.Duration(timeout)); err != nil { 58 | return nil, err 59 | } 60 | if resp.StatusCode() != 200 { 61 | if resp.StatusCode() == 400 { 62 | return nil, ErrBadRequest 63 | } 64 | return nil, ErrNon200Response 65 | } 66 | if resp.Body() == nil { 67 | return nil, ErrNilResponse 68 | } 69 | 70 | return resp.Body(), nil 71 | } 72 | 73 | func getUserAgent() string { 74 | payload := []string{ 75 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", 76 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36", 77 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0", 78 | "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36", 79 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15", 80 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36", 81 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0", 82 | "Mozilla/5.0 (iPhone; CPU iPhone OS 
8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4", 83 | "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", 84 | "Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53", 85 | "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)", 86 | } 87 | 88 | randomIndex := rand.Intn(len(payload)) 89 | pick := payload[randomIndex] 90 | 91 | return pick 92 | } 93 | -------------------------------------------------------------------------------- /pkg/output/output.go: -------------------------------------------------------------------------------- 1 | package output 2 | 3 | import ( 4 | "io" 5 | "net/url" 6 | "path" 7 | "strings" 8 | 9 | mapset "github.com/deckarep/golang-set/v2" 10 | jsoniter "github.com/json-iterator/go" 11 | "github.com/valyala/bytebufferpool" 12 | ) 13 | 14 | type JSONResult struct { 15 | Url string `json:"url"` 16 | } 17 | 18 | func WriteURLs(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) error { 19 | lastURL := mapset.NewThreadUnsafeSet[string]() 20 | for result := range results { 21 | buf := bytebufferpool.Get() 22 | u, err := url.Parse(result) 23 | if err != nil { 24 | continue 25 | } 26 | if path.Ext(u.Path) != "" && blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) { 27 | continue 28 | } 29 | 30 | if RemoveParameters && !lastURL.Contains(u.Host+u.Path) { 31 | continue 32 | } 33 | lastURL.Add(u.Host + u.Path) 34 | 35 | buf.B = append(buf.B, []byte(result)...) 36 | buf.B = append(buf.B, "\n"...) 
37 | _, err = writer.Write(buf.B) 38 | if err != nil { 39 | return err 40 | } 41 | bytebufferpool.Put(buf) 42 | } 43 | return nil 44 | } 45 | 46 | func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap mapset.Set[string], RemoveParameters bool) { 47 | var jr JSONResult 48 | enc := jsoniter.NewEncoder(writer) 49 | for result := range results { 50 | u, err := url.Parse(result) 51 | if err != nil { 52 | continue 53 | } 54 | if blacklistMap.Contains(strings.ToLower(path.Ext(u.Path))) { 55 | continue 56 | } 57 | jr.Url = result 58 | if err := enc.Encode(jr); err != nil { 59 | // todo: handle this error 60 | continue 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pkg/providers/commoncrawl/commoncrawl.go: -------------------------------------------------------------------------------- 1 | package commoncrawl 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "context" 7 | "errors" 8 | "fmt" 9 | 10 | jsoniter "github.com/json-iterator/go" 11 | "github.com/lc/gau/v2/pkg/httpclient" 12 | "github.com/lc/gau/v2/pkg/providers" 13 | "github.com/sirupsen/logrus" 14 | ) 15 | 16 | const ( 17 | Name = "commoncrawl" 18 | ) 19 | 20 | // verify interface compliance 21 | var _ providers.Provider = (*Client)(nil) 22 | 23 | // Client is the structure that holds the Filters and the Client's configuration 24 | type Client struct { 25 | filters providers.Filters 26 | config *providers.Config 27 | 28 | apiURL string 29 | } 30 | 31 | func New(c *providers.Config, filters providers.Filters) (*Client, error) { 32 | // Fetch the list of available CommonCrawl Api URLs. 
33 | resp, err := httpclient.MakeRequest(c.Client, "http://index.commoncrawl.org/collinfo.json", c.MaxRetries, c.Timeout) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | var r apiResult 39 | if err = jsoniter.Unmarshal(resp, &r); err != nil { 40 | return nil, err 41 | } 42 | 43 | if len(r) == 0 { 44 | return nil, errors.New("failed to grab latest commoncrawl index") 45 | } 46 | 47 | return &Client{config: c, filters: filters, apiURL: r[0].API}, nil 48 | } 49 | 50 | func (c *Client) Name() string { 51 | return Name 52 | } 53 | 54 | // Fetch fetches all urls for a given domain and sends them to a channel. 55 | // It returns an error should one occur. 56 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { 57 | p, err := c.getPagination(domain) 58 | if err != nil { 59 | return err 60 | } 61 | // 0 pages means no results 62 | if p.Pages == 0 { 63 | logrus.WithFields(logrus.Fields{"provider": Name}).Infof("no results for %s", domain) 64 | return nil 65 | } 66 | 67 | for page := uint(0); page < p.Pages; page++ { 68 | select { 69 | case <-ctx.Done(): 70 | return nil 71 | default: 72 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) 73 | apiURL := c.formatURL(domain, page) 74 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) 75 | if err != nil { 76 | return fmt.Errorf("failed to fetch commoncrawl(%d): %s", page, err) 77 | } 78 | 79 | sc := bufio.NewScanner(bytes.NewReader(resp)) 80 | for sc.Scan() { 81 | var res apiResponse 82 | if err := jsoniter.Unmarshal(sc.Bytes(), &res); err != nil { 83 | return fmt.Errorf("failed to decode commoncrawl result: %s", err) 84 | } 85 | if res.Error != "" { 86 | return fmt.Errorf("received an error from commoncrawl: %s", res.Error) 87 | } 88 | 89 | results <- res.URL 90 | } 91 | } 92 | } 93 | return nil 94 | } 95 | 96 | func (c *Client) formatURL(domain string, page uint) string { 97 | if 
c.config.IncludeSubdomains { 98 | domain = "*." + domain 99 | } 100 | 101 | filterParams := c.filters.GetParameters(false) 102 | 103 | return fmt.Sprintf("%s?url=%s/*&output=json&fl=url&page=%d", c.apiURL, domain, page) + filterParams 104 | } 105 | 106 | // Fetch the number of pages. 107 | func (c *Client) getPagination(domain string) (r paginationResult, err error) { 108 | url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0)) 109 | var resp []byte 110 | 111 | resp, err = httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout) 112 | if err != nil { 113 | return 114 | } 115 | 116 | err = jsoniter.Unmarshal(resp, &r) 117 | return 118 | } 119 | -------------------------------------------------------------------------------- /pkg/providers/commoncrawl/types.go: -------------------------------------------------------------------------------- 1 | package commoncrawl 2 | 3 | type apiResponse struct { 4 | URL string `json:"url"` 5 | Error string `json:"error"` 6 | } 7 | 8 | type paginationResult struct { 9 | Blocks uint `json:"blocks"` 10 | PageSize uint `json:"pageSize"` 11 | Pages uint `json:"pages"` 12 | } 13 | 14 | type apiResult []struct { 15 | API string `json:"cdx-api"` 16 | } 17 | -------------------------------------------------------------------------------- /pkg/providers/filters.go: -------------------------------------------------------------------------------- 1 | package providers 2 | 3 | import "net/url" 4 | 5 | type Filters struct { 6 | From string `mapstructure:"from"` 7 | To string `mapstructure:"to"` 8 | MatchStatusCodes []string `mapstructure:"matchstatuscodes"` 9 | MatchMimeTypes []string `mapstructure:"matchmimetypes"` 10 | FilterStatusCodes []string `mapstructure:"filterstatuscodes"` 11 | FilterMimeTypes []string `mapstructure:"filtermimetypes"` 12 | } 13 | 14 | func (f *Filters) GetParameters(forWayback bool) string { 15 | form := url.Values{} 16 | if f.From != "" { 17 | form.Add("from", f.From) 18 | } 19 | 
20 | if f.To != "" { 21 | form.Add("to", f.To) 22 | } 23 | 24 | switch forWayback { 25 | case true: 26 | // generate parameters for wayback 27 | if len(f.MatchMimeTypes) > 0 { 28 | for _, mt := range f.MatchMimeTypes { 29 | form.Add("filter", "mimetype:"+mt) 30 | } 31 | } 32 | 33 | if len(f.MatchStatusCodes) > 0 { 34 | for _, ms := range f.MatchStatusCodes { 35 | form.Add("filter", "statuscode:"+ms) 36 | } 37 | } 38 | 39 | if len(f.FilterStatusCodes) > 0 { 40 | for _, sc := range f.FilterStatusCodes { 41 | form.Add("filter", "!statuscode:"+sc) 42 | } 43 | } 44 | 45 | if len(f.FilterMimeTypes) > 0 { 46 | for _, mt := range f.FilterMimeTypes { 47 | form.Add("filter", "!mimetype:"+mt) 48 | } 49 | } 50 | default: 51 | // generate parameters for commoncrawl 52 | if len(f.MatchStatusCodes) > 0 { 53 | for _, ms := range f.MatchStatusCodes { 54 | form.Add("filter", "status:"+ms) 55 | } 56 | } 57 | 58 | if len(f.MatchMimeTypes) > 0 { 59 | for _, mt := range f.MatchMimeTypes { 60 | form.Add("filter", "mime:"+mt) 61 | } 62 | } 63 | 64 | if len(f.FilterStatusCodes) > 0 { 65 | for _, ms := range f.FilterStatusCodes { 66 | form.Add("filter", "!=status:"+ms) 67 | } 68 | } 69 | 70 | if len(f.FilterMimeTypes) > 0 { 71 | for _, fs := range f.FilterMimeTypes { 72 | form.Add("filter", "!=mime:"+fs) 73 | } 74 | } 75 | 76 | } 77 | 78 | params := form.Encode() 79 | if params != "" { 80 | return "&" + params 81 | } 82 | 83 | return params 84 | } 85 | -------------------------------------------------------------------------------- /pkg/providers/otx/otx.go: -------------------------------------------------------------------------------- 1 | package otx 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/bobesa/go-domain-util/domainutil" 8 | jsoniter "github.com/json-iterator/go" 9 | "github.com/lc/gau/v2/pkg/httpclient" 10 | "github.com/lc/gau/v2/pkg/providers" 11 | "github.com/sirupsen/logrus" 12 | ) 13 | 14 | const ( 15 | Name = "otx" 16 | ) 17 | 18 | type Client struct { 19 | 
config *providers.Config 20 | } 21 | 22 | var _ providers.Provider = (*Client)(nil) 23 | 24 | func New(c *providers.Config) *Client { 25 | if c.OTX != "" { 26 | setBaseURL(c.OTX) 27 | } 28 | return &Client{config: c} 29 | } 30 | 31 | type otxResult struct { 32 | HasNext bool `json:"has_next"` 33 | ActualSize int `json:"actual_size"` 34 | URLList []struct { 35 | Domain string `json:"domain"` 36 | URL string `json:"url"` 37 | Hostname string `json:"hostname"` 38 | HTTPCode int `json:"httpcode"` 39 | PageNum int `json:"page_num"` 40 | FullSize int `json:"full_size"` 41 | Paged bool `json:"paged"` 42 | } `json:"url_list"` 43 | } 44 | 45 | func (c *Client) Name() string { 46 | return Name 47 | } 48 | 49 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { 50 | for page := uint(1); ; page++ { 51 | select { 52 | case <-ctx.Done(): 53 | return nil 54 | default: 55 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page - 1}).Infof("fetching %s", domain) 56 | apiURL := c.formatURL(domain, page) 57 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) 58 | if err != nil { 59 | return fmt.Errorf("failed to fetch alienvault(%d): %s", page, err) 60 | } 61 | var result otxResult 62 | if err := jsoniter.Unmarshal(resp, &result); err != nil { 63 | return fmt.Errorf("failed to decode otx results for page %d: %s", page, err) 64 | } 65 | 66 | for _, entry := range result.URLList { 67 | results <- entry.URL 68 | } 69 | 70 | if !result.HasNext { 71 | return nil 72 | } 73 | } 74 | } 75 | } 76 | 77 | func (c *Client) formatURL(domain string, page uint) string { 78 | category := "hostname" 79 | if !domainutil.HasSubdomain(domain) { 80 | category = "domain" 81 | } 82 | if domainutil.HasSubdomain(domain) && c.config.IncludeSubdomains { 83 | domain = domainutil.Domain(domain) 84 | category = "domain" 85 | } 86 | 87 | return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", 
_BaseURL, category, domain, page) 88 | } 89 | 90 | var _BaseURL = "https://otx.alienvault.com/" 91 | 92 | func setBaseURL(baseURL string) { 93 | _BaseURL = baseURL 94 | } 95 | -------------------------------------------------------------------------------- /pkg/providers/providers.go: -------------------------------------------------------------------------------- 1 | package providers 2 | 3 | import ( 4 | "context" 5 | 6 | mapset "github.com/deckarep/golang-set/v2" 7 | "github.com/valyala/fasthttp" 8 | ) 9 | 10 | const Version = `2.2.4` 11 | 12 | // Provider is a generic interface for all archive fetchers 13 | type Provider interface { 14 | Fetch(ctx context.Context, domain string, results chan string) error 15 | Name() string 16 | } 17 | 18 | type URLScan struct { 19 | Host string 20 | APIKey string 21 | } 22 | 23 | type Config struct { 24 | Threads uint 25 | Timeout uint 26 | MaxRetries uint 27 | IncludeSubdomains bool 28 | RemoveParameters bool 29 | Client *fasthttp.Client 30 | Providers []string 31 | Blacklist mapset.Set[string] 32 | Output string 33 | JSON bool 34 | URLScan URLScan 35 | OTX string 36 | } 37 | -------------------------------------------------------------------------------- /pkg/providers/urlscan/types.go: -------------------------------------------------------------------------------- 1 | package urlscan 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | var _BaseURL = "https://urlscan.io/" 8 | 9 | type apiResponse struct { 10 | Status int `json:"status"` 11 | Results []searchResult `json:"results"` 12 | HasMore bool `json:"has_more"` 13 | } 14 | 15 | type searchResult struct { 16 | Page archivedPage 17 | Sort []interface{} `json:"sort"` 18 | } 19 | 20 | type archivedPage struct { 21 | Domain string `json:"domain"` 22 | MimeType string `json:"mimeType"` 23 | URL string `json:"url"` 24 | Status string `json:"status"` 25 | } 26 | 27 | func parseSort(sort []interface{}) string { 28 | var sortParam []string 29 | for _, t := range sort { 30 | switch 
t.(type) { 31 | case string: 32 | sortParam = append(sortParam, t.(string)) 33 | } 34 | } 35 | return strings.Join(sortParam, ",") 36 | } 37 | -------------------------------------------------------------------------------- /pkg/providers/urlscan/urlscan.go: -------------------------------------------------------------------------------- 1 | package urlscan 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "strings" 8 | 9 | jsoniter "github.com/json-iterator/go" 10 | "github.com/lc/gau/v2/pkg/httpclient" 11 | "github.com/lc/gau/v2/pkg/providers" 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | const ( 16 | Name = "urlscan" 17 | ) 18 | 19 | type Client struct { 20 | config *providers.Config 21 | } 22 | 23 | func New(c *providers.Config) *Client { 24 | if c.URLScan.Host != "" { 25 | setBaseURL(c.URLScan.Host) 26 | } 27 | 28 | return &Client{config: c} 29 | } 30 | 31 | func (c *Client) Name() string { 32 | return Name 33 | } 34 | 35 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { 36 | var searchAfter string 37 | var header httpclient.Header 38 | 39 | if c.config.URLScan.APIKey != "" { 40 | header.Key = "API-Key" 41 | header.Value = c.config.URLScan.APIKey 42 | } 43 | 44 | for page := uint(0); ; page++ { 45 | select { 46 | case <-ctx.Done(): 47 | return nil 48 | default: 49 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) 50 | apiURL := c.formatURL(domain, searchAfter) 51 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout, header) 52 | if err != nil { 53 | return fmt.Errorf("failed to fetch urlscan: %s", err) 54 | } 55 | var result apiResponse 56 | decoder := jsoniter.NewDecoder(bytes.NewReader(resp)) 57 | decoder.UseNumber() 58 | if err = decoder.Decode(&result); err != nil { 59 | return fmt.Errorf("failed to decode urlscan result: %s", err) 60 | } 61 | // rate limited 62 | if result.Status == 429 { 63 | 
logrus.WithField("provider", "urlscan").Warnf("urlscan responded with 429, probably being rate limited") 64 | return nil 65 | } 66 | 67 | total := len(result.Results) 68 | for i, res := range result.Results { 69 | if res.Page.Domain == domain || (c.config.IncludeSubdomains && strings.HasSuffix(res.Page.Domain, domain)) { 70 | results <- res.Page.URL 71 | } 72 | 73 | if i == total-1 { 74 | sortParam := parseSort(res.Sort) 75 | if sortParam == "" { 76 | return nil 77 | } 78 | searchAfter = sortParam 79 | } 80 | } 81 | 82 | if !result.HasMore { 83 | return nil 84 | } 85 | } 86 | } 87 | } 88 | 89 | func (c *Client) formatURL(domain string, after string) string { 90 | if after != "" { 91 | after = "&search_after=" + after 92 | } 93 | 94 | return fmt.Sprintf(_BaseURL+"api/v1/search/?q=domain:%s&size=100", domain) + after 95 | } 96 | 97 | func setBaseURL(baseURL string) { 98 | _BaseURL = baseURL 99 | } 100 | -------------------------------------------------------------------------------- /pkg/providers/wayback/wayback.go: -------------------------------------------------------------------------------- 1 | package wayback 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | 8 | jsoniter "github.com/json-iterator/go" 9 | "github.com/lc/gau/v2/pkg/httpclient" 10 | "github.com/lc/gau/v2/pkg/providers" 11 | "github.com/sirupsen/logrus" 12 | ) 13 | 14 | const ( 15 | Name = "wayback" 16 | ) 17 | 18 | // verify interface compliance 19 | var _ providers.Provider = (*Client)(nil) 20 | 21 | // Client is the structure that holds the WaybackFilters and the Client's configuration 22 | type Client struct { 23 | filters providers.Filters 24 | config *providers.Config 25 | } 26 | 27 | func New(config *providers.Config, filters providers.Filters) *Client { 28 | return &Client{filters, config} 29 | } 30 | 31 | func (c *Client) Name() string { 32 | return Name 33 | } 34 | 35 | // waybackResult holds the response from the wayback API 36 | type waybackResult [][]string 37 | 38 | // Fetch 
fetches all urls for a given domain and sends them to a channel. 39 | // It returns an error should one occur. 40 | func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { 41 | for page := uint(0); ; page++ { 42 | select { 43 | case <-ctx.Done(): 44 | return nil 45 | default: 46 | logrus.WithFields(logrus.Fields{"provider": Name, "page": page}).Infof("fetching %s", domain) 47 | apiURL := c.formatURL(domain, page) 48 | // make HTTP request 49 | resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) 50 | if err != nil { 51 | if errors.Is(err, httpclient.ErrBadRequest) { 52 | return nil 53 | } 54 | return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err) 55 | } 56 | var result waybackResult 57 | if err = jsoniter.Unmarshal(resp, &result); err != nil { 58 | return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err) 59 | } 60 | 61 | // check if there's results, wayback's pagination response 62 | // is not always correct when using a filter 63 | if len(result) == 0 { 64 | break 65 | } 66 | 67 | // output results 68 | // Slicing as [1:] to skip first result by default 69 | for _, entry := range result[1:] { 70 | results <- entry[0] 71 | } 72 | } 73 | } 74 | } 75 | 76 | // formatUrl returns a formatted URL for the Wayback API 77 | func (c *Client) formatURL(domain string, page uint) string { 78 | if c.config.IncludeSubdomains { 79 | domain = "*." 
+ domain 80 | } 81 | filterParams := c.filters.GetParameters(true) 82 | return fmt.Sprintf( 83 | "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d", 84 | domain, page, 85 | ) + filterParams 86 | } 87 | -------------------------------------------------------------------------------- /runner/flags/flags.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "crypto/tls" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "net/url" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | "time" 13 | 14 | mapset "github.com/deckarep/golang-set/v2" 15 | "github.com/lc/gau/v2/pkg/providers" 16 | "github.com/lynxsecurity/pflag" 17 | "github.com/lynxsecurity/viper" 18 | log "github.com/sirupsen/logrus" 19 | "github.com/valyala/fasthttp" 20 | "github.com/valyala/fasthttp/fasthttpproxy" 21 | ) 22 | 23 | type URLScanConfig struct { 24 | Host string `mapstructure:"host"` 25 | APIKey string `mapstructure:"apikey"` 26 | } 27 | 28 | type Config struct { 29 | Filters providers.Filters `mapstructure:"filters"` 30 | Proxy string `mapstructure:"proxy"` 31 | Threads uint `mapstructure:"threads"` 32 | Timeout uint `mapstructure:"timeout"` 33 | Verbose bool `mapstructure:"verbose"` 34 | MaxRetries uint `mapstructure:"retries"` 35 | IncludeSubdomains bool `mapstructure:"subdomains"` 36 | RemoveParameters bool `mapstructure:"parameters"` 37 | Providers []string `mapstructure:"providers"` 38 | Blacklist []string `mapstructure:"blacklist"` 39 | JSON bool `mapstructure:"json"` 40 | URLScan URLScanConfig `mapstructure:"urlscan"` 41 | OTX string `mapstructure:"otx"` 42 | Outfile string // output file to write to 43 | } 44 | 45 | func (c *Config) ProviderConfig() (*providers.Config, error) { 46 | var dialer fasthttp.DialFunc 47 | 48 | if c.Proxy != "" { 49 | parse, err := url.Parse(c.Proxy) 50 | if err != nil { 51 | return nil, fmt.Errorf("proxy url: %v", err) 52 | } 53 | switch 
parse.Scheme { 54 | case "http": 55 | dialer = fasthttpproxy.FasthttpHTTPDialer(strings.ReplaceAll(c.Proxy, "http://", "")) 56 | case "socks5": 57 | dialer = fasthttpproxy.FasthttpSocksDialer(c.Proxy) 58 | default: 59 | return nil, fmt.Errorf("unsupported proxy scheme: %s", parse.Scheme) 60 | } 61 | } 62 | 63 | pc := &providers.Config{ 64 | Threads: c.Threads, 65 | Timeout: c.Timeout, 66 | MaxRetries: c.MaxRetries, 67 | IncludeSubdomains: c.IncludeSubdomains, 68 | RemoveParameters: c.RemoveParameters, 69 | Client: &fasthttp.Client{ 70 | TLSConfig: &tls.Config{ 71 | InsecureSkipVerify: true, 72 | }, 73 | Dial: dialer, 74 | }, 75 | Providers: c.Providers, 76 | Output: c.Outfile, 77 | JSON: c.JSON, 78 | URLScan: providers.URLScan{ 79 | Host: c.URLScan.Host, 80 | APIKey: c.URLScan.APIKey, 81 | }, 82 | OTX: c.OTX, 83 | } 84 | 85 | log.SetLevel(log.ErrorLevel) 86 | if c.Verbose { 87 | log.SetLevel(log.InfoLevel) 88 | } 89 | pc.Blacklist = mapset.NewThreadUnsafeSet(c.Blacklist...) 90 | pc.Blacklist.Add("") 91 | return pc, nil 92 | } 93 | 94 | type Options struct { 95 | viper *viper.Viper 96 | } 97 | 98 | func New() *Options { 99 | v := viper.New() 100 | 101 | pflag.String("o", "", "filename to write results to") 102 | pflag.String("config", "", "location of config file (default $HOME/.gau.toml or %USERPROFILE%\\.gau.toml)") 103 | pflag.Uint("threads", 1, "number of workers to spawn") 104 | pflag.Uint("timeout", 45, "timeout (in seconds) for HTTP client") 105 | pflag.Uint("retries", 0, "retries for HTTP client") 106 | pflag.String("proxy", "", "http proxy to use") 107 | pflag.StringSlice("blacklist", []string{}, "list of extensions to skip") 108 | pflag.StringSlice("providers", []string{}, "list of providers to use (wayback,commoncrawl,otx,urlscan)") 109 | pflag.Bool("subs", false, "include subdomains of target domain") 110 | pflag.Bool("fp", false, "remove different parameters of the same endpoint") 111 | pflag.Bool("verbose", false, "show verbose output") 112 | 
pflag.Bool("json", false, "output as json") 113 | 114 | // filter flags 115 | pflag.StringSlice("mc", []string{}, "list of status codes to match") 116 | pflag.StringSlice("fc", []string{}, "list of status codes to filter") 117 | pflag.StringSlice("mt", []string{}, "list of mime-types to match") 118 | pflag.StringSlice("ft", []string{}, "list of mime-types to filter") 119 | pflag.String("from", "", "fetch urls from date (format: YYYYMM)") 120 | pflag.String("to", "", "fetch urls to date (format: YYYYMM)") 121 | pflag.Bool("version", false, "show gau version") 122 | 123 | pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 124 | pflag.Parse() 125 | 126 | if err := v.BindPFlags(pflag.CommandLine); err != nil { 127 | log.Fatal(err) 128 | } 129 | 130 | return &Options{viper: v} 131 | } 132 | 133 | func Args() []string { 134 | return pflag.Args() 135 | } 136 | 137 | func (o *Options) ReadInConfig() (*Config, error) { 138 | confFile := o.viper.GetString("config") 139 | 140 | if confFile == "" { 141 | home, err := os.UserHomeDir() 142 | if err != nil { 143 | return o.DefaultConfig(), err 144 | } 145 | 146 | confFile = filepath.Join(home, ".gau.toml") 147 | } 148 | 149 | return o.ReadConfigFile(confFile) 150 | } 151 | 152 | func (o *Options) ReadConfigFile(name string) (*Config, error) { 153 | if _, err := os.Stat(name); errors.Is(err, os.ErrNotExist) { 154 | return o.DefaultConfig(), fmt.Errorf("Config file %s not found, using default config", name) 155 | } 156 | 157 | o.viper.SetConfigFile(name) 158 | 159 | if err := o.viper.ReadInConfig(); err != nil { 160 | return o.DefaultConfig(), err 161 | } 162 | 163 | var c Config 164 | 165 | if err := o.viper.Unmarshal(&c); err != nil { 166 | return o.DefaultConfig(), err 167 | } 168 | 169 | o.getFlagValues(&c) 170 | 171 | return &c, nil 172 | } 173 | 174 | func (o *Options) DefaultConfig() *Config { 175 | c := &Config{ 176 | Filters: providers.Filters{}, 177 | Proxy: "", 178 | Timeout: 45, 179 | Threads: 1, 180 | Verbose: false, 181 
| MaxRetries: 5, 182 | IncludeSubdomains: false, 183 | RemoveParameters: false, 184 | Providers: []string{"wayback", "commoncrawl", "otx", "urlscan"}, 185 | Blacklist: []string{}, 186 | JSON: false, 187 | Outfile: "", 188 | } 189 | 190 | o.getFlagValues(c) 191 | 192 | return c 193 | } 194 | 195 | func (o *Options) getFlagValues(c *Config) { 196 | version := o.viper.GetBool("version") 197 | verbose := o.viper.GetBool("verbose") 198 | json := o.viper.GetBool("json") 199 | retries := o.viper.GetUint("retries") 200 | proxy := o.viper.GetString("proxy") 201 | outfile := o.viper.GetString("o") 202 | fetchers := o.viper.GetStringSlice("providers") 203 | threads := o.viper.GetUint("threads") 204 | blacklist := o.viper.GetStringSlice("blacklist") 205 | subs := o.viper.GetBool("subs") 206 | fp := o.viper.GetBool("fp") 207 | 208 | if version { 209 | fmt.Printf("gau version: %s\n", providers.Version) 210 | os.Exit(0) 211 | } 212 | 213 | if proxy != "" { 214 | c.Proxy = proxy 215 | } 216 | 217 | if outfile != "" { 218 | c.Outfile = outfile 219 | } 220 | // set if --threads flag is set, otherwise use default 221 | if threads > 1 { 222 | c.Threads = threads 223 | } 224 | 225 | // set if --blacklist flag is specified, otherwise use default 226 | if len(blacklist) > 0 { 227 | c.Blacklist = blacklist 228 | } 229 | 230 | // set if --providers flag is specified, otherwise use default 231 | if len(fetchers) > 0 { 232 | c.Providers = fetchers 233 | } 234 | 235 | if retries > 0 { 236 | c.MaxRetries = retries 237 | } 238 | 239 | if subs { 240 | c.IncludeSubdomains = subs 241 | } 242 | 243 | if fp { 244 | c.RemoveParameters = fp 245 | } 246 | 247 | c.JSON = json 248 | c.Verbose = verbose 249 | 250 | // get filter flags 251 | mc := o.viper.GetStringSlice("mc") 252 | fc := o.viper.GetStringSlice("fc") 253 | mt := o.viper.GetStringSlice("mt") 254 | ft := o.viper.GetStringSlice("ft") 255 | from := o.viper.GetString("from") 256 | to := o.viper.GetString("to") 257 | 258 | var seenFilterFlag bool 
259 | 260 | var filters providers.Filters 261 | if len(mc) > 0 { 262 | seenFilterFlag = true 263 | filters.MatchStatusCodes = mc 264 | } 265 | 266 | if len(fc) > 0 { 267 | seenFilterFlag = true 268 | filters.FilterStatusCodes = fc 269 | } 270 | 271 | if len(mt) > 0 { 272 | seenFilterFlag = true 273 | filters.MatchMimeTypes = mt 274 | } 275 | 276 | if len(ft) > 0 { 277 | seenFilterFlag = true 278 | filters.FilterMimeTypes = ft 279 | } 280 | 281 | if from != "" { 282 | seenFilterFlag = true 283 | if _, err := time.Parse("200601", from); err == nil { 284 | filters.From = from 285 | } 286 | } 287 | 288 | if to != "" { 289 | seenFilterFlag = true 290 | if _, err := time.Parse("200601", to); err == nil { 291 | filters.To = to 292 | } 293 | } 294 | 295 | if seenFilterFlag { 296 | c.Filters = filters 297 | } 298 | } 299 | -------------------------------------------------------------------------------- /runner/runner.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | 8 | "github.com/lc/gau/v2/pkg/providers" 9 | "github.com/lc/gau/v2/pkg/providers/commoncrawl" 10 | "github.com/lc/gau/v2/pkg/providers/otx" 11 | "github.com/lc/gau/v2/pkg/providers/urlscan" 12 | "github.com/lc/gau/v2/pkg/providers/wayback" 13 | "github.com/sirupsen/logrus" 14 | ) 15 | 16 | type Runner struct { 17 | sync.WaitGroup 18 | 19 | Providers []providers.Provider 20 | threads uint 21 | ctx context.Context 22 | } 23 | 24 | // Init initializes the runner 25 | func (r *Runner) Init(c *providers.Config, providers []string, filters providers.Filters) error { 26 | r.threads = c.Threads 27 | for _, name := range providers { 28 | switch name { 29 | case "urlscan": 30 | r.Providers = append(r.Providers, urlscan.New(c)) 31 | case "otx": 32 | r.Providers = append(r.Providers, otx.New(c)) 33 | case "wayback": 34 | r.Providers = append(r.Providers, wayback.New(c, filters)) 35 | case "commoncrawl": 36 | cc, err 
:= commoncrawl.New(c, filters) 37 | if err != nil { 38 | return fmt.Errorf("error instantiating commoncrawl: %v\n", err) 39 | } 40 | r.Providers = append(r.Providers, cc) 41 | } 42 | } 43 | 44 | return nil 45 | } 46 | 47 | // Starts starts the worker 48 | func (r *Runner) Start(ctx context.Context, workChan chan Work, results chan string) { 49 | for i := uint(0); i < r.threads; i++ { 50 | r.Add(1) 51 | go func() { 52 | defer r.Done() 53 | r.worker(ctx, workChan, results) 54 | }() 55 | } 56 | } 57 | 58 | type Work struct { 59 | domain string 60 | provider providers.Provider 61 | } 62 | 63 | func NewWork(domain string, provider providers.Provider) Work { 64 | return Work{domain, provider} 65 | } 66 | 67 | func (w *Work) Do(ctx context.Context, results chan string) error { 68 | return w.provider.Fetch(ctx, w.domain, results) 69 | } 70 | 71 | // worker checks to see if the context is finished and executes the fetching process for each provider 72 | func (r *Runner) worker(ctx context.Context, workChan chan Work, results chan string) { 73 | for { 74 | select { 75 | case <-ctx.Done(): 76 | return 77 | case work, ok := <-workChan: 78 | if !ok { 79 | return 80 | } 81 | if err := work.Do(ctx, results); err != nil { 82 | logrus.WithField("provider", work.provider.Name()).Warnf("%s - %v", work.domain, err) 83 | } 84 | } 85 | } 86 | } 87 | --------------------------------------------------------------------------------