├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yaml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yaml ├── install.sh └── workflows │ └── release.yaml ├── .gitignore ├── .goreleaser.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── go.mod ├── go.sum ├── internal └── runner │ ├── consts.go │ ├── init.go │ ├── parser.go │ ├── runner.go │ ├── validator.go │ └── vars.go ├── main.go └── pkg └── galer ├── galer.go ├── util.go └── vars.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: ["dwisiswant0"] 2 | custom: ["https://paypal.me/dw1s", "https://saweria.co/dwisiswant0", "https://unstoppabledomains.com/d/dwisiswant0.crypto"] 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: 'bug' 6 | assignees: dwisiswant0 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | **To Reproduce** 15 | 16 | Steps to reproduce the behavior: 17 | 18 | **Expected behavior** 19 | 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Environment (please complete the following information):** 27 | 28 | - OS: [e.g. mac, linux] 29 | - OS version: [uname -a] 30 | - galer version: 31 | 32 | **Additional context** 33 | Add any other context about the problem here. Full output log is probably a helpful thing to add here. 
34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yaml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://twitter.com/dwisiswant0 5 | about: Ask questions and discuss with author 6 | 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: 'enhancement' 6 | assignees: dwisiswant0 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **IMPORTANT: Please do not create a Pull Request without creating an issue first!** 2 | 3 | _(Any change needs to be discussed before proceeding. 
Failure to do so may result in the rejection of the pull request)._ 4 | 5 | ### Summary 6 | 7 | 8 | 9 | _Explains the information and/or motivation for making these changes..._ 10 | 11 | 12 | ### Proposed changes 13 | 14 | This PR fixes/implements the following **bugs/features**: 15 | 16 | - Bug 1 17 | - Bug 2 18 | - Feature 1 19 | - Feature 2 20 | - Breaking changes 21 | 22 | 23 | 24 | ### How has this been tested? 25 | 26 | Proof: 27 | 28 | 29 | 30 | ### Closing issues 31 | 32 | Fixes # 33 | 34 | ### Checklist: 35 | 36 | 37 | 38 | 39 | - [ ] My code follows the code style of this project. 40 | - [ ] My change requires a change to the documentation. 41 | - [ ] I have updated the documentation accordingly. 42 | - [ ] I have written new tests for my changes. 43 | - [ ] My changes ran successfully and pass tests locally. -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | labels: 8 | - "bug" 9 | 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "daily" 14 | labels: 15 | - "bug" -------------------------------------------------------------------------------- /.github/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # Code generated by godownloader on 2020-09-14T07:38:56Z. DO NOT EDIT. 
4 | # 5 | 6 | usage() { 7 | this=$1 8 | cat </dev/null 130 | } 131 | echoerr() { 132 | echo "$@" 1>&2 133 | } 134 | log_prefix() { 135 | echo "$0" 136 | } 137 | _logp=6 138 | log_set_priority() { 139 | _logp="$1" 140 | } 141 | log_priority() { 142 | if test -z "$1"; then 143 | echo "$_logp" 144 | return 145 | fi 146 | [ "$1" -le "$_logp" ] 147 | } 148 | log_tag() { 149 | case $1 in 150 | 0) echo "emerg" ;; 151 | 1) echo "alert" ;; 152 | 2) echo "crit" ;; 153 | 3) echo "err" ;; 154 | 4) echo "warning" ;; 155 | 5) echo "notice" ;; 156 | 6) echo "info" ;; 157 | 7) echo "debug" ;; 158 | *) echo "$1" ;; 159 | esac 160 | } 161 | log_debug() { 162 | log_priority 7 || return 0 163 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 164 | } 165 | log_info() { 166 | log_priority 6 || return 0 167 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 168 | } 169 | log_err() { 170 | log_priority 3 || return 0 171 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 172 | } 173 | log_crit() { 174 | log_priority 2 || return 0 175 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 176 | } 177 | uname_os() { 178 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 179 | case "$os" in 180 | cygwin_nt*) os="windows" ;; 181 | mingw*) os="windows" ;; 182 | msys_nt*) os="windows" ;; 183 | esac 184 | echo "$os" 185 | } 186 | uname_arch() { 187 | arch=$(uname -m) 188 | case $arch in 189 | x86_64) arch="amd64" ;; 190 | x86) arch="386" ;; 191 | i686) arch="386" ;; 192 | i386) arch="386" ;; 193 | aarch64) arch="arm64" ;; 194 | armv5*) arch="armv5" ;; 195 | armv6*) arch="armv6" ;; 196 | armv7*) arch="armv7" ;; 197 | esac 198 | echo ${arch} 199 | } 200 | uname_os_check() { 201 | os=$(uname_os) 202 | case "$os" in 203 | darwin) return 0 ;; 204 | dragonfly) return 0 ;; 205 | freebsd) return 0 ;; 206 | linux) return 0 ;; 207 | android) return 0 ;; 208 | nacl) return 0 ;; 209 | netbsd) return 0 ;; 210 | openbsd) return 0 ;; 211 | plan9) return 0 ;; 212 | solaris) return 0 ;; 213 | windows) return 0 ;; 214 | esac 215 | log_crit 
"uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. Please file bug at https://github.com/client9/shlib" 216 | return 1 217 | } 218 | uname_arch_check() { 219 | arch=$(uname_arch) 220 | case "$arch" in 221 | 386) return 0 ;; 222 | amd64) return 0 ;; 223 | arm64) return 0 ;; 224 | armv5) return 0 ;; 225 | armv6) return 0 ;; 226 | armv7) return 0 ;; 227 | ppc64) return 0 ;; 228 | ppc64le) return 0 ;; 229 | mips) return 0 ;; 230 | mipsle) return 0 ;; 231 | mips64) return 0 ;; 232 | mips64le) return 0 ;; 233 | s390x) return 0 ;; 234 | amd64p32) return 0 ;; 235 | esac 236 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib" 237 | return 1 238 | } 239 | untar() { 240 | tarball=$1 241 | case "${tarball}" in 242 | *.tar.gz | *.tgz) tar --no-same-owner -xzf "${tarball}" ;; 243 | *.tar) tar --no-same-owner -xf "${tarball}" ;; 244 | *.zip) unzip "${tarball}" ;; 245 | *) 246 | log_err "untar unknown archive format for ${tarball}" 247 | return 1 248 | ;; 249 | esac 250 | } 251 | http_download_curl() { 252 | local_file=$1 253 | source_url=$2 254 | header=$3 255 | if [ -z "$header" ]; then 256 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 257 | else 258 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 259 | fi 260 | if [ "$code" != "200" ]; then 261 | log_debug "http_download_curl received HTTP status $code" 262 | return 1 263 | fi 264 | return 0 265 | } 266 | http_download_wget() { 267 | local_file=$1 268 | source_url=$2 269 | header=$3 270 | if [ -z "$header" ]; then 271 | wget -q -O "$local_file" "$source_url" 272 | else 273 | wget -q --header "$header" -O "$local_file" "$source_url" 274 | fi 275 | } 276 | http_download() { 277 | log_debug "http_download $2" 278 | if is_command curl; then 279 | http_download_curl "$@" 280 | return 281 | elif is_command wget; then 282 | http_download_wget "$@" 
# hash_sha256 prints the hex SHA-256 digest of a file on stdout.
#   $1 - path of the file to hash (defaults to /dev/stdin)
# The first tool found among gsha256sum, sha256sum, shasum and openssl is
# used. Returns 1 when the chosen tool fails or when none of them exists.
hash_sha256() {
  TARGET=${1:-/dev/stdin}
  if is_command gsha256sum; then
    hash=$(gsha256sum "$TARGET") || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command sha256sum; then
    hash=$(sha256sum "$TARGET") || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command shasum; then
    hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command openssl; then
    # `openssl dgst -r` prints the coreutils layout ("<digest> *<file>"), so
    # the digest is the first space-separated field, as in the other branches.
    hash=$(openssl dgst -sha256 -r "$TARGET") || return 1
    echo "$hash" | cut -d ' ' -f 1
  else
    log_crit "hash_sha256 unable to find command to compute sha-256 hash"
    return 1
  fi
}
vs $got" 341 | return 1 342 | fi 343 | } 344 | cat /dev/null <- 40 | {{- .ProjectName }}_{{- .Tag }}- 41 | {{- .Os }}_ 42 | {{- if eq .Arch "amd64" }}x86_64 43 | {{- else if eq .Arch "386" }}i386 44 | {{- else }}{{ .Arch }}{{ end }} 45 | 46 | checksum: 47 | name_template: "{{ .ProjectName }}_{{ .Tag }}-checksums.txt" 48 | 49 | dockers: 50 | - image_templates: 51 | - "ghcr.io/dwisiswant0/{{ .ProjectName }}:{{ .Tag }}" 52 | - "ghcr.io/dwisiswant0/{{ .ProjectName }}:v{{ .Major }}.{{ .Minor }}" 53 | - "ghcr.io/dwisiswant0/{{ .ProjectName }}:v{{ .Major }}" 54 | - "ghcr.io/dwisiswant0/{{ .ProjectName }}:latest" 55 | dockerfile: Dockerfile 56 | use: docker 57 | build_flag_templates: 58 | - "--pull" 59 | - "--label=org.opencontainers.image.created={{ .Date }}" 60 | - "--label=org.opencontainers.image.description={{ .ProjectName }}" 61 | - "--label=org.opencontainers.image.revision={{ .FullCommit }}" 62 | - "--label=org.opencontainers.image.source=https://github.com/dwisiswant0/{{ .ProjectName }}" 63 | - "--label=org.opencontainers.image.title={{ .ProjectName }}" 64 | - "--label=org.opencontainers.image.url=https://github.com/dwisiswant0/{{ .ProjectName }}" 65 | - "--label=org.opencontainers.image.version={{ .Version }}" 66 | 67 | changelog: 68 | sort: asc 69 | filters: 70 | exclude: 71 | - "^build" 72 | - "^chore" 73 | - "^ci" 74 | - "^docs" 75 | - "^refactor" 76 | - "^test" 77 | - Merge pull request 78 | - Merge branch 79 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | COPY galer /galer 4 | 5 | ENTRYPOINT ["/galer"] 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 dwisiswant0 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # galer 2 | 3 | [![made-with-Go](https://img.shields.io/badge/made%20with-Go-blue.svg)](http://golang.org) 4 | [![issues](https://img.shields.io/github/issues/dwisiswant0/galer?color=blue)](https://github.com/dwisiswant0/galer/issues) 5 | 6 | ```txt 7 | __ 8 | __ _ _(_ ) __ _ __ 9 | /'_ '\/'_' )| | /'__'( '__) 10 | ( (_) ( (_| || |( ___| | 11 | '\__ '\__,_(___'\____(_) 12 | ( )_) | 13 | \___/' @dwisiswant0 14 | ``` 15 | 16 | A fast tool to fetch URLs from HTML attributes by crawl-in. Inspired by the [@omespino Tweet](https://twitter.com/omespino/status/1318605084989837312), which is possible to extract `src`, `href`, `url` and `action` values by evaluating JavaScript through Chrome DevTools Protocol. 
17 | 18 | --- 19 | 20 | ## Resources 21 | 22 | - [Installation](#installation) 23 | - [from Binary](#from-binary) 24 | - [from Source](#from-source) 25 | - [from GitHub](#from-github) 26 | - [Usage](#usage) 27 | - [Basic Usage](#basic-usage) 28 | - [Flags](#flags) 29 | - [Examples](#examples) 30 | - [Single URL](#single-url) 31 | - [URLs from list](#urls-from-list) 32 | - [from Stdin](#from-stdin) 33 | - [Library](#library) 34 | - [TODOs](#todos) 35 | - [Help & Bugs](#help--bugs) 36 | - [License](#license) 37 | - [Version](#version) 38 | - [Acknowledgement](#acknowledgement) 39 | 40 | ## Installation 41 | 42 | ### from Binary 43 | 44 | The installation is easy. You can download a prebuilt binary from [releases page](https://github.com/dwisiswant0/galer/releases), unpack and run! or with 45 | 46 | ```bash 47 | ▶ (sudo) curl -sSfL https://git.io/galer | sh -s -- -b /usr/local/bin 48 | ``` 49 | 50 | ### from Source 51 | 52 | If you have go1.22+ compiler installed and configured: 53 | 54 | ```bash 55 | ▶ go install -v github.com/dwisiswant0/galer@latest 56 | ``` 57 | 58 | ### from GitHub 59 | 60 | ```bash 61 | ▶ git clone https://github.com/dwisiswant0/galer 62 | ▶ cd galer 63 | ▶ go build . 64 | ▶ (sudo) install galer /usr/local/bin 65 | ``` 66 | 67 | ## Usage 68 | 69 | ### Basic Usage 70 | 71 | Simply, galer can be run with: 72 | 73 | ```bash 74 | ▶ galer -u "http://domain.tld" 75 | ``` 76 | 77 | ### Flags 78 | 79 | ![galer](https://user-images.githubusercontent.com/25837540/100824601-0ee53b80-3489-11eb-878d-a58d1ec3489d.jpg) 80 | 81 | This will display help for the tool. Here are all the options it supports. 
82 | 83 | ```console 84 | $ galer -h 85 | 86 | __ v0.2.0 87 | __ _ _(_ ) __ _ __ 88 | /'_ '\/'_' )| | /'__'( '__) 89 | ( (_) ( (_| || |( ___| | 90 | '\__ '\__,_(___'\____(_) 91 | ( )_) | 92 | \___/' @dwisiswant0 93 | 94 | A fast tool to fetch URLs from HTML attributes by crawl-in 95 | 96 | Usage: 97 | galer -u [URL|URLs.txt] -o [output.txt] 98 | 99 | Options: 100 | -u, --url Target to fetches (single target URL or list) 101 | -e, --extension Show only certain extensions (comma-separated, e.g. js,php) 102 | -c, --concurrency Concurrency level (default: 50) 103 | -w, --wait Wait N seconds before evaluate (default: 1) 104 | -d, --depth Max. depth for crawling (levels of links to follow) 105 | --same-host Same host only 106 | --same-root Same root (eTLD+1) only (takes precedence over --same-host) 107 | -o, --output Save fetched URLs output into file 108 | -T, --template Format for output template (e.g., "{{scheme}}://{{host}}{{path}}") 109 | Valid variables are: "raw_url", "scheme", "user", "username", 110 | "password", "host", "hostname", "port", "path", "raw_path", 111 | "escaped_path", "raw_query", "fragment", "raw_fragment". 112 | -t, --timeout Max. 
time (seconds) allowed for connection (default: 60) 113 | -s, --silent Silent mode (suppress an errors) 114 | -v, --verbose Verbose mode show error details unless you weren't use silent 115 | -h, --help Display its helps 116 | ``` 117 | 118 | ### Examples 119 | 120 | #### Single URL 121 | 122 | ```bash 123 | ▶ galer -u "http://domain.tld" 124 | ``` 125 | 126 | #### URLs from list 127 | 128 | ```bash 129 | ▶ galer -u /path/to/urls.txt 130 | ``` 131 | 132 | #### from Stdin 133 | 134 | ```bash 135 | ▶ cat urls.txt | galer 136 | ``` 137 | 138 | In case you want to chained with other tools: 139 | 140 | ```bash 141 | ▶ subfinder -d domain.tld -silent | httpx -silent | galer 142 | ``` 143 | 144 | ### Library 145 | 146 | [![godoc](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/dwisiswant0/galer/pkg/galer) 147 | 148 | You can use **galer** as library. 149 | 150 | ``` 151 | ▶ go get github.com/dwisiswant0/galer/pkg/galer@latest 152 | ``` 153 | 154 | For example: 155 | 156 | ```go 157 | package main 158 | 159 | import ( 160 | "fmt" 161 | 162 | "github.com/dwisiswant0/galer/pkg/galer" 163 | ) 164 | 165 | func main() { 166 | cfg := &galer.Config{ 167 | Timeout: 60, 168 | } 169 | cfg = galer.New(cfg) 170 | 171 | run, err := cfg.Crawl("https://twitter.com") 172 | if err != nil { 173 | panic(err) 174 | } 175 | 176 | for _, url := range run { 177 | fmt.Println(url) 178 | } 179 | } 180 | ``` 181 | 182 | ## TODOs 183 | 184 | - [ ] Enable to set extra HTTP headers 185 | - [ ] Provide randomly User-Agent 186 | - [ ] Bypass headless browser 187 | - [ ] Add exception for specific extensions 188 | 189 | ## Help & Bugs 190 | 191 | [![contributions welcome](https://img.shields.io/badge/contributions-welcome-blue.svg)](https://github.com/dwisiswant0/galer/issues) 192 | 193 | If you are still confused or found a bug, please [open the issue](https://github.com/dwisiswant0/galer/issues). 
All bug reports are appreciated; some features have not been tested yet due to lack of free time. 194 | 195 | ## Status 196 | 197 | > [!CAUTION] 198 | > galer has NOT reached 1.0 yet. Therefore, this library is currently not supported and does not offer a stable API; use at your own risk. 199 | 200 | There are no guarantees of stability for the APIs in this library; while they are not expected to change dramatically, API tweaks and bug fixes may occur. 201 | 202 | ## Pronunciation 203 | 204 | `id_ID` • **/gäˈlər/** — kalau _galer_ jangan dicium baunya, langsung cuci tangan, _bego_! 205 | 206 | ## Acknowledgement 207 | 208 | - [Omar Espino](https://twitter.com/omespino) for the idea, that's why this tool was made! 209 | 210 | ### License 211 | 212 | `galer` is released by **@dwisiswant0** under the MIT license. See [LICENSE](/LICENSE). -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dwisiswant0/galer 2 | 3 | go 1.23 4 | 5 | toolchain go1.23.1 6 | 7 | require ( 8 | github.com/charmbracelet/log v0.4.0 9 | github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb 10 | github.com/chromedp/chromedp v0.11.1 11 | github.com/logrusorgru/aurora v2.0.3+incompatible 12 | github.com/remeh/sizedwaitgroup v1.0.0 13 | golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 14 | golang.org/x/net v0.30.0 15 | ) 16 | 17 | require ( 18 | github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect 19 | github.com/charmbracelet/lipgloss v0.10.0 // indirect 20 | github.com/chromedp/sysutil v1.1.0 // indirect 21 | github.com/go-logfmt/logfmt v0.6.0 // indirect 22 | github.com/gobwas/httphead v0.1.0 // indirect 23 | github.com/gobwas/pool v0.2.1 // indirect 24 | github.com/gobwas/ws v1.4.0 // indirect 25 | github.com/josharian/intern v1.0.0 // indirect 26 | github.com/lucasb-eyer/go-colorful v1.2.0 // indirect 27 | github.com/mailru/easyjson 
v0.7.7 // indirect 28 | github.com/mattn/go-isatty v0.0.18 // indirect 29 | github.com/mattn/go-runewidth v0.0.15 // indirect 30 | github.com/muesli/reflow v0.3.0 // indirect 31 | github.com/muesli/termenv v0.15.2 // indirect 32 | github.com/rivo/uniseg v0.4.7 // indirect 33 | github.com/valyala/bytebufferpool v1.0.0 // indirect 34 | github.com/valyala/fasttemplate v1.2.2 // indirect 35 | golang.org/x/sys v0.26.0 // indirect 36 | ) 37 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= 2 | github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= 3 | github.com/charmbracelet/lipgloss v0.10.0 h1:KWeXFSexGcfahHX+54URiZGkBFazf70JNMtwg/AFW3s= 4 | github.com/charmbracelet/lipgloss v0.10.0/go.mod h1:Wig9DSfvANsxqkRsqj6x87irdy123SR4dOXlKa91ciE= 5 | github.com/charmbracelet/log v0.4.0 h1:G9bQAcx8rWA2T3pWvx7YtPTPwgqpk7D68BX21IRW8ZM= 6 | github.com/charmbracelet/log v0.4.0/go.mod h1:63bXt/djrizTec0l11H20t8FDSvA4CRZJ1KH22MdptM= 7 | github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb h1:noKVm2SsG4v0Yd0lHNtFYc9EUxIVvrr4kJ6hM8wvIYU= 8 | github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= 9 | github.com/chromedp/chromedp v0.11.1 h1:Spca8egFqUlv+JDW+yIs+ijlHlJDPufgrfXPwtq6NMs= 10 | github.com/chromedp/chromedp v0.11.1/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= 11 | github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= 12 | github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8= 13 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 14 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | 
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= 16 | github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= 17 | github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= 18 | github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= 19 | github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= 20 | github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= 21 | github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= 22 | github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= 23 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 24 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 25 | github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= 26 | github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= 27 | github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= 28 | github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= 29 | github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= 30 | github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= 31 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 32 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 33 | github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= 34 | github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 35 | github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= 36 | 
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= 37 | github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 38 | github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= 39 | github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= 40 | github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= 41 | github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= 42 | github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= 43 | github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= 44 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 45 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 46 | github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= 47 | github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= 48 | github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 49 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 50 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 51 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 52 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 53 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 54 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= 55 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 56 | github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= 57 | 
github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= 58 | golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 h1:1wqE9dj9NpSm04INVsJhhEUzhuDVjbcyKH91sVyPATw= 59 | golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= 60 | golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= 61 | golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= 62 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 63 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 64 | golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 65 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 66 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 67 | -------------------------------------------------------------------------------- /internal/runner/consts.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | const ( 4 | author = "dwisiswant0" 5 | version = "0.2.0" 6 | banner = ` 7 | __ v` + version + ` 8 | __ _ _(_ ) __ _ __ 9 | /'_ '\/'_' )| | /'__'( '__) 10 | ( (_) ( (_| || |( ___| | 11 | '\__ '\__,_(___'\____(_) 12 | ( )_) | 13 | \___/' @` + author + ` 14 | 15 | ` 16 | help = `A fast tool to fetch URLs from HTML attributes by crawl-in 17 | 18 | Usage: 19 | galer -u [URL|URLs.txt] -o [output.txt] 20 | 21 | Options: 22 | -u, --url Target to fetches (single target URL or list) 23 | -e, --extension Show only certain extensions (comma-separated, e.g. js,php) 24 | -c, --concurrency Concurrency level (default: 50) 25 | -w, --wait Wait N seconds before evaluate (default: 1) 26 | -d, --depth Max. 
// Options holds the command-line settings. The flag-backed fields are filled
// in by Parse; List and File are derived from them by validate.
type Options struct {
	// Concurrency is the -c/--concurrency value (default 50); it sets the
	// number of crawl workers and the size of the runner's wait group.
	Concurrency int
	// Depth is the -d/--depth value (default 1): the crawl loop keeps going
	// while its level counter is <= Depth.
	Depth int
	// Ext is the -e/--extension value, a comma-separated extension list
	// consulted through isOnExt.
	Ext string
	// File is the output file opened by validate when Output is non-empty;
	// it stays nil otherwise.
	File *os.File
	// List scans the targets line by line. validate builds it from stdin,
	// from URL itself, or from the file named by URL.
	List *bufio.Scanner
	// Output is the -o/--output path.
	Output string
	// SameHost is the --same-host switch, forwarded to the galer config.
	SameHost bool
	// SameRoot is the --same-root switch, forwarded to the galer config.
	SameRoot bool
	// Silent is the -s/--silent switch; Parse skips the banner when it is set.
	Silent bool
	// Template is the -T/--template value, forwarded to the galer config.
	Template string
	// Timeout is the -t/--timeout value (default 60), forwarded to the galer
	// config.
	Timeout int
	// URL is the -u/--url value: either a target URL or the path of a file
	// that lists targets.
	URL string
	// Verbose is the -v/--verbose switch; crawl errors are only logged when
	// it is set.
	Verbose bool
	// Wait is the -w/--wait value (default 1), forwarded to the galer config.
	Wait int
}
Parse user given arguments 31 | func Parse() *Options { 32 | opt = &Options{} 33 | 34 | flag.StringVar(&opt.URL, "url", "", "") 35 | flag.StringVar(&opt.URL, "u", "", "") 36 | 37 | flag.IntVar(&opt.Concurrency, "concurrency", 50, "") 38 | flag.IntVar(&opt.Concurrency, "c", 50, "") 39 | 40 | flag.IntVar(&opt.Wait, "wait", 1, "") 41 | flag.IntVar(&opt.Wait, "w", 1, "") 42 | 43 | flag.IntVar(&opt.Depth, "depth", 1, "") 44 | flag.IntVar(&opt.Depth, "d", 1, "") 45 | 46 | flag.IntVar(&opt.Timeout, "timeout", 60, "") 47 | flag.IntVar(&opt.Timeout, "t", 60, "") 48 | 49 | flag.StringVar(&opt.Ext, "e", "", "") 50 | flag.StringVar(&opt.Ext, "extension", "", "") 51 | 52 | flag.BoolVar(&opt.SameHost, "same-host", false, "") 53 | flag.BoolVar(&opt.SameRoot, "same-root", false, "") 54 | 55 | flag.StringVar(&opt.Output, "output", "", "") 56 | flag.StringVar(&opt.Output, "o", "", "") 57 | 58 | flag.StringVar(&opt.Template, "template", "", "") 59 | flag.StringVar(&opt.Template, "T", "", "") 60 | 61 | flag.BoolVar(&opt.Silent, "silent", false, "") 62 | flag.BoolVar(&opt.Silent, "s", false, "") 63 | 64 | flag.BoolVar(&opt.Verbose, "v", false, "") 65 | flag.BoolVar(&opt.Verbose, "verbose", false, "") 66 | 67 | flag.Usage = func() { 68 | showBanner() 69 | fmt.Fprint(os.Stderr, help) 70 | } 71 | 72 | flag.Parse() 73 | 74 | if !opt.Silent { 75 | showBanner() 76 | } 77 | 78 | if err := opt.validate(); err != nil { 79 | clog.Fatal("could not validate options", "err", err) 80 | } 81 | 82 | return opt 83 | } 84 | 85 | func showBanner() { 86 | fmt.Fprint(os.Stderr, aurora.Bold(aurora.Cyan(banner))) 87 | } 88 | -------------------------------------------------------------------------------- /internal/runner/runner.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | 8 | "github.com/dwisiswant0/galer/pkg/galer" 9 | "github.com/remeh/sizedwaitgroup" 10 | ) 11 | 12 | type Runner struct { 13 | opt 
*Options 14 | swg sizedwaitgroup.SizedWaitGroup 15 | urls map[string]bool 16 | galer *galer.Config 17 | } 18 | 19 | // New initialize [Runner] 20 | func New(opt *Options) *Runner { 21 | return &Runner{ 22 | opt: opt, 23 | swg: sizedwaitgroup.New(opt.Concurrency), 24 | urls: make(map[string]bool), 25 | galer: &galer.Config{ 26 | Logger: clog, 27 | SameHost: opt.SameHost, 28 | SameRoot: opt.SameRoot, 29 | Template: opt.Template, 30 | Timeout: opt.Timeout, 31 | Wait: opt.Wait, 32 | }, 33 | } 34 | } 35 | 36 | // Do runs crawling 37 | func (r *Runner) Do() { 38 | jobs := make(chan string) 39 | 40 | for i := 0; i < r.opt.Concurrency; i++ { 41 | r.swg.Add() 42 | go func() { 43 | defer r.swg.Done() 44 | for job := range jobs { 45 | r.galer.SetScope(job) 46 | r.run(job, 1) 47 | } 48 | }() 49 | } 50 | 51 | for r.opt.List.Scan() { 52 | u := r.opt.List.Text() 53 | jobs <- u 54 | } 55 | 56 | close(jobs) 57 | r.swg.Wait() 58 | r.galer.Close() 59 | 60 | if r.opt.File != nil { 61 | r.opt.File.Close() 62 | } 63 | } 64 | 65 | func (r *Runner) run(URL string, counter int) { 66 | cfg := galer.New(r.galer) 67 | 68 | var writer io.Writer = os.Stdout 69 | if r.opt.File != nil { 70 | writer = io.MultiWriter(os.Stdout, r.opt.File) 71 | } 72 | 73 | for counter <= r.opt.Depth { 74 | crawl := r.crawl(URL, cfg) 75 | if len(crawl) == 0 { 76 | break 77 | } 78 | counter++ 79 | 80 | var batches []string 81 | for _, u := range crawl { 82 | if !r.urls[u] { 83 | fmt.Fprintf(writer, "%s\n", u) 84 | batches = append(batches, u) 85 | r.urls[u] = true 86 | } 87 | } 88 | 89 | for _, u := range batches { 90 | if r.opt.Ext != "" { 91 | if !r.opt.isOnExt(u) { 92 | continue 93 | } 94 | } 95 | 96 | if counter <= r.opt.Depth { 97 | r.run(u, counter+1) 98 | } 99 | } 100 | } 101 | } 102 | 103 | func (r *Runner) crawl(URL string, cfg *galer.Config) []string { 104 | res, err := cfg.Crawl(URL) 105 | if err != nil && opt.Verbose { 106 | clog.Error(err, "url", URL) 107 | 108 | return []string{} 109 | } 110 | 111 | 
return res 112 | } 113 | -------------------------------------------------------------------------------- /internal/runner/validator.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | ) 10 | 11 | func (opt *Options) validate() error { 12 | var errFile error 13 | 14 | if isStdin() { 15 | opt.List = bufio.NewScanner(os.Stdin) 16 | } else if opt.URL != "" { 17 | if strings.HasPrefix(opt.URL, "http") { 18 | opt.List = bufio.NewScanner(strings.NewReader(opt.URL)) 19 | } else { 20 | f, err := os.Open(opt.URL) 21 | if err != nil { 22 | return err 23 | } 24 | opt.List = bufio.NewScanner(f) 25 | } 26 | } else { 27 | return errors.New("no target inputs provided") 28 | } 29 | 30 | if opt.Output != "" { 31 | opt.File, errFile = os.OpenFile(opt.Output, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) 32 | if errFile != nil { 33 | return errFile 34 | } 35 | } 36 | 37 | return nil 38 | } 39 | 40 | func isStdin() bool { 41 | f, e := os.Stdin.Stat() 42 | if e != nil { 43 | return false 44 | } 45 | 46 | if f.Mode()&os.ModeNamedPipe == 0 { 47 | return false 48 | } 49 | 50 | return true 51 | } 52 | 53 | // func isScope(target string, URL string) bool { 54 | // t, e := url.Parse(target) 55 | // if e != nil { 56 | // return false 57 | // } 58 | 59 | // u, e := url.Parse(URL) 60 | // if e != nil { 61 | // return false 62 | // } 63 | 64 | // return t.Host == u.Host 65 | // } 66 | 67 | func (opt *Options) isOnExt(URL string) bool { 68 | for _, e := range strings.Split(opt.Ext, ",") { 69 | if strings.TrimLeft(filepath.Ext(URL), ".") == e { 70 | return true 71 | } 72 | } 73 | 74 | return false 75 | } 76 | -------------------------------------------------------------------------------- /internal/runner/vars.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | 
"github.com/charmbracelet/log" 5 | ) 6 | 7 | var ( 8 | opt *Options 9 | clog *log.Logger 10 | ) 11 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/dwisiswant0/galer/internal/runner" 4 | 5 | func main() { 6 | options := runner.Parse() 7 | r := runner.New(options) 8 | r.Do() 9 | } 10 | -------------------------------------------------------------------------------- /pkg/galer/galer.go: -------------------------------------------------------------------------------- 1 | package galer 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net/url" 7 | "strings" 8 | "time" 9 | 10 | "github.com/charmbracelet/log" 11 | "github.com/chromedp/cdproto/network" 12 | "github.com/chromedp/chromedp" 13 | "github.com/valyala/fasttemplate" 14 | "golang.org/x/exp/slices" 15 | "golang.org/x/net/publicsuffix" 16 | ) 17 | 18 | // Config declare its configurations 19 | type Config struct { 20 | Logger *log.Logger 21 | SameHost bool 22 | SameRoot bool 23 | Template string 24 | Timeout int 25 | Wait int 26 | 27 | // Headers network.Headers 28 | ctx context.Context 29 | cancel context.CancelFunc 30 | template *fasttemplate.Template 31 | 32 | scope struct { 33 | hostname, root string 34 | } 35 | } 36 | 37 | // New defines context for the configurations 38 | func New(cfg *Config) *Config { 39 | allocCtx, _ := chromedp.NewExecAllocator(context.Background(), execAllocOpts...) 
40 | cfg.ctx, _ = chromedp.NewContext(allocCtx) 41 | cfg.ctx, cfg.cancel = context.WithTimeout(cfg.ctx, time.Duration(cfg.Timeout)*time.Second) 42 | 43 | return cfg 44 | } 45 | 46 | // Crawl to navigate to the URL & dump URLs on it 47 | func (cfg *Config) Crawl(URL string) ([]string, error) { 48 | var res, reqs []string 49 | 50 | if !IsURI(URL) { 51 | return nil, errors.New("cannot parse URL") 52 | } 53 | 54 | // defaulting sleep 55 | if cfg.Wait <= 0 { 56 | cfg.Wait = 1 57 | } 58 | 59 | // defaulting scope (hostname & root) 60 | if cfg.scope.hostname == "" && cfg.scope.root == "" { 61 | u, _ := url.Parse(URL) 62 | cfg.scope.hostname = u.Hostname() 63 | cfg.scope.root, _ = publicsuffix.EffectiveTLDPlusOne(cfg.scope.hostname) 64 | } 65 | 66 | var ctxOpts []chromedp.ContextOption 67 | if cfg.Logger != nil { 68 | ctxOpts = []chromedp.ContextOption{ 69 | chromedp.WithLogf(cfg.Logger.Printf), 70 | chromedp.WithDebugf(cfg.Logger.Debugf), 71 | chromedp.WithErrorf(cfg.Logger.Errorf), 72 | } 73 | } 74 | 75 | ctx, cancel := chromedp.NewContext(cfg.ctx, ctxOpts...) 
76 | defer cancel() 77 | 78 | if cfg.Template != "" { 79 | cfg.template = fasttemplate.New(cfg.Template, "{{", "}}") 80 | } 81 | 82 | chromedp.ListenTarget(ctx, func(ev interface{}) { 83 | switch ev := ev.(type) { 84 | case *network.EventRequestWillBeSent: // Outgoing requests 85 | url := ev.Request.URL 86 | if !IsURI(url) { 87 | break 88 | } 89 | 90 | if url == URL { 91 | break 92 | } 93 | 94 | if !slices.Contains(reqs, url) { 95 | reqs = append(reqs, url) 96 | } 97 | } 98 | }) 99 | 100 | err := chromedp.Run(ctx, 101 | chromedp.Navigate(URL), 102 | chromedp.Sleep(1*time.Second), 103 | chromedp.Evaluate(script, &res), 104 | ) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | // template eval 110 | for i, _ := range res { 111 | res[i] = cfg.eval(res[i]) 112 | } 113 | 114 | for i, _ := range reqs { 115 | reqs[i] = cfg.eval(reqs[i]) 116 | } 117 | 118 | res = MergeSlices(res, reqs) 119 | 120 | // filters 121 | switch { 122 | case cfg.SameRoot: 123 | for i := 0; i < len(res); i++ { 124 | r, _ := url.Parse(res[i]) 125 | base, err := publicsuffix.EffectiveTLDPlusOne(r.Hostname()) 126 | if err != nil && cfg.Logger != nil { 127 | cfg.Logger.Error("could not get eTLD+1", "parsed", r.String()) 128 | } 129 | 130 | if !strings.HasSuffix(cfg.scope.root, base) { 131 | res = append(res[:i], res[i+1:]...) 132 | i-- 133 | } 134 | } 135 | case cfg.SameHost: 136 | for i := 0; i < len(res); i++ { 137 | r, _ := url.Parse(res[i]) 138 | if r.Hostname() != cfg.scope.hostname { 139 | res = append(res[:i], res[i+1:]...) 
// script is evaluated in the page to collect the distinct values of the
// src, href, url and action attributes of every element that has one.
const script = "[...new Set(Array.from(document.querySelectorAll('[src],[href],[url],[action]')).map(i => i.src || i.href || i.url || i.action))]"

// IsURI reports whether s is an absolute URI, i.e. it parses as a request
// URI and carries both a scheme and a host.
func IsURI(s string) bool {
	if _, err := url.ParseRequestURI(s); err != nil {
		return false
	}

	u, err := url.Parse(s)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return false
	}

	return true
}

// MergeSlices merges two slices of the same type into a single slice,
// removing duplicates.
//
// Elements keep the order of their first occurrence, with everything from v1
// ahead of anything that appears only in v2. Neither input is modified. When
// both inputs are empty, v2 is returned unchanged.
func MergeSlices[T1 comparable, T2 []T1](v1, v2 T2) T2 {
	total := len(v1) + len(v2)
	if total == 0 {
		return v2
	}

	seen := make(map[T1]struct{}, total)
	merged := make(T2, 0, total)

	for _, src := range [...]T2{v1, v2} {
		for _, v := range src {
			if _, dup := seen[v]; dup {
				continue
			}
			seen[v] = struct{}{}
			merged = append(merged, v)
		}
	}

	return merged
}
43 | func (cfg *Config) SetScope(s string) { 44 | if u, err := url.Parse(s); err == nil { 45 | cfg.scope.hostname = u.Hostname() 46 | cfg.scope.root, _ = publicsuffix.EffectiveTLDPlusOne(u.Hostname()) 47 | } 48 | } 49 | 50 | func (cfg *Config) eval(s string) string { 51 | u, err := url.Parse(s) 52 | if err != nil && cfg.Logger != nil { 53 | cfg.Logger.Errorf("cannot eval %q URL with %q as template: %+v", s, cfg.Template, errors.Unwrap(err)) 54 | return s 55 | } 56 | 57 | if cfg.template == nil { 58 | return s 59 | } 60 | 61 | password, _ := u.User.Password() 62 | tags := map[string]interface{}{ 63 | "raw_url": u.String(), 64 | "scheme": u.Scheme, 65 | "user": u.User.String(), 66 | "username": u.User.Username(), 67 | "password": password, 68 | "host": u.Host, 69 | "hostname": u.Hostname(), 70 | "port": u.Port(), 71 | "path": u.Path, 72 | "raw_path": u.RawPath, 73 | "escaped_path": u.EscapedPath(), 74 | "raw_query": u.RawQuery, 75 | "fragment": u.Fragment, 76 | "raw_fragment": u.RawFragment, 77 | } 78 | 79 | return cfg.template.ExecuteString(tags) 80 | } 81 | -------------------------------------------------------------------------------- /pkg/galer/vars.go: -------------------------------------------------------------------------------- 1 | package galer 2 | 3 | import "github.com/chromedp/chromedp" 4 | 5 | var execAllocOpts = append( 6 | chromedp.DefaultExecAllocatorOptions[:], 7 | chromedp.DisableGPU, 8 | chromedp.IgnoreCertErrors, 9 | // chromedp.Flag("headless", false), 10 | ) 11 | --------------------------------------------------------------------------------