// rateLimiter delays operations on a per-key basis: only one operation
// for a given key may start within each delay window. The embedded
// mutex guards ops.
type rateLimiter struct {
	sync.Mutex
	delay time.Duration
	// ops maps a key to the start time of the most recently scheduled
	// operation for that key.
	ops map[string]time.Time
}

// newRateLimiter returns a new *rateLimiter that keeps operations for
// the same key at least delay apart.
func newRateLimiter(delay time.Duration) *rateLimiter {
	return &rateLimiter{
		delay: delay,
		ops:   make(map[string]time.Time),
	}
}

// Block blocks until an operation for key is allowed to proceed.
//
// The first operation for a key, and any operation arriving after the
// previous one's delay window has passed, returns immediately. Otherwise
// the caller is given the slot at the end of the current window and
// sleeps until then; later callers for the same key queue up behind it,
// one window apart.
func (r *rateLimiter) Block(key string) {
	r.Lock()

	// Allocate lazily so a zero-value rateLimiter does not panic on the
	// map write below.
	if r.ops == nil {
		r.ops = make(map[string]time.Time)
	}

	// Read the clock while holding the lock so the timestamp is not
	// stale when the caller had to wait for the mutex.
	now := time.Now()

	// start is the moment this operation is allowed to begin.
	start := now
	if last, seen := r.ops[key]; seen {
		if deadline := last.Add(r.delay); !now.After(deadline) {
			start = deadline
		}
	}
	r.ops[key] = start
	r.Unlock()

	// Sleep outside the lock so callers for other keys are not held up.
	if wait := start.Sub(now); wait > 0 {
		time.Sleep(wait)
	}
}
#!/bin/bash
# Build concurl for every supported OS/architecture pair and upload the
# archives to a GitHub release.
#
# Usage: script/release <version>
# Requires GITHUB_TOKEN in the environment and the `github-release` tool
# on PATH.
PROJDIR=$(cd "$(dirname "$0")/.." && pwd)

VERSION="${1}"
TAG="v${VERSION}"
# Named OWNER rather than USER so the login name in $USER is not clobbered.
OWNER="tomnomnom"
REPO="concurl"
BINARY="${REPO}"

if [[ -z "${VERSION}" ]]; then
    echo "Usage: ${0} <version>"
    exit 1
fi

if [[ -z "${GITHUB_TOKEN}" ]]; then
    echo "You forgot to set your GITHUB_TOKEN"
    exit 2
fi

# Everything below uses relative paths, so stop if the project
# directory cannot be entered.
cd "${PROJDIR}" || exit 4

# Run the tests
if ! go test; then
    echo "Tests failed. Aborting."
    exit 3
fi

# Create the release only if the tag does not exist yet. -x and -F match
# the tag as a whole literal line, so the dots in a version are not
# treated as regex wildcards.
git fetch --tags
if ! git tag | grep -qxF -- "${TAG}"; then
    github-release release \
        --user "${OWNER}" \
        --repo "${REPO}" \
        --tag "${TAG}" \
        --name "${REPO} ${TAG}" \
        --description "${TAG}" \
        --pre-release
fi

for ARCH in "amd64" "386"; do
    for OS in "darwin" "linux" "windows" "freebsd"; do

        BINFILE="${BINARY}"

        if [[ "${OS}" == "windows" ]]; then
            BINFILE="${BINFILE}.exe"
        fi

        # Remove any binary left over from the previous iteration.
        rm -f "${BINFILE}"

        # Skip this target if the build fails, so a missing or broken
        # binary is never archived and uploaded.
        if ! GOOS="${OS}" GOARCH="${ARCH}" go build "github.com/${OWNER}/${REPO}"; then
            echo "Build failed for ${OS}/${ARCH}. Skipping." >&2
            continue
        fi

        if [[ "${OS}" == "windows" ]]; then
            ARCHIVE="${BINARY}-${OS}-${ARCH}-${VERSION}.zip"
            zip "${ARCHIVE}" "${BINFILE}"
        else
            ARCHIVE="${BINARY}-${OS}-${ARCH}-${VERSION}.tgz"
            tar --create --gzip --file="${ARCHIVE}" "${BINFILE}"
        fi

        echo "Uploading ${ARCHIVE}..."
        github-release upload \
            --user "${OWNER}" \
            --repo "${REPO}" \
            --tag "${TAG}" \
            --name "${ARCHIVE}" \
            --file "${PROJDIR}/${ARCHIVE}"
    done
done
4 | 5 | ## Install 6 | 7 | With Go: 8 | 9 | ``` 10 | ▶ go get -u github.com/tomnomnom/concurl 11 | ``` 12 | 13 | Or [download a release for your platform](https://github.com/tomnomnom/concurl/releases). 14 | 15 | ## Usage 16 | 17 | Basic usage: 18 | 19 | ``` 20 | ▶ cat urls.txt 21 | https://example.com/path?one=1&two=2 22 | https://example.com/pathtwo?two=2&one=1 23 | https://example.net/a/path?two=2&one=1 24 | 25 | ▶ cat urls.txt | concurl -c 3 26 | out/example.com/6ad33f150c6a17b4d51bb3a5425036160e18643c https://example.com/path?one=1&two=2 27 | out/example.net/33ce069e645b0cb190ef0205af9200ae53b57e53 https://example.net/a/path?two=2&one=1 28 | out/example.com/5657622dd56a6c64da72459132d576a8f89576e2 https://example.com/pathtwo?two=2&one=1 29 | 30 | ▶ head -n 7 out/example.net/33ce069e645b0cb190ef0205af9200ae53b57e53 31 | cmd: curl --silent https://example.net/a/path?two=2&one=1 32 | ------ 33 | 34 | 35 | 36 | 37 | Example Domain 38 | ``` 39 | 40 | ### Curl Options 41 | 42 | Supply options to the `curl` command after a `--`: 43 | 44 | ``` 45 | ▶ echo "https://httpbin.org/anything" | concurl -c 5 -- -H'User-Agent: concurl' -H'X-Foo: bar' 46 | out/httpbin.org/391256f9956ce947c3bcb9af616fe0725a35ff4e https://httpbin.org/anything 47 | 48 | ▶ cat out/httpbin.org/391256f9956ce947c3bcb9af616fe0725a35ff4e 49 | cmd: curl --silent https://httpbin.org/anything -HUser-Agent: concurl -HX-Foo: bar 50 | ------ 51 | 52 | { 53 | "args": {}, 54 | "data": "", 55 | "files": {}, 56 | "form": {}, 57 | "headers": { 58 | "Accept": "*/*", 59 | "Connection": "close", 60 | "Host": "httpbin.org", 61 | "User-Agent": "concurl", 62 | "X-Foo": "bar" 63 | }, 64 | "json": null, 65 | "method": "GET", 66 | "url": "https://httpbin.org/anything" 67 | } 68 | ``` 69 | 70 | ## Help 71 | 72 | ``` 73 | ▶ concurl -h 74 | Usage of concurl: 75 | -c int 76 | Concurrency level (default 10) 77 | -d int 78 | Milliseconds between requests to the same domain (default 5000) 79 | -o string 80 | Output directory (default 
"out") 81 | ``` 82 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "crypto/sha1" 7 | "flag" 8 | "fmt" 9 | "io/ioutil" 10 | "net/url" 11 | "os" 12 | "os/exec" 13 | "path" 14 | "path/filepath" 15 | "strings" 16 | "sync" 17 | "time" 18 | ) 19 | 20 | func main() { 21 | var concurrency int 22 | flag.IntVar(&concurrency, "c", 10, "Concurrency level") 23 | 24 | var delay int 25 | flag.IntVar(&delay, "d", 5000, "Milliseconds between requests to the same domain") 26 | 27 | var outputDir string 28 | flag.StringVar(&outputDir, "o", "out", "Output directory") 29 | 30 | var noSave bool 31 | flag.BoolVar(&noSave, "no-save", false, "Don't save responses") 32 | 33 | flag.Parse() 34 | 35 | // channel to send URLs to workers 36 | jobs := make(chan string) 37 | 38 | rl := newRateLimiter(time.Duration(delay * 1000000)) 39 | 40 | var wg sync.WaitGroup 41 | 42 | for i := 0; i < concurrency; i++ { 43 | wg.Add(1) 44 | 45 | go func() { 46 | for u := range jobs { 47 | 48 | // get the domain for use in the path 49 | // and for rate limiting 50 | domain := "unknown" 51 | parsed, err := url.Parse(u) 52 | if err == nil { 53 | domain = parsed.Hostname() 54 | } 55 | 56 | // rate limit requests to the same domain 57 | rl.Block(domain) 58 | 59 | // we need the silent flag to get rid 60 | // of the progress output 61 | args := []string{"--silent", u} 62 | 63 | // pass all the arguments on to curl 64 | args = append(args, flag.Args()...) 65 | cmd := exec.Command("curl", args...) 
66 | 67 | out, err := cmd.CombinedOutput() 68 | if err != nil { 69 | fmt.Printf("failed to get output: %s\n", err) 70 | continue 71 | } 72 | 73 | if noSave { 74 | fmt.Println(u) 75 | continue 76 | } 77 | 78 | // use a hash of the URL and the arguments as the filename 79 | filename := fmt.Sprintf("%x", sha1.Sum([]byte(u+strings.Join(args, " ")))) 80 | p := filepath.Join(outputDir, domain, filename) 81 | 82 | if _, err := os.Stat(path.Dir(p)); os.IsNotExist(err) { 83 | err = os.MkdirAll(path.Dir(p), 0755) 84 | if err != nil { 85 | fmt.Printf("failed to create output dir: %s\n", err) 86 | continue 87 | } 88 | } 89 | 90 | // include the command at the top of the output file 91 | buf := &bytes.Buffer{} 92 | buf.WriteString("cmd: curl ") 93 | buf.WriteString(strings.Join(args, " ")) 94 | buf.WriteString("\n------\n\n") 95 | buf.Write(out) 96 | 97 | err = ioutil.WriteFile(p, buf.Bytes(), 0644) 98 | if err != nil { 99 | fmt.Printf("failed to save output: %s\n", err) 100 | continue 101 | } 102 | 103 | fmt.Printf("%s %s\n", p, u) 104 | } 105 | 106 | wg.Done() 107 | }() 108 | } 109 | 110 | sc := bufio.NewScanner(os.Stdin) 111 | for sc.Scan() { 112 | // send each line (a domain) on the jobs channel 113 | jobs <- sc.Text() 114 | } 115 | 116 | close(jobs) 117 | wg.Wait() 118 | } 119 | --------------------------------------------------------------------------------