├── README.md
├── clients.go
├── main.go
├── main_test.go
└── ratelimit.go


/README.md:
--------------------------------------------------------------------------------
 1 | # gahttp
 2 | 
 3 | Async/concurrent HTTP requests for Go with rate-limiting.
 4 | 
 5 | Work in progress.
 6 | 
 7 | 
 8 | ## Example
 9 | 
10 | ```golang
11 | package main
12 | 
13 | import (
14 |     "fmt"
15 |     "net/http"
16 |     "time"
17 | 
18 |     "github.com/tomnomnom/gahttp"
19 | )
20 | 
21 | func printStatus(req *http.Request, resp *http.Response, err error) {
22 |     if err != nil {
23 |         return
24 |     }
25 |     fmt.Printf("%s: %s\n", req.URL, resp.Status)
26 | }
27 | 
28 | func main() {
29 |     p := gahttp.NewPipeline()
30 |     p.SetConcurrency(20)
31 |     p.SetRateLimit(time.Second * 1)
32 | 
33 |     urls := []string{
34 |         "http://example.com",
35 |         "http://example.com",
36 |         "http://example.com",
37 |         "http://example.net",
38 |         "http://example.org",
39 |     }
40 | 
41 |     for _, u := range urls {
42 |         p.Get(u, gahttp.Wrap(printStatus, gahttp.CloseBody))
43 |     }
44 |     p.Done()
45 | 
46 |     p.Wait()
47 | }
48 | ```
49 | 
50 | ## TODO
51 | 
52 | * `DoneAndWait()` func?
53 | * Helper for writing responses to channel? (e.g. `func ChanWriter() (chan *Response, procFn)`)
54 |     - For when you don't want to do the work concurrently
55 | * Actually handle timeouts / provide context interface for cancellation etc?
56 | 


--------------------------------------------------------------------------------
/clients.go:
--------------------------------------------------------------------------------
 1 | package gahttp
 2 | 
 3 | import (
 4 | 	"crypto/tls"
 5 | 	"net/http"
 6 | 	"time"
 7 | )
 8 | 
 9 | // NewDefaultClient returns the default HTTP client
10 | func NewDefaultClient() *http.Client {
11 | 	return &http.Client{
12 | 		Timeout: time.Second * 30,
13 | 	}
14 | }
15 | 
// ClientOptions is a bitmask of options understood by NewClient.
// Individual options are combined with bitwise OR, for example
// NoRedirects|SkipVerify.
type ClientOptions int

const (
	// NoRedirects makes the client return the first response as-is
	// instead of following redirects.
	NoRedirects ClientOptions = 1 << iota

	// SkipVerify makes the client skip verification of TLS certificates.
	SkipVerify
)
26 | 
27 | // NewClient returns a new client with the specified options
28 | func NewClient(opts ClientOptions) *http.Client {
29 | 
30 | 	transport := &http.Transport{}
31 | 
32 | 	if opts&SkipVerify > 0 {
33 | 		transport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
34 | 	}
35 | 
36 | 	client := &http.Client{
37 | 		Transport: transport,
38 | 		Timeout:   time.Second * 30,
39 | 	}
40 | 
41 | 	if opts&NoRedirects > 0 {
42 | 		client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
43 | 			return http.ErrUseLastResponse
44 | 		}
45 | 	}
46 | 
47 | 	return client
48 | }
49 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package gahttp
  2 | 
  3 | import (
  4 | 	"io"
  5 | 	"net/http"
  6 | 	"sync"
  7 | 	"time"
  8 | )
  9 | 
 10 | // a ProcFn is a function that processes an HTTP response.
 11 | // The HTTP request is provided for context, along with any
 12 | // error that occurred.
 13 | type ProcFn func(*http.Request, *http.Response, error)
 14 | 
// request pairs an outgoing *http.Request with the ProcFn that is
// called once the request has been executed. Values of this type are
// what Do sends to the workers over the pipeline's channel.
type request struct {
	// req is the HTTP request to execute.
	req *http.Request
	// fn is called with req, the response and any error.
	fn  ProcFn
}
 21 | 
// Pipeline is the main component of the gahttp package. It queues
// HTTP requests on an internal channel and executes them on a fixed
// number of worker goroutines, optionally enforcing a delay between
// requests to the same hostname.
type Pipeline struct {
	// concurrency is the number of worker goroutines started by Run.
	concurrency int

	// client executes every request; reqs carries queued requests
	// from Do to the workers.
	client *http.Client
	reqs   chan request

	// running is set by Run; once it is true the Set* methods are
	// no-ops. wg tracks the workers so that Wait can block on them.
	running bool
	wg      sync.WaitGroup

	// rl holds the per-hostname rate limiter; the workers only
	// consult it when rateLimited is true.
	rl          *rateLimiter
	rateLimited bool
}
 36 | 
 37 | // New returns a new *Pipeline for the provided concurrency level
 38 | func NewPipeline() *Pipeline {
 39 | 	return &Pipeline{
 40 | 		concurrency: 1,
 41 | 
 42 | 		client: NewDefaultClient(),
 43 | 		reqs:   make(chan request),
 44 | 
 45 | 		running: false,
 46 | 
 47 | 		rl:          newRateLimiter(0),
 48 | 		rateLimited: false,
 49 | 	}
 50 | }
 51 | 
 52 | // NewWithClient returns a new *Pipeline for the provided concurrency
 53 | // level, and uses the provided *http.Client to make requests
 54 | func NewPipelineWithClient(client *http.Client) *Pipeline {
 55 | 	p := NewPipeline()
 56 | 	p.client = client
 57 | 	return p
 58 | }
 59 | 
 60 | // SetRateLimit sets the delay between requests to a given hostname
 61 | func (p *Pipeline) SetRateLimit(d time.Duration) {
 62 | 	if p.running {
 63 | 		return
 64 | 	}
 65 | 
 66 | 	if d == 0 {
 67 | 		p.rateLimited = false
 68 | 	} else {
 69 | 		p.rateLimited = true
 70 | 	}
 71 | 
 72 | 	p.rl.delay = d
 73 | }
 74 | 
 75 | // SetRateLimitMillis sets the delay between request to a given hostname
 76 | // in milliseconds. This function is provided as a convenience, to make
 77 | // it easy to accept integer values as command line arguments.
 78 | func (p *Pipeline) SetRateLimitMillis(m int) {
 79 | 	p.SetRateLimit(time.Duration(m * 1000000))
 80 | }
 81 | 
 82 | // SetClient sets the HTTP client used by the pipeline to make HTTP
 83 | // requests. It can only be set before the pipeline is running
 84 | func (p *Pipeline) SetClient(c *http.Client) {
 85 | 	if p.running {
 86 | 		return
 87 | 	}
 88 | 	p.client = c
 89 | }
 90 | 
 91 | // SetConcurrency sets the concurrency level for the pipeline.
 92 | // It can only be set before the pipeline is running
 93 | func (p *Pipeline) SetConcurrency(c int) {
 94 | 	if p.running {
 95 | 		return
 96 | 	}
 97 | 	p.concurrency = c
 98 | }
 99 | 
100 | // Do is the pipeline's generic request function; similar to
101 | // http.DefaultClient.Do(), but it also accepts a ProcFn which
102 | // will be called when the request has been executed
103 | func (p *Pipeline) Do(r *http.Request, fn ProcFn) {
104 | 	if !p.running {
105 | 		p.Run()
106 | 	}
107 | 
108 | 	// If you're doing a lot of requests to lots of
109 | 	// different hosts, having the underlying TCP
110 | 	// connections stay open can cause you to run
111 | 	// out of file descriptors pretty quickly. To
112 | 	// help prevent that, forcibly set all requests
113 | 	// to have 'Connection: close' set. This should
114 | 	// probably be made configurable, but even then
115 | 	// should still be turned on by default.
116 | 	r.Close = true
117 | 
118 | 	p.reqs <- request{r, fn}
119 | }
120 | 
121 | // Get is a convenience wrapper around the Do() function for making
122 | // HTTP GET requests. It accepts a URL and the ProcFn to process
123 | // the response.
124 | func (p *Pipeline) Get(u string, fn ProcFn) error {
125 | 	req, err := http.NewRequest("GET", u, nil)
126 | 	if err != nil {
127 | 		return err
128 | 	}
129 | 	p.Do(req, fn)
130 | 	return nil
131 | }
132 | 
133 | // Post is a convenience wrapper around the Do() function for making
134 | // HTTP POST requests. It accepts a URL, an io.Reader for the POST
135 | // body, and a ProcFn to process the response.
136 | func (p *Pipeline) Post(u string, body io.Reader, fn ProcFn) error {
137 | 	req, err := http.NewRequest("GET", u, body)
138 | 	if err != nil {
139 | 		return err
140 | 	}
141 | 	p.Do(req, fn)
142 | 	return nil
143 | }
144 | 
// Done should be called to signal to the pipeline that all requests
// that will be made have been enqueued. This closes the internal
// channel used to send requests to the workers that are executing
// the HTTP requests, which lets each worker exit once the channel
// has been drained.
//
// Done must be called at most once, and Do, Get and Post must not be
// called after it: closing a closed channel and sending on a closed
// channel both panic.
func (p *Pipeline) Done() {
	close(p.reqs)
}
152 | 
153 | // Run puts the pipeline into a running state. It launches the
154 | // worker processes that execute the HTTP requests. Run() is
155 | // called automatically by Do(), Get() and Post(), so it's often
156 | // not necessary to call it directly.
157 | func (p *Pipeline) Run() {
158 | 	if p.running {
159 | 		return
160 | 	}
161 | 	p.running = true
162 | 
163 | 	// launch workers
164 | 	for i := 0; i < p.concurrency; i++ {
165 | 		p.wg.Add(1)
166 | 		go func() {
167 | 			for r := range p.reqs {
168 | 				if p.rateLimited {
169 | 					p.rl.Block(r.req.URL.Hostname())
170 | 				}
171 | 
172 | 				resp, err := p.client.Do(r.req)
173 | 				r.fn(r.req, resp, err)
174 | 			}
175 | 			p.wg.Done()
176 | 		}()
177 | 	}
178 | }
179 | 
// Wait blocks until every worker goroutine started by Run has exited.
// Workers only exit once the request channel has been closed by Done
// and all queued requests have been executed, so Wait should be called
// after Done. If no workers were ever started, Wait returns at once.
func (p *Pipeline) Wait() {
	p.wg.Wait()
}
184 | 
185 | // CloseBody wraps a ProcFn and returns a version of it that automatically
186 | // closed the response body
187 | func CloseBody(fn ProcFn) ProcFn {
188 | 	return func(req *http.Request, resp *http.Response, err error) {
189 | 		fn(req, resp, err)
190 | 
191 | 		if resp == nil {
192 | 			return
193 | 		}
194 | 		if resp.Body != nil {
195 | 			resp.Body.Close()
196 | 		}
197 | 	}
198 | }
199 | 
200 | // IfNoError only calls the provided ProcFn if there was no error
201 | // when executing the HTTP request
202 | func IfNoError(fn ProcFn) ProcFn {
203 | 	return func(req *http.Request, resp *http.Response, err error) {
204 | 		if err == nil {
205 | 			fn(req, resp, err)
206 | 			return
207 | 		}
208 | 
209 | 		// because control isn't passed to the user's
210 | 		// function, when there's an error we need to
211 | 		// check for and close the response body
212 | 		if resp == nil {
213 | 			return
214 | 		}
215 | 		if resp.Body != nil {
216 | 			resp.Body.Close()
217 | 		}
218 | 	}
219 | }
220 | 
221 | // Wrap accepts a ProcFn and wraps it in any number of 'middleware'
222 | // functions (e.g. the CloseBody function).
223 | func Wrap(fn ProcFn, middleware ...func(ProcFn) ProcFn) ProcFn {
224 | 	for _, m := range middleware {
225 | 		fn = m(fn)
226 | 	}
227 | 	return fn
228 | }
229 | 


--------------------------------------------------------------------------------
/main_test.go:
--------------------------------------------------------------------------------
 1 | package gahttp
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"io/ioutil"
 6 | 	"net/http"
 7 | 	"net/http/httptest"
 8 | 	"strings"
 9 | 	"testing"
10 | )
11 | 
12 | func TestSmoke(t *testing.T) {
13 | 	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
14 | 		fmt.Fprintln(w, "a response")
15 | 	}))
16 | 	defer ts.Close()
17 | 
18 | 	p := NewPipeline()
19 | 
20 | 	p.Get(ts.URL, func(req *http.Request, resp *http.Response, err error) {
21 | 		if err != nil {
22 | 			t.Fatalf("want non-nil error passed to fn; have %s", err)
23 | 		}
24 | 
25 | 		if resp == nil {
26 | 			t.Fatalf("resp should not be nil")
27 | 		}
28 | 
29 | 		if resp.Body == nil {
30 | 			t.Fatalf("resp body should not be nil")
31 | 		}
32 | 
33 | 		b, err := ioutil.ReadAll(resp.Body)
34 | 		if err != nil {
35 | 			t.Fatalf("should have no error reading from body")
36 | 		}
37 | 
38 | 		if strings.TrimSpace(string(b)) != "a response" {
39 | 			t.Errorf("want 'a response' read from resp.Body; have '%s'", b)
40 | 		}
41 | 	})
42 | 	p.Done()
43 | 	p.Wait()
44 | }
45 | 


--------------------------------------------------------------------------------
/ratelimit.go:
--------------------------------------------------------------------------------
 1 | package gahttp
 2 | 
 3 | import (
 4 | 	"sync"
 5 | 	"time"
 6 | )
 7 | 
// rateLimiter allows you to delay operations on a per-key basis,
// i.e. only one operation for a given key is allowed to proceed
// within the delay time. The embedded mutex guards the ops map.
type rateLimiter struct {
	sync.Mutex
	// delay is the minimum time between two operations on one key.
	delay time.Duration
	// ops holds, per key, the time at which the most recently admitted
	// operation was (or will be) allowed to proceed.
	ops   map[string]time.Time
}
16 | 
17 | // newRateLimiter returns a new *rateLimiter for the
18 | // provided delay
19 | func newRateLimiter(delay time.Duration) *rateLimiter {
20 | 	return &rateLimiter{
21 | 		delay: delay,
22 | 		ops:   make(map[string]time.Time),
23 | 	}
24 | }
25 | 
26 | // Block blocks until an operation for key is
27 | // allowed to proceed
28 | func (r *rateLimiter) Block(key string) {
29 | 	now := time.Now()
30 | 
31 | 	r.Lock()
32 | 
33 | 	// if there's nothing in the map we can
34 | 	// return straight away
35 | 	if _, ok := r.ops[key]; !ok {
36 | 		r.ops[key] = now
37 | 		r.Unlock()
38 | 		return
39 | 	}
40 | 
41 | 	// if time is up we can return straight away
42 | 	t := r.ops[key]
43 | 	deadline := t.Add(r.delay)
44 | 	if now.After(deadline) {
45 | 		r.ops[key] = now
46 | 		r.Unlock()
47 | 		return
48 | 	}
49 | 
50 | 	remaining := deadline.Sub(now)
51 | 
52 | 	// Set the time of the operation
53 | 	r.ops[key] = now.Add(remaining)
54 | 	r.Unlock()
55 | 
56 | 	// Block for the remaining time
57 | 	<-time.After(remaining)
58 | }
59 | 


--------------------------------------------------------------------------------