├── README.md ├── clients.go ├── main.go ├── main_test.go └── ratelimit.go /README.md: -------------------------------------------------------------------------------- 1 | # gahttp 2 | 3 | Async/concurrent HTTP requests for Go with rate-limiting. 4 | 5 | Work in progress. 6 | 7 | 8 | ## Example 9 | 10 | ```golang 11 | package main 12 | 13 | import ( 14 | "fmt" 15 | "net/http" 16 | "time" 17 | 18 | "github.com/tomnomnom/gahttp" 19 | ) 20 | 21 | func printStatus(req *http.Request, resp *http.Response, err error) { 22 | if err != nil { 23 | return 24 | } 25 | fmt.Printf("%s: %s\n", req.URL, resp.Status) 26 | } 27 | 28 | func main() { 29 | p := gahttp.NewPipeline() 30 | p.SetConcurrency(20) 31 | p.SetRateLimit(time.Second * 1) 32 | 33 | urls := []string{ 34 | "http://example.com", 35 | "http://example.com", 36 | "http://example.com", 37 | "http://example.net", 38 | "http://example.org", 39 | } 40 | 41 | for _, u := range urls { 42 | p.Get(u, gahttp.Wrap(printStatus, gahttp.CloseBody)) 43 | } 44 | p.Done() 45 | 46 | p.Wait() 47 | } 48 | ``` 49 | 50 | ## TODO 51 | 52 | * `DoneAndWait()` func? 53 | * Helper for writing responses to channel? (e.g. `func ChanWriter() (chan *Response, procFn)`) 54 | - For when you don't want to do the work concurrently 55 | * Actually handle timeouts / provide context interface for cancellation etc? 
// NewDefaultClient returns the default HTTP client: a plain http.Client
// whose only customisation is a 30 second overall request timeout.
func NewDefaultClient() *http.Client {
	c := new(http.Client)
	c.Timeout = 30 * time.Second
	return c
}

// ClientOptions is a bitmask of options for HTTP clients built with
// NewClient. Options are combined with bitwise OR, for example
// NoRedirects|SkipVerify.
type ClientOptions int

const (
	// NoRedirects makes the client hand back the redirect response
	// itself instead of following it.
	NoRedirects ClientOptions = 1 << iota

	// SkipVerify turns off verification of TLS certificates.
	SkipVerify
)

// NewClient returns a new client configured according to opts. The
// client always gets its own *http.Transport and a 30 second timeout;
// opts only decides whether TLS verification is skipped and whether
// redirects are followed.
func NewClient(opts ClientOptions) *http.Client {
	// A nil TLS config leaves the transport with its default behaviour.
	var tlsConf *tls.Config
	if opts&SkipVerify != 0 {
		tlsConf = &tls.Config{InsecureSkipVerify: true}
	}

	// A nil CheckRedirect leaves the client's default redirect policy in
	// place.
	var checkRedirect func(*http.Request, []*http.Request) error
	if opts&NoRedirects != 0 {
		checkRedirect = func(*http.Request, []*http.Request) error {
			return http.ErrUseLastResponse
		}
	}

	return &http.Client{
		Transport:     &http.Transport{TLSClientConfig: tlsConf},
		CheckRedirect: checkRedirect,
		Timeout:       30 * time.Second,
	}
}
13 | type ProcFn func(*http.Request, *http.Response, error) 14 | 15 | // a request wraps a Go HTTP request struct, and a ProcFn 16 | // to process its result 17 | type request struct { 18 | req *http.Request 19 | fn ProcFn 20 | } 21 | 22 | // a Pipeline is the main component of the gahttp package. 23 | // It orchestrates making requests, optionally rate limiting them 24 | type Pipeline struct { 25 | concurrency int 26 | 27 | client *http.Client 28 | reqs chan request 29 | 30 | running bool 31 | wg sync.WaitGroup 32 | 33 | rl *rateLimiter 34 | rateLimited bool 35 | } 36 | 37 | // New returns a new *Pipeline for the provided concurrency level 38 | func NewPipeline() *Pipeline { 39 | return &Pipeline{ 40 | concurrency: 1, 41 | 42 | client: NewDefaultClient(), 43 | reqs: make(chan request), 44 | 45 | running: false, 46 | 47 | rl: newRateLimiter(0), 48 | rateLimited: false, 49 | } 50 | } 51 | 52 | // NewWithClient returns a new *Pipeline for the provided concurrency 53 | // level, and uses the provided *http.Client to make requests 54 | func NewPipelineWithClient(client *http.Client) *Pipeline { 55 | p := NewPipeline() 56 | p.client = client 57 | return p 58 | } 59 | 60 | // SetRateLimit sets the delay between requests to a given hostname 61 | func (p *Pipeline) SetRateLimit(d time.Duration) { 62 | if p.running { 63 | return 64 | } 65 | 66 | if d == 0 { 67 | p.rateLimited = false 68 | } else { 69 | p.rateLimited = true 70 | } 71 | 72 | p.rl.delay = d 73 | } 74 | 75 | // SetRateLimitMillis sets the delay between request to a given hostname 76 | // in milliseconds. This function is provided as a convenience, to make 77 | // it easy to accept integer values as command line arguments. 78 | func (p *Pipeline) SetRateLimitMillis(m int) { 79 | p.SetRateLimit(time.Duration(m * 1000000)) 80 | } 81 | 82 | // SetClient sets the HTTP client used by the pipeline to make HTTP 83 | // requests. 
It can only be set before the pipeline is running 84 | func (p *Pipeline) SetClient(c *http.Client) { 85 | if p.running { 86 | return 87 | } 88 | p.client = c 89 | } 90 | 91 | // SetConcurrency sets the concurrency level for the pipeline. 92 | // It can only be set before the pipeline is running 93 | func (p *Pipeline) SetConcurrency(c int) { 94 | if p.running { 95 | return 96 | } 97 | p.concurrency = c 98 | } 99 | 100 | // Do is the pipeline's generic request function; similar to 101 | // http.DefaultClient.Do(), but it also accepts a ProcFn which 102 | // will be called when the request has been executed 103 | func (p *Pipeline) Do(r *http.Request, fn ProcFn) { 104 | if !p.running { 105 | p.Run() 106 | } 107 | 108 | // If you're doing a lot of requests to lots of 109 | // different hosts, having the underlying TCP 110 | // connections stay open can cause you to run 111 | // out of file descriptors pretty quickly. To 112 | // help prevent that, forcibly set all requests 113 | // to have 'Connection: close' set. This should 114 | // probably be made configurable, but even then 115 | // should still be turned on by default. 116 | r.Close = true 117 | 118 | p.reqs <- request{r, fn} 119 | } 120 | 121 | // Get is a convenience wrapper around the Do() function for making 122 | // HTTP GET requests. It accepts a URL and the ProcFn to process 123 | // the response. 124 | func (p *Pipeline) Get(u string, fn ProcFn) error { 125 | req, err := http.NewRequest("GET", u, nil) 126 | if err != nil { 127 | return err 128 | } 129 | p.Do(req, fn) 130 | return nil 131 | } 132 | 133 | // Post is a convenience wrapper around the Do() function for making 134 | // HTTP POST requests. It accepts a URL, an io.Reader for the POST 135 | // body, and a ProcFn to process the response. 
136 | func (p *Pipeline) Post(u string, body io.Reader, fn ProcFn) error { 137 | req, err := http.NewRequest("GET", u, body) 138 | if err != nil { 139 | return err 140 | } 141 | p.Do(req, fn) 142 | return nil 143 | } 144 | 145 | // Done should be called to signal to the pipeline that all requests 146 | // that will be made have been enqueued. This closes the internal 147 | // channel used to send requests to the workers that are executing 148 | // the HTTP requests. 149 | func (p *Pipeline) Done() { 150 | close(p.reqs) 151 | } 152 | 153 | // Run puts the pipeline into a running state. It launches the 154 | // worker processes that execute the HTTP requests. Run() is 155 | // called automatically by Do(), Get() and Post(), so it's often 156 | // not necessary to call it directly. 157 | func (p *Pipeline) Run() { 158 | if p.running { 159 | return 160 | } 161 | p.running = true 162 | 163 | // launch workers 164 | for i := 0; i < p.concurrency; i++ { 165 | p.wg.Add(1) 166 | go func() { 167 | for r := range p.reqs { 168 | if p.rateLimited { 169 | p.rl.Block(r.req.URL.Hostname()) 170 | } 171 | 172 | resp, err := p.client.Do(r.req) 173 | r.fn(r.req, resp, err) 174 | } 175 | p.wg.Done() 176 | }() 177 | } 178 | } 179 | 180 | // Wait blocks until all requests in the pipeline have been executed 181 | func (p *Pipeline) Wait() { 182 | p.wg.Wait() 183 | } 184 | 185 | // CloseBody wraps a ProcFn and returns a version of it that automatically 186 | // closed the response body 187 | func CloseBody(fn ProcFn) ProcFn { 188 | return func(req *http.Request, resp *http.Response, err error) { 189 | fn(req, resp, err) 190 | 191 | if resp == nil { 192 | return 193 | } 194 | if resp.Body != nil { 195 | resp.Body.Close() 196 | } 197 | } 198 | } 199 | 200 | // IfNoError only calls the provided ProcFn if there was no error 201 | // when executing the HTTP request 202 | func IfNoError(fn ProcFn) ProcFn { 203 | return func(req *http.Request, resp *http.Response, err error) { 204 | if err == nil 
{ 205 | fn(req, resp, err) 206 | return 207 | } 208 | 209 | // because control isn't passed to the user's 210 | // function, when there's an error we need to 211 | // check for and close the response body 212 | if resp == nil { 213 | return 214 | } 215 | if resp.Body != nil { 216 | resp.Body.Close() 217 | } 218 | } 219 | } 220 | 221 | // Wrap accepts a ProcFn and wraps it in any number of 'middleware' 222 | // functions (e.g. the CloseBody function). 223 | func Wrap(fn ProcFn, middleware ...func(ProcFn) ProcFn) ProcFn { 224 | for _, m := range middleware { 225 | fn = m(fn) 226 | } 227 | return fn 228 | } 229 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package gahttp 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "net/http" 7 | "net/http/httptest" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func TestSmoke(t *testing.T) { 13 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 14 | fmt.Fprintln(w, "a response") 15 | })) 16 | defer ts.Close() 17 | 18 | p := NewPipeline() 19 | 20 | p.Get(ts.URL, func(req *http.Request, resp *http.Response, err error) { 21 | if err != nil { 22 | t.Fatalf("want non-nil error passed to fn; have %s", err) 23 | } 24 | 25 | if resp == nil { 26 | t.Fatalf("resp should not be nil") 27 | } 28 | 29 | if resp.Body == nil { 30 | t.Fatalf("resp body should not be nil") 31 | } 32 | 33 | b, err := ioutil.ReadAll(resp.Body) 34 | if err != nil { 35 | t.Fatalf("should have no error reading from body") 36 | } 37 | 38 | if strings.TrimSpace(string(b)) != "a response" { 39 | t.Errorf("want 'a response' read from resp.Body; have '%s'", b) 40 | } 41 | }) 42 | p.Done() 43 | p.Wait() 44 | } 45 | -------------------------------------------------------------------------------- /ratelimit.go: -------------------------------------------------------------------------------- 1 | package 
// A rateLimiter delays operations on a per-key basis: for any one key,
// at most one operation is let through per delay period.
type rateLimiter struct {
	sync.Mutex
	delay time.Duration
	ops   map[string]time.Time // key -> time its latest operation was scheduled for
}

// newRateLimiter returns a new *rateLimiter that spaces operations on
// the same key by the provided delay.
func newRateLimiter(delay time.Duration) *rateLimiter {
	rl := new(rateLimiter)
	rl.delay = delay
	rl.ops = make(map[string]time.Time)
	return rl
}

// Block blocks until an operation for key is allowed to proceed.
func (r *rateLimiter) Block(key string) {
	// The lock is only held while the slot is booked; the waiting
	// itself happens with the lock released.
	if wait := r.reserve(key, time.Now()); wait > 0 {
		time.Sleep(wait)
	}
}

// reserve books the next slot for key as seen at time now, and returns
// how long the caller has to wait before using it (zero if not at all).
func (r *rateLimiter) reserve(key string, now time.Time) time.Duration {
	r.Lock()
	defer r.Unlock()

	last, seen := r.ops[key]

	// First operation for this key: nothing to wait for.
	if !seen {
		r.ops[key] = now
		return 0
	}

	// The previous operation's delay has fully elapsed.
	next := last.Add(r.delay)
	if now.After(next) {
		r.ops[key] = now
		return 0
	}

	// Otherwise book the moment the delay runs out, so that the next
	// caller for this key queues up behind this one.
	wait := next.Sub(now)
	r.ops[key] = now.Add(wait)
	return wait
}