├── .travis.yml ├── .gitignore ├── go.mod ├── gof3r ├── Readme.md ├── options_test.go ├── rm.go ├── put.go ├── get.go ├── main_test.go ├── cp_test.go ├── cp.go ├── main.go └── options.go ├── go.sum ├── internal ├── s3client │ ├── error.go │ └── s3client.go └── pool │ ├── pool_test.go │ └── pool.go ├── util.go ├── LICENSE.txt ├── http_client.go ├── getter_test.go ├── auth_test.go ├── list_objects_test.go ├── auth.go ├── delete_multiple.go ├── sign_test.go ├── sign.go ├── README.md ├── list_objects.go ├── getter.go ├── s3gof3r.go ├── putter.go └── s3gof3r_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.5.1 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mprof 2 | *.out 3 | .env 4 | gof3r/gof3r 5 | 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/github/s3gof3r 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/jessevdk/go-flags v1.5.0 7 | golang.org/x/sync v0.3.0 8 | ) 9 | 10 | require golang.org/x/sys v0.12.0 // indirect 11 | -------------------------------------------------------------------------------- /gof3r/Readme.md: -------------------------------------------------------------------------------- 1 | #gof3r# 2 | 3 | gof3r is a command-line interface for s3gof3r: fast, concurrent, streaming access to Amazon S3. 
4 | 5 | ###Documentation### 6 | 7 | ####[MAN PAGE](http://randallmcpherson.com/gof3r.html)#### 8 | 9 | ####Additional documentation: [http://godoc.org/github.com/rlmcpherson/s3gof3r/gof3r](http://godoc.org/github.com/rlmcpherson/s3gof3r/gof3r)#### 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/jessevdk/go-flags v1.5.0 h1:1jKYvbxEjfUl0fmqTCOfonvskHHXMjBySTLW4y9LFvc= 2 | github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4= 3 | golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= 4 | golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= 5 | golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 6 | golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= 7 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 8 | -------------------------------------------------------------------------------- /gof3r/options_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | "os" 7 | "os/user" 8 | "reflect" 9 | "testing" 10 | ) 11 | 12 | func TestHomeDir(t *testing.T) { 13 | hs := os.Getenv("HOME") 14 | defer os.Setenv("HOME", hs) 15 | 16 | u, err := user.Current() 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | thdir := u.HomeDir 21 | 22 | if err := os.Setenv("HOME", ""); err != nil { 23 | t.Fatal(err) 24 | } 25 | hdir, err := homeDir() 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | if hdir != thdir { 30 | t.Errorf("expected %s\n actual%s\n", thdir, hdir) 31 | } 32 | 33 | } 34 | 35 | func TestACL(t *testing.T) { 36 | h2 := http.Header{"X-Amz-Acl": []string{"public-read"}} 37 | h3 := ACL(http.Header{}, "public-read") 38 | if 
// RespError describes an error response received over HTTP.
// http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
type RespError struct {
	Code       string
	Message    string
	Resource   string
	RequestID  string `xml:"RequestId"`
	StatusCode int
}

// NewRespError builds a RespError from r: the status code is copied from
// the response, and the remaining fields are filled in by XML-decoding
// `r.Body`. It closes `r.Body`.
func NewRespError(r *http.Response) *RespError {
	respErr := &RespError{StatusCode: r.StatusCode}

	// Best effort: decode and close failures are deliberately ignored, so a
	// body that is not an XML error document simply leaves the text fields
	// it did not reach untouched.
	_ = xml.NewDecoder(r.Body).Decode(respErr)
	_ = r.Body.Close()

	return respErr
}

// Error renders the status code followed by the quoted message.
func (e *RespError) Error() string {
	return fmt.Sprintf("%d: %q", e.StatusCode, e.Message)
}
47 | func (l bufferPoolLogger) Printf(format string, a ...interface{}) { 48 | logger.debugPrintf(format, a...) 49 | } 50 | 51 | func StatusFromError(err error) (int, error) { 52 | if e, ok := err.(*s3client.RespError); ok { 53 | return e.StatusCode, nil 54 | } else { 55 | return 0, e 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 Randall McPherson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /gof3r/rm.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/url" 7 | "os" 8 | 9 | "github.com/github/s3gof3r" 10 | "github.com/jessevdk/go-flags" 11 | ) 12 | 13 | type RmOpts struct { 14 | CommonOpts 15 | VersionID string `short:"v" long:"versionId" description:"version ID of the object version to delete" no-ini:"true"` 16 | } 17 | 18 | func (rm *RmOpts) Usage() string { 19 | return " [rm-OPTIONS]" 20 | } 21 | 22 | func (rm *RmOpts) Execute(args []string) error { 23 | 24 | k, err := getAWSKeys() 25 | if err != nil { 26 | return err 27 | } 28 | 29 | conf := new(s3gof3r.Config) 30 | *conf = *s3gof3r.DefaultConfig 31 | s3 := s3gof3r.New(rm.EndPoint, k) 32 | s3gof3r.SetLogger(os.Stderr, "", log.Ltime, rm.Debug) 33 | 34 | // parse positional cp args 35 | if len(args) != 1 { 36 | return fmt.Errorf("rm: path argument required") 37 | } 38 | 39 | //var urls [1]*url.URL 40 | u, err := url.ParseRequestURI(args[0]) 41 | if err != nil { 42 | return fmt.Errorf("parse error: %s", err) 43 | } 44 | if u.Host != "" && u.Scheme != "s3" { 45 | return fmt.Errorf("parse error: %s", u.String()) 46 | } 47 | return s3.Bucket(u.Host).Delete(u.Path) 48 | } 49 | 50 | func addRmOpts(opts *RmOpts, parser *flags.Parser) { 51 | cmd, err := parser.AddCommand("rm", "delete from S3", "", opts) 52 | if err != nil { 53 | log.Fatal(err) 54 | } 55 | cmd.ArgsRequired = true 56 | } 57 | -------------------------------------------------------------------------------- /http_client.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "net" 5 | "net/http" 6 | "time" 7 | ) 8 | 9 | type deadlineConn struct { 10 | Timeout time.Duration 11 | net.Conn 12 | } 13 | 14 | func (c *deadlineConn) Read(b []byte) (n int, err error) { 15 | if err = 
c.Conn.SetDeadline(time.Now().Add(c.Timeout)); err != nil { 16 | return 17 | } 18 | return c.Conn.Read(b) 19 | } 20 | 21 | func (c *deadlineConn) Write(b []byte) (n int, err error) { 22 | if err = c.Conn.SetDeadline(time.Now().Add(c.Timeout)); err != nil { 23 | return 24 | } 25 | return c.Conn.Write(b) 26 | } 27 | 28 | // ClientWithTimeout is an http client optimized for high throughput 29 | // to S3, It times out more agressively than the default 30 | // http client in net/http as well as setting deadlines on the TCP connection 31 | func ClientWithTimeout(timeout time.Duration) *http.Client { 32 | transport := &http.Transport{ 33 | Proxy: http.ProxyFromEnvironment, 34 | Dial: func(netw, addr string) (net.Conn, error) { 35 | c, err := net.DialTimeout(netw, addr, timeout) 36 | if err != nil { 37 | return nil, err 38 | } 39 | if tc, ok := c.(*net.TCPConn); ok { 40 | tc.SetKeepAlive(true) 41 | tc.SetKeepAlivePeriod(timeout) 42 | } 43 | return &deadlineConn{timeout, c}, nil 44 | }, 45 | ResponseHeaderTimeout: timeout, 46 | MaxIdleConnsPerHost: 10, 47 | } 48 | return &http.Client{Transport: transport} 49 | } 50 | -------------------------------------------------------------------------------- /internal/pool/pool_test.go: -------------------------------------------------------------------------------- 1 | package pool 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | "sync" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | type testLogger struct { 13 | lock sync.Mutex 14 | buf bytes.Buffer 15 | } 16 | 17 | func (logger *testLogger) Printf(format string, a ...interface{}) { 18 | logger.lock.Lock() 19 | defer logger.lock.Unlock() 20 | 21 | fmt.Fprintf(&logger.buf, format, a...) 
22 | } 23 | 24 | func (logger *testLogger) String() string { 25 | logger.lock.Lock() 26 | defer logger.lock.Unlock() 27 | 28 | return logger.buf.String() 29 | } 30 | 31 | func TestBP(t *testing.T) { 32 | // send log output to buffer 33 | var lf testLogger 34 | bp := NewBufferPool(&lf, mb) 35 | bp.SetTimeout(1 * time.Millisecond) 36 | b := bp.Get() 37 | if cap(b) != int(mb) { 38 | t.Errorf("Expected buffer capacity: %d. Actual: %d", kb, cap(b)) 39 | } 40 | bp.Put(b) 41 | if n := bp.AllocationCount(); n != 2 { 42 | t.Errorf("Expected makes: %d. Actual: %d", 2, n) 43 | } 44 | 45 | b = bp.Get() 46 | bp.Put(b) 47 | 48 | // Give the pool time to realize that the existing buffers are 49 | // older than the timeout and to free them. Yes, this is 50 | // timing-dependent and therefore potentially flaky: 51 | time.Sleep(5 * time.Millisecond) 52 | 53 | if n := bp.AllocationCount(); n != 3 { 54 | t.Errorf("Expected makes: %d. Actual: %d", 3, n) 55 | } 56 | bp.Close() 57 | expLog := "3 buffers of 1 MB allocated" 58 | time.Sleep(1 * time.Millisecond) // wait for log 59 | ls := lf.String() 60 | if !strings.Contains(ls, expLog) { 61 | t.Errorf("BP debug logging on quit: \nExpected: %s\nActual: %s", 62 | expLog, ls) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /gof3r/put.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io" 5 | "log" 6 | "net/http" 7 | "os" 8 | 9 | "github.com/github/s3gof3r" 10 | "github.com/jessevdk/go-flags" 11 | ) 12 | 13 | type PutOpts struct { 14 | Key string `long:"key" short:"k" description:"S3 object key" required:"true" no-ini:"true"` 15 | Bucket string `long:"bucket" short:"b" description:"S3 bucket" required:"true" no-ini:"true"` 16 | Path string `short:"p" long:"path" description:"Path to file. Defaults to standard output for streaming." 
// Execute implements the `put` command: it streams the file at put.Path
// (or standard input when no path is given) into the S3 object put.Key in
// bucket put.Bucket.
//
// NOTE(review): the --path flag's help text says it defaults to standard
// output, but the fallback below reads from os.Stdin.
func (put *PutOpts) Execute(args []string) (err error) {
	// Start from a copy of the library defaults, then apply the flags.
	conf := new(s3gof3r.Config)
	*conf = *s3gof3r.DefaultConfig
	k, err := getAWSKeys()
	if err != nil {
		return
	}
	s3 := s3gof3r.New(put.EndPoint, k)
	b := s3.Bucket(put.Bucket)
	conf.Concurrency = put.Concurrency
	if put.NoSSL {
		conf.Scheme = "http"
	}
	conf.PartSize = put.PartSize
	conf.Md5Check = !put.NoMd5
	conf.NTry = put.NTry
	s3gof3r.SetLogger(os.Stderr, "", log.LstdFlags, put.Debug)

	// ACL below is handed put.Header; make sure it is never a nil map.
	if put.Header == nil {
		put.Header = make(http.Header)
	}

	// An open failure is only tolerated when no path was supplied, in
	// which case the data comes from standard input.
	r, err := os.Open(put.Path)
	if err != nil {
		if put.Path == "" {
			r = os.Stdin
		} else {
			return
		}
	}
	// Deferred-call arguments are evaluated here, so in the stdin case
	// checkClose receives the non-nil error left over from os.Open.
	// NOTE(review): checkClose is defined outside this file section; confirm
	// what it does with that error.
	defer checkClose(r, err)
	w, err := b.PutWriter(put.Key, ACL(put.Header, put.ACL), conf)
	if err != nil {
		return
	}
	defer checkClose(w, err)
	if _, err = io.Copy(w, r); err != nil {
		return
	}
	return
}
fmt.Errorf("newGetter() %s", err) 25 | } 26 | return g, nil 27 | } 28 | 29 | // Verify graceful recovery (don't hang) when we receive the target 30 | // content length but subsequent chunk requests fail. 31 | func TestFailedGet(t *testing.T) { 32 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 33 | w.Header().Set("Content-Length", "2") // trigger unexpected EOF in the client 34 | fmt.Fprintln(w, "") 35 | })) 36 | defer ts.Close() 37 | 38 | g, err := NewFakeGetter(ts.URL) 39 | if err != nil { 40 | t.Error(err) 41 | } 42 | defer g.Close() 43 | 44 | _, err = ioutil.ReadAll(g) 45 | if err == nil { 46 | t.Error("Expected ReadAll() to return an error") 47 | } 48 | } 49 | 50 | // Verify successful read when everything is working correctly. 51 | func TestGetterHappyPath(t *testing.T) { 52 | expStr := "happy test" 53 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 54 | fmt.Fprintln(w, expStr) 55 | })) 56 | defer ts.Close() 57 | 58 | g, err := NewFakeGetter(ts.URL) 59 | if err != nil { 60 | t.Error(err) 61 | } 62 | defer g.Close() 63 | 64 | d, err := ioutil.ReadAll(g) 65 | if err != nil { 66 | t.Error("ReadAll():", err) 67 | } 68 | if string(d[:len(d)-1]) != expStr { // strip trailing newline 69 | t.Errorf("Expected data to be: '%v'. 
// authT holds a snapshot of the process environment so that a test can
// wipe the environment and put it back afterwards.
type authT struct {
	env []string
}

// saveEnv records the current environment and then clears it.
func (s *authT) saveEnv() {
	snapshot := os.Environ()
	s.env = snapshot
	os.Clearenv()
}

// restoreEnv clears the environment and re-creates every variable that
// saveEnv recorded.
func (s *authT) restoreEnv() {
	os.Clearenv()
	for i := range s.env {
		// Split on the first "=" only; a value may itself contain "=".
		pair := strings.SplitN(s.env[i], "=", 2)
		name, value := pair[0], pair[1]
		os.Setenv(name, value)
	}
}
Actual: %v", testKeys, keys) 64 | } 65 | s.restoreEnv() 66 | } 67 | 68 | func TestEnvKeysNotSet(t *testing.T) { 69 | s := authT{} 70 | s.saveEnv() 71 | _, err := EnvKeys() 72 | expErr := "keys not set in environment: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY" 73 | if err.Error() != expErr { 74 | t.Errorf("Expected error: %v. Actual: %v", expErr, err) 75 | } 76 | s.restoreEnv() 77 | } 78 | -------------------------------------------------------------------------------- /gof3r/get.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "os" 8 | 9 | "github.com/github/s3gof3r" 10 | "github.com/jessevdk/go-flags" 11 | ) 12 | 13 | type GetOpts struct { 14 | Key string `long:"key" short:"k" description:"S3 object key" required:"true" no-ini:"true"` 15 | Bucket string `long:"bucket" short:"b" description:"S3 bucket" required:"true" no-ini:"true"` 16 | Path string `short:"p" long:"path" description:"Path to file. Defaults to standard output for streaming." no-ini:"true"` 17 | DataOpts 18 | CommonOpts 19 | VersionID string `short:"v" long:"versionId" description:"Version ID of the object. Incompatible with md5 check (use --no-md5)." 
// Execute implements the `get` command: it streams the S3 object get.Key
// from bucket get.Bucket into the file at get.Path, or to standard output
// when no path is given.
func (get *GetOpts) Execute(args []string) (err error) {
	// Start from a copy of the library defaults, then apply the flags.
	conf := new(s3gof3r.Config)
	*conf = *s3gof3r.DefaultConfig
	k, err := getAWSKeys()
	if err != nil {
		return
	}
	s3 := s3gof3r.New(get.EndPoint, k)
	b := s3.Bucket(get.Bucket)
	conf.Concurrency = get.Concurrency
	if get.NoSSL {
		conf.Scheme = "http"
	}
	conf.PartSize = get.PartSize
	conf.Md5Check = !get.NoMd5
	conf.NTry = get.NTry

	s3gof3r.SetLogger(os.Stderr, "", log.LstdFlags, get.Debug)

	// A specific object version is requested by appending it to the key
	// as a query string.
	if get.VersionID != "" {
		get.Key = fmt.Sprintf("%s?versionId=%s", get.Key, get.VersionID)
	}

	// A create failure is only tolerated when no path was supplied, in
	// which case the data goes to standard output.
	w, err := os.Create(get.Path)
	if err != nil {
		if get.Path == "" {
			w = os.Stdout
		} else {
			return
		}
	}
	// Deferred-call arguments are evaluated here, so in the stdout case
	// checkClose receives the non-nil error left over from os.Create.
	// NOTE(review): checkClose is defined outside this file section; confirm
	// what it does with that error.
	defer checkClose(w, err)
	r, header, err := b.GetReader(get.Key, conf)
	if err != nil {
		return
	}
	defer checkClose(r, err)
	if _, err = io.Copy(w, r); err != nil {
		return
	}
	// The headers returned by GetReader are only printed in debug mode.
	if get.Debug {
		log.Println("Headers: ", header)
	}
	return
}
// errComp reports whether actual is an acceptable outcome for expect.
// Two nil errors match. Two non-nil errors match when the text of actual
// contains the text of expect. Every other combination is a mismatch,
// which is recorded on t together with tt (a description of the test case).
func errComp(expect, actual error, t *testing.T, tt interface{}) bool {
	t.Helper()

	var matched bool
	switch {
	case expect == nil && actual == nil:
		return true
	case expect != nil && actual != nil:
		matched = strings.Contains(actual.Error(), expect.Error())
	}

	if !matched {
		t.Errorf("gof3r called with %v\n Expected: %v\n Actual: %v\n", tt, expect, actual)
	}
	return matched
}
"s3.amazonaws.com"}, 20 | DataOpts: DataOpts{PartSize: mb}} 21 | 22 | type cpTest struct { 23 | *CpOpts 24 | args []string 25 | err error 26 | } 27 | 28 | var cpTests = []cpTest{ 29 | {defaultCpOpts, 30 | []string{"cp_test.go", "s3://" + tb + "/t1"}, 31 | nil}, 32 | {defaultCpOpts, 33 | []string{"s3://" + tb + "/t1", "s3://" + tb + "/t2"}, 34 | nil}, 35 | {defaultCpOpts, 36 | []string{"s3://" + tb + "/t1", "s3://" + tb + "//t2"}, 37 | nil}, 38 | {defaultCpOpts, 39 | []string{"s3://" + tb + "/t1", "/dev/null"}, 40 | nil}, 41 | {defaultCpOpts, 42 | []string{"s3://" + tb + "/noexist", "/dev/null"}, 43 | errors.New("404")}, 44 | {&CpOpts{ 45 | CommonOpts: CommonOpts{EndPoint: "s3-external-1.amazonaws.com"}, 46 | DataOpts: DataOpts{PartSize: mb}}, 47 | []string{"s3://" + tb + "/&exist", "/dev/null"}, 48 | errors.New("404")}, 49 | {&CpOpts{ 50 | DataOpts: DataOpts{NoSSL: true, 51 | PartSize: mb}}, 52 | []string{"s3://" + tb + "/t1", "s3://" + tb + "/tdir/.tst"}, 53 | nil}, 54 | {defaultCpOpts, 55 | []string{"s3://" + tb + "/t1"}, 56 | errors.New("source and destination arguments required")}, 57 | {defaultCpOpts, 58 | []string{"s://" + tb + "/t1", "s3://" + tb + "/tdir/.tst"}, 59 | errors.New("parse error: s://")}, 60 | {defaultCpOpts, 61 | []string{"http://%%s", ""}, 62 | errors.New("parse error: parse http")}, 63 | {defaultCpOpts, 64 | []string{"s3://" + tb + "/t1", "s3://no-bucket/.tst"}, 65 | errors.New("bucket does not exist")}, 66 | } 67 | 68 | func TestcpOptsExecute(t *testing.T) { 69 | 70 | if tb == "" { 71 | t.Fatal("TEST_BUCKET must be set in environment") 72 | } 73 | 74 | for _, tt := range cpTests { 75 | t.Log(tt) 76 | err := tt.Execute(tt.args) 77 | errComp(tt.err, err, t, tt) 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /list_objects_test.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "log" 5 | "sort" 6 | "sync" 7 | 
"testing" 8 | ) 9 | 10 | var keysForListing = []string{ 11 | "list/one/two/three", 12 | "list/one/two/four", 13 | "list/two/three/four", 14 | "list/two/three/five", 15 | "list/three/four/five", 16 | "list/three/four/six", 17 | "list/four/five/six", 18 | "list/four/five/seven", 19 | } 20 | 21 | func uploadListerFiles() { 22 | var wg sync.WaitGroup 23 | for _, tt := range keysForListing { 24 | wg.Add(1) 25 | go func(path string) { 26 | err := b.putReader(path, &randSrc{Size: 20}) 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | wg.Done() 31 | }(tt) 32 | } 33 | wg.Wait() 34 | } 35 | 36 | func testListObjects(t *testing.T, prefixes []string, iterations, concurrency int) { 37 | config := Config{ 38 | Concurrency: 1, 39 | Scheme: "https", 40 | } 41 | l, err := b.ListObjects(prefixes, 5, &config) 42 | if err != nil { 43 | t.Error(err) 44 | } 45 | 46 | actual := make([]string, 0, len(keysForListing)) 47 | actualIterations := 0 48 | for l.Next() { 49 | actualIterations++ 50 | actual = append(actual, l.Value()...) 
51 | } 52 | 53 | err = l.Error() 54 | if err != nil { 55 | t.Error(err) 56 | } 57 | 58 | if actualIterations != iterations { 59 | t.Errorf("expected %d iterations, got %d", iterations, actualIterations) 60 | } 61 | 62 | if len(actual) != len(keysForListing) { 63 | t.Errorf("expected %d keys, got %d", len(keysForListing), len(actual)) 64 | } 65 | 66 | sort.Strings(keysForListing) 67 | sort.Strings(actual) 68 | 69 | for i, a := range keysForListing { 70 | if a != actual[i] { 71 | t.Errorf("result mismatch, expected '%s', got '%s'", a, actual[i]) 72 | } 73 | } 74 | } 75 | 76 | func TestListObjects(t *testing.T) { 77 | t.Parallel() 78 | 79 | uploadListerFiles() 80 | 81 | testListObjects(t, []string{"list/"}, 2, 1) 82 | testListObjects(t, []string{"list/"}, 2, 5) 83 | testListObjects(t, []string{"list/one/", "list/two/", "list/three", "list/four"}, 4, 1) 84 | testListObjects(t, []string{"list/one/", "list/two/", "list/three", "list/four"}, 4, 5) 85 | } 86 | -------------------------------------------------------------------------------- /gof3r/cp.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/url" 8 | "os" 9 | "strings" 10 | 11 | "github.com/github/s3gof3r" 12 | "github.com/jessevdk/go-flags" 13 | ) 14 | 15 | type CpArg struct { 16 | Source string `name:"source"` 17 | Dest string `name:"dest"` 18 | } 19 | 20 | type CpOpts struct { 21 | DataOpts 22 | CommonOpts 23 | CpArg `positional-args:"true" required:"true"` 24 | UpOpts 25 | } 26 | 27 | func (cp *CpOpts) Execute(args []string) (err error) { 28 | 29 | k, err := getAWSKeys() 30 | if err != nil { 31 | return 32 | } 33 | 34 | conf := new(s3gof3r.Config) 35 | *conf = *s3gof3r.DefaultConfig 36 | s3 := s3gof3r.New(cp.EndPoint, k) 37 | conf.Concurrency = cp.Concurrency 38 | if cp.NoSSL { 39 | conf.Scheme = "http" 40 | } 41 | conf.PartSize = cp.PartSize 42 | conf.Md5Check = !cp.NoMd5 43 | conf.NTry = cp.NTry 44 | 
s3gof3r.SetLogger(os.Stderr, "", log.LstdFlags, cp.Debug) 45 | 46 | src, err := func(src string) (io.ReadCloser, error) { 47 | if !strings.HasPrefix(strings.ToLower(src), "s3") { 48 | return os.Open(src) 49 | } 50 | u, err := url.ParseRequestURI(src) 51 | if err != nil { 52 | return nil, fmt.Errorf("parse error: %s", err) 53 | } 54 | 55 | r, _, err := s3.Bucket(u.Host).GetReader(u.Path, conf) 56 | return r, err 57 | }(cp.Source) 58 | if err != nil { 59 | return 60 | } 61 | defer checkClose(src, err) 62 | 63 | dst, err := func(dst string) (io.WriteCloser, error) { 64 | if !strings.HasPrefix(strings.ToLower(dst), "s3") { 65 | return os.Create(dst) 66 | } 67 | u, err := url.ParseRequestURI(dst) 68 | if err != nil { 69 | return nil, fmt.Errorf("parse error: %s", err) 70 | } 71 | 72 | return s3.Bucket(u.Host).PutWriter(u.Path, ACL(cp.Header, cp.ACL), conf) 73 | }(cp.Dest) 74 | if err != nil { 75 | return 76 | } 77 | 78 | defer checkClose(dst, err) 79 | _, err = io.Copy(dst, src) 80 | return 81 | } 82 | 83 | func addCpOpts(opts *CpOpts, parser *flags.Parser) { 84 | cmd, err := parser.AddCommand("cp", "copy S3 objects", "copy S3 objects to or from S3 and local files", opts) 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | cmd.Aliases = []string{"copy"} 89 | } 90 | -------------------------------------------------------------------------------- /auth.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "net/http" 8 | "os" 9 | "time" 10 | 11 | "github.com/github/s3gof3r/internal/s3client" 12 | ) 13 | 14 | // Keys for an Amazon Web Services account. 15 | // Used for signing http requests. 
16 | type Keys struct { 17 | AccessKey string 18 | SecretKey string 19 | SecurityToken string 20 | } 21 | 22 | type mdCreds struct { 23 | Code string 24 | LastUpdated string 25 | Type string 26 | AccessKeyID string `xml:"AccessKeyId"` 27 | SecretAccessKey string 28 | Token string 29 | Expiration string 30 | } 31 | 32 | // InstanceKeys Requests the AWS keys from the instance-based metadata on EC2 33 | // Assumes only one IAM role. 34 | func InstanceKeys() (Keys, error) { 35 | rolePath := "http://169.254.169.254/latest/meta-data/iam/security-credentials/" 36 | var creds mdCreds 37 | 38 | // request the role name for the instance 39 | // assumes there is only one 40 | resp, err := ClientWithTimeout(2 * time.Second).Get(rolePath) 41 | if err != nil { 42 | return Keys{}, err 43 | } 44 | if resp.StatusCode != 200 { 45 | return Keys{}, s3client.NewRespError(resp) 46 | } 47 | 48 | role, err := ioutil.ReadAll(resp.Body) 49 | closeErr := resp.Body.Close() 50 | if err != nil { 51 | return Keys{}, err 52 | } 53 | if closeErr != nil { 54 | return Keys{}, closeErr 55 | } 56 | 57 | // request the credential metadata for the role 58 | resp, err = http.Get(rolePath + string(role)) 59 | if err != nil { 60 | return Keys{}, err 61 | } 62 | if resp.StatusCode != 200 { 63 | return Keys{}, s3client.NewRespError(resp) 64 | } 65 | 66 | metadata, err := ioutil.ReadAll(resp.Body) 67 | closeErr = resp.Body.Close() 68 | if err != nil { 69 | return Keys{}, err 70 | } 71 | if closeErr != nil { 72 | return Keys{}, closeErr 73 | } 74 | 75 | if err = json.Unmarshal([]byte(metadata), &creds); err != nil { 76 | return Keys{}, err 77 | } 78 | 79 | return Keys{ 80 | AccessKey: creds.AccessKeyID, 81 | SecretKey: creds.SecretAccessKey, 82 | SecurityToken: creds.Token, 83 | }, nil 84 | } 85 | 86 | // EnvKeys Reads the AWS keys from the environment 87 | func EnvKeys() (keys Keys, err error) { 88 | keys = Keys{ 89 | AccessKey: os.Getenv("AWS_ACCESS_KEY_ID"), 90 | SecretKey: 
os.Getenv("AWS_SECRET_ACCESS_KEY"), 91 | SecurityToken: os.Getenv("AWS_SECURITY_TOKEN"), 92 | } 93 | if keys.AccessKey == "" || keys.SecretKey == "" { 94 | err = fmt.Errorf("keys not set in environment: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY") 95 | } 96 | return 97 | } 98 | -------------------------------------------------------------------------------- /delete_multiple.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "bytes" 5 | "crypto/md5" 6 | "encoding/base64" 7 | "encoding/xml" 8 | "io/ioutil" 9 | "net/http" 10 | 11 | "github.com/github/s3gof3r/internal/s3client" 12 | ) 13 | 14 | type deleteObject struct { 15 | Key string `xml:"Key"` 16 | VersionId string `xml:"VersionId,omitempty"` 17 | } 18 | 19 | type deleteRequest struct { 20 | XMLName xml.Name `xml:"Delete"` 21 | Objects []deleteObject `xml:"Object"` 22 | Quiet bool `xml:"Quiet"` 23 | } 24 | 25 | type DeletedObject struct { 26 | Key string `xml:"Key"` 27 | VersionId string `xml:"VersionId"` 28 | DeleteMarker bool `xml:"DeleteMarker"` 29 | DeleteMarkerVersionId string `xml:"DeleteMarkerVersionId"` 30 | } 31 | 32 | type DeleteError struct { 33 | Key string `xml:"Key"` 34 | Code string `xml:"Code"` 35 | Message string `xml:"Message"` 36 | } 37 | 38 | type DeleteResult struct { 39 | XMLName xml.Name `xml:"DeleteResult"` 40 | Deleted []DeletedObject `xml:"Deleted"` 41 | Errors []DeleteError `xml:"Error"` 42 | } 43 | 44 | func deleteMultiple(c *Config, b *Bucket, quiet bool, keys []string) (DeleteResult, error) { 45 | if len(keys) == 0 { 46 | return DeleteResult{}, nil 47 | } 48 | 49 | u, err := b.url("", c) 50 | if err != nil { 51 | return DeleteResult{}, err 52 | } 53 | u.RawQuery = "delete" 54 | 55 | objects := make([]deleteObject, 0, len(keys)) 56 | for _, key := range keys { 57 | objects = append(objects, deleteObject{Key: key}) 58 | } 59 | 60 | deleteRequest := deleteRequest{ 61 | Objects: objects, 62 | Quiet: quiet, 63 | } 64 
// convenience multipliers
const (
	_        = iota
	kb int64 = 1 << (10 * iota)
	mb
	gb
	tb
	pb
	eb
)

// qb is a pooled buffer together with the time it was (re)queued.
type qb struct {
	when time.Time
	s    []byte
}

// BufferPool hands out and recycles byte slices. All queue state lives in a
// single goroutine started by NewBufferPool; the exported methods talk to it
// over channels.
type BufferPool struct {
	makes     uint64 // number of buffers allocated; accessed atomically
	get       chan []byte
	give      chan []byte
	quit      chan struct{}
	timeout   time.Duration // only touched by the pool goroutine after construction
	sizech    chan int64
	timeoutCh chan time.Duration
	wg        sync.WaitGroup
}

// logger is the minimal logging interface the pool needs.
type logger interface {
	Printf(format string, a ...interface{})
}

// NewBufferPool starts a pool that allocates buffers of bufsz bytes on
// demand. Idle buffers are discarded once they have been queued for longer
// than the pool timeout (one minute until SetTimeout is called). logger
// receives a single summary line when the pool is closed; it may be nil.
func NewBufferPool(logger logger, bufsz int64) (sp *BufferPool) {
	sp = &BufferPool{
		get:       make(chan []byte),
		give:      make(chan []byte),
		quit:      make(chan struct{}),
		timeout:   time.Minute,
		sizech:    make(chan int64),
		timeoutCh: make(chan time.Duration),
	}

	sp.wg.Add(1)
	go func() {
		defer sp.wg.Done()
		sp.run(logger, bufsz)
	}()
	return sp
}

// run is the pool goroutine: it owns the queue of idle buffers and serves
// Get, Put, SetBufferSize and SetTimeout until Close is called.
func (bp *BufferPool) run(log logger, bufsz int64) {
	q := new(list.List)
	for {
		// Always keep one buffer ready so that a Get can be served.
		if q.Len() == 0 {
			q.PushFront(qb{when: time.Now(), s: make([]byte, bufsz)})
			atomic.AddUint64(&bp.makes, 1)
		}

		e := q.Front()

		// Only arm the staleness check when more than one buffer is
		// queued; discarding the sole buffer would just recreate it on
		// the next pass through the loop.
		timer := time.NewTimer(bp.timeout)
		var stale <-chan time.Time
		if q.Len() > 1 {
			stale = timer.C
		}

		select {
		case b := <-bp.give:
			q.PushFront(qb{when: time.Now(), s: b})
		case bp.get <- e.Value.(qb).s:
			q.Remove(e)
		case <-stale:
			// Free unused slices older than the timeout.
			for cur := q.Front(); cur != nil; {
				next := cur.Next()
				if time.Since(cur.Value.(qb).when) > bp.timeout {
					q.Remove(cur)
					cur.Value = nil
				}
				cur = next
			}
		case sz := <-bp.sizech:
			// Only future allocations use the new size; buffers that
			// are already queued (or handed out) keep their length.
			bufsz = sz
		case d := <-bp.timeoutCh:
			bp.timeout = d
		case <-bp.quit:
			timer.Stop()
			if log != nil {
				log.Printf("%d buffers of %d MB allocated", atomic.LoadUint64(&bp.makes), bufsz/(1*mb))
			}
			return
		}

		// Release the timer on every path, not just Get/Put, so that
		// resize and timeout updates do not leave timers pending.
		timer.Stop()
	}
}

// Get returns a buffer from the pool, allocating one if none is idle.
// It blocks forever if the pool has been closed.
func (bp *BufferPool) Get() []byte {
	return <-bp.get
}

// Put returns buf to the pool for reuse. Once the pool has been closed the
// buffer is simply dropped instead of blocking the caller forever.
func (bp *BufferPool) Put(buf []byte) {
	select {
	case bp.give <- buf:
	case <-bp.quit:
	}
}

// Close stops the pool goroutine and waits for it to exit. It must be
// called at most once.
func (bp *BufferPool) Close() {
	close(bp.quit)
	bp.wg.Wait()
}

// SetBufferSize changes the size used for buffers allocated from now on.
func (bp *BufferPool) SetBufferSize(bufsz int64) {
	bp.sizech <- bufsz
}

// SetTimeout changes how long an idle buffer may stay queued before it is
// discarded.
func (bp *BufferPool) SetTimeout(timeout time.Duration) {
	bp.timeoutCh <- timeout
}

// AllocationCount reports how many buffers the pool has allocated so far.
func (bp *BufferPool) AllocationCount() uint64 {
	return atomic.LoadUint64(&bp.makes)
}
/gof3r/main.go: -------------------------------------------------------------------------------- 1 | // gof3r is a command-line interface for s3gof3r: fast, concurrent, streaming access to Amazon S3. 2 | // 3 | // Example Usage: 4 | // To stream up to S3: 5 | // $ | gof3r put -b -k 6 | // To stream down from S3: 7 | // $ gof3r get -b -k | 8 | // To upload a file to S3: 9 | // $ gof3r cp s3:/// -m -m... 10 | // To download a file from S3: 11 | // $ gof3r cp s3:/// 12 | // 13 | // 14 | // Set AWS keys as environment Variables (required unless using ec2 instance-based credentials): 15 | // 16 | // $ export AWS_ACCESS_KEY_ID= 17 | // $ export AWS_SECRET_ACCESS_KEY= 18 | // 19 | // Examples: 20 | // $ tar -cf - /foo_dir/ | gof3r put -b my_s3_bucket -k bar_dir/s3_object -m x-amz-meta-custom-metadata:abc123 -m x-amz-server-side-encryption:AES256 21 | // $ gof3r get -b my_s3_bucket -k bar_dir/s3_object | tar -x 22 | // 23 | // 24 | // MAN PAGE 25 | // 26 | // http://randallmcpherson.com/s3gof3r/gof3r/gof3r.html 27 | // 28 | // A man page may also be generated with `gof3r -m` 29 | // 30 | package main 31 | 32 | import ( 33 | "errors" 34 | "fmt" 35 | "io" 36 | "os" 37 | "runtime" 38 | "time" 39 | 40 | "github.com/github/s3gof3r" 41 | "github.com/jessevdk/go-flags" 42 | ) 43 | 44 | const ( 45 | name = "gof3r" 46 | version = "0.5.0" 47 | ) 48 | 49 | func main() { 50 | // set the number of processors to use to the number of cpus for parallelization of concurrent transfers 51 | runtime.GOMAXPROCS(runtime.NumCPU()) 52 | 53 | start := time.Now() 54 | 55 | opts, parser := getOptionParser() 56 | 57 | // parse ini file 58 | if err := parseIni(parser); err != nil { 59 | fmt.Fprintln(os.Stderr, err) 60 | } 61 | 62 | // parser calls the Execute function for the command after parsing the command line options. 
// checkClose closes c when it is non-nil.
//
// NOTE(review): err is received by value, so nothing this function does with
// it is visible to the caller; the result of Close is effectively discarded.
// Propagating it would require a signature change (e.g. taking *error).
func checkClose(c io.Closer, err error) {
	if c == nil {
		return
	}
	_ = c.Close()
}
AccessKey: "AKIAIOSFODNN7EXAMPLE", 20 | SecretKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", 21 | }, 22 | } 23 | } 24 | 25 | func TestBuildTime(t *testing.T) { 26 | s := newSigner("us-east-1") 27 | s.buildTime() 28 | expect := "20130524T000000Z" 29 | if date := s.Request.Header.Get("X-Amz-Date"); date != expect { 30 | t.Errorf("date don't match, got '%s', expected '%s'", date, expect) 31 | } 32 | } 33 | 34 | func TestBuildCredentialString(t *testing.T) { 35 | s := newSigner("us-east-1") 36 | s.bodyDigest() 37 | s.buildCredentialString() 38 | expect := "20130524/us-east-1/s3/aws4_request" 39 | if s.credentialString != expect { 40 | t.Errorf("credential string don't match, got '%s', expected '%s'", 41 | s.credentialString, expect) 42 | } 43 | } 44 | 45 | func TestBuildCanonicalHeaders(t *testing.T) { 46 | s := newSigner("us-east-1") 47 | s.buildTime() 48 | s.buildCredentialString() 49 | s.buildCanonicalHeaders() 50 | 51 | expectSigned := "host;range;x-amz-date" 52 | if s.signedHeaders != expectSigned { 53 | t.Errorf("signed headers don't match, got '%s', expected '%s'", 54 | s.signedHeaders, expectSigned) 55 | } 56 | 57 | expectCanonical := `host:examplebucket.s3.amazonaws.com 58 | range:bytes=0-9 59 | x-amz-date:20130524T000000Z` 60 | if s.canonicalHeaders != expectCanonical { 61 | t.Errorf("canonical headers don't match, got '%s', expected '%s'", 62 | s.canonicalHeaders, expectCanonical) 63 | } 64 | } 65 | 66 | func TestBuildCanonicalString(t *testing.T) { 67 | s := newSigner("us-east-1") 68 | s.buildTime() 69 | s.buildCredentialString() 70 | s.buildCanonicalHeaders() 71 | s.buildCanonicalString() 72 | expect := `GET 73 | /test.txt 74 | 75 | host:examplebucket.s3.amazonaws.com 76 | range:bytes=0-9 77 | x-amz-date:20130524T000000Z 78 | 79 | host;range;x-amz-date 80 | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855` 81 | if s.canonicalString != expect { 82 | t.Errorf("canonical string don't match, got '%s', expected '%s'", 83 | s.canonicalString, 
expect) 84 | } 85 | } 86 | 87 | func TestBuildStringToSign(t *testing.T) { 88 | s := newSigner("us-east-1") 89 | s.buildTime() 90 | s.buildCredentialString() 91 | s.buildCanonicalHeaders() 92 | s.buildCanonicalString() 93 | s.buildStringToSign() 94 | expect := `AWS4-HMAC-SHA256 95 | 20130524T000000Z 96 | 20130524/us-east-1/s3/aws4_request 97 | 8946e8df7a95b4714c63ae8664bbab443f99610b0858e8966eac22c72dae0232` 98 | if s.stringToSign != expect { 99 | t.Errorf("string to sign don't match, got '%s', expected '%s'", 100 | s.stringToSign, expect) 101 | } 102 | } 103 | 104 | func TestBuildSignature(t *testing.T) { 105 | s := newSigner("us-east-1") 106 | s.buildTime() 107 | s.buildCredentialString() 108 | s.buildCanonicalHeaders() 109 | s.buildCanonicalString() 110 | s.buildStringToSign() 111 | s.buildSignature() 112 | expect := "b4904babad39b29ebe2eaefecf4c7037be9c6362be0aebe68ea5c700020e5085" 113 | if s.signature != expect { 114 | t.Errorf("signature don't match, got '%s', expected '%s'", 115 | s.signature, expect) 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /gof3r/options.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/http" 7 | "os" 8 | "os/exec" 9 | "strings" 10 | 11 | "github.com/jessevdk/go-flags" 12 | ) 13 | 14 | const ( 15 | iniFile = ".gof3r.ini" 16 | ) 17 | 18 | // CommonOpts are Options common to all commands 19 | type CommonOpts struct { 20 | EndPoint string `long:"endpoint" description:"Amazon S3 endpoint" default:"s3.amazonaws.com" ini-name:"endpoint"` 21 | Debug bool `long:"debug" description:"Enable debug logging." ini-name:"debug"` 22 | } 23 | 24 | // DataOpts are Options common to cp, get, and put commands 25 | type DataOpts struct { 26 | NoSSL bool `long:"no-ssl" description:"Do not use SSL for endpoint connection." 
ini-name:"no-ssl"` 27 | NoMd5 bool `long:"no-md5" description:"Do not use md5 hash checking to ensure data integrity. By default, the md5 hash of is calculated concurrently during puts, stored at .md5/.md5, and verified on gets." ini-name:"no-md5"` 28 | Concurrency int `long:"concurrency" short:"c" default:"10" description:"Concurrency of transfers" ini-name:"concurrency"` 29 | PartSize int64 `long:"partsize" short:"s" description:"Initial size of concurrent parts, in bytes" default:"20971520" ini-name:"partsize"` 30 | NTry int `long:"retries" description:"Number of attempts to try" default:"10" ini-name:"retries"` 31 | } 32 | 33 | // UpOpts are Options for uploading common to cp and put commands 34 | type UpOpts struct { 35 | Header http.Header `long:"header" short:"m" description:"HTTP headers. May be used to set custom metadata, server-side encryption etc." ini-name:"header"` 36 | ACL string `long:"acl" description:"canned acl to apply to the object"` 37 | } 38 | 39 | type AppOpts struct { 40 | Version func() `long:"version" short:"v" description:"Print version"` 41 | Man func() `long:"manpage" short:"m" description:"Create gof3r.man man page in current directory"` 42 | WriteIni bool `long:"writeini" short:"i" description:"Write .gof3r.ini in current user's home directory" no-ini:"true"` 43 | } 44 | 45 | type gof3rOpts struct { 46 | AppOpts 47 | CpOpts 48 | GetOpts 49 | PutOpts 50 | RmOpts 51 | } 52 | 53 | func getOptionParser() (*gof3rOpts, *flags.Parser) { 54 | var opts gof3rOpts 55 | parser := flags.NewParser(&opts.AppOpts, (flags.HelpFlag | flags.PassDoubleDash)) 56 | 57 | // set parser fields 58 | parser.ShortDescription = "streaming, concurrent s3 client" 59 | 60 | opts.AppOpts.Version = func() { 61 | fmt.Fprintf(os.Stderr, "%s version %s\n", name, version) 62 | os.Exit(0) 63 | } 64 | 65 | opts.AppOpts.Man = func() { 66 | f, err := os.Create(name + ".man") 67 | if err != nil { 68 | log.Fatal(err) 69 | } 70 | parser.WriteManPage(f) 71 | 
// homeDir finds the current user's unix home directory.
//
// $HOME is used when it is set. Otherwise the shell is asked to expand
// "~$USER"; its trimmed output is returned when the command succeeds and
// prints something. If neither source yields a value an error is returned.
func homeDir() (string, error) {
	if h := os.Getenv("HOME"); h != "" {
		return h, nil
	}
	h, err := exec.Command("sh", "-c", "eval echo ~$USER").Output()
	if err == nil && len(h) > 0 {
		return strings.TrimSpace(string(h)), nil
	}
	return "", fmt.Errorf("home directory not found for current user")
}

// ACL adds the canned acl to h as the "x-amz-acl" header and returns h.
//
// An empty acl leaves h untouched (and a nil h stays nil). When acl is set
// and h is nil, a new header map is allocated so the call cannot panic on a
// write to a nil map.
func ACL(h http.Header, acl string) http.Header {
	if acl == "" {
		return h
	}
	if h == nil {
		h = make(http.Header)
	}
	h.Set("x-amz-acl", acl)
	return h
}
const (
	prefix    = "AWS4-HMAC-SHA256" // algorithm label used in the Authorization header and the string to sign
	isoFormat = "20060102T150405Z" // time layout for X-Amz-Date and the string to sign
	shortDate = "20060102"         // time layout for the credential scope and the signing key
)

// ignoredHeaders lists the (canonical-form) header names that
// buildCanonicalHeaders leaves out of the signed header set.
var ignoredHeaders = map[string]bool{
	"Authorization":  true,
	"Content-Type":   true,
	"Content-Length": true,
	"User-Agent":     true,
}

// signer carries the inputs (Time, Request, Region, Keys) and the
// intermediate results of signing a single request. The build* methods fill
// the unexported fields step by step; sign runs them in the required order.
type signer struct {
	Time    time.Time
	Request *http.Request
	Region  string
	Keys    Keys

	credentialString string
	signedHeaders    string
	signature        string

	canonicalHeaders string
	canonicalString  string
	stringToSign     string
}

// Sign signs the http.Request
//
// It mutates req: a nil Header is replaced by an empty one, the security
// token header is set when the bucket's keys carry a token, User-Agent is
// overwritten, and the signing steps add X-Amz-Date, X-Amz-Content-Sha256
// (when absent) and Authorization. The current time is used as the signing
// time.
func (b *Bucket) Sign(req *http.Request) {
	if req.Header == nil {
		req.Header = http.Header{}
	}
	if b.S3.Keys.SecurityToken != "" {
		req.Header.Set("X-Amz-Security-Token", b.S3.Keys.SecurityToken)
	}
	req.Header.Set("User-Agent", "S3Gof3r")
	s := &signer{
		Time:    time.Now(),
		Request: req,
		Region:  b.S3.Region(),
		Keys:    b.S3.Keys,
	}
	s.sign()
}

// sign runs every signing step in order and writes the resulting
// Authorization header onto the request. Each step reads fields produced by
// the steps before it, so the order must not change.
func (s *signer) sign() {
	s.buildTime()
	s.buildCredentialString()
	s.buildCanonicalHeaders()
	s.buildCanonicalString()
	s.buildStringToSign()
	s.buildSignature()
	parts := []string{
		prefix + " Credential=" + s.Keys.AccessKey + "/" + s.credentialString,
		"SignedHeaders=" + s.signedHeaders,
		"Signature=" + s.signature,
	}
	s.Request.Header.Set("Authorization", strings.Join(parts, ","))
}

// buildTime stamps the request with X-Amz-Date derived from s.Time in UTC.
func (s *signer) buildTime() {
	s.Request.Header.Set("X-Amz-Date", s.Time.UTC().Format(isoFormat))
}

// buildCredentialString builds the "<date>/<region>/s3/aws4_request" scope.
func (s *signer) buildCredentialString() {
	s.credentialString = strings.Join([]string{
		s.Time.UTC().Format(shortDate),
		s.Region,
		"s3",
		"aws4_request",
	}, "/")
}

// buildCanonicalHeaders computes signedHeaders (sorted, lower-cased,
// ";"-joined names) and canonicalHeaders ("name:value" lines joined by
// "\n"). "host" is always included and takes its value from the request
// URL; headers listed in ignoredHeaders are skipped. Multiple values of one
// header are joined with ",".
func (s *signer) buildCanonicalHeaders() {
	var headers []string
	headers = append(headers, "host")
	for k := range s.Request.Header {
		if _, ok := ignoredHeaders[http.CanonicalHeaderKey(k)]; ok {
			continue
		}
		headers = append(headers, strings.ToLower(k))
	}
	sort.Strings(headers)

	s.signedHeaders = strings.Join(headers, ";")

	headerValues := make([]string, len(headers))
	for i, k := range headers {
		if k == "host" {
			headerValues[i] = "host:" + s.Request.URL.Host
		} else {
			// Values are looked up under the canonical key form.
			headerValues[i] = k + ":" +
				strings.Join(s.Request.Header[http.CanonicalHeaderKey(k)], ",")
		}
	}

	s.canonicalHeaders = strings.Join(headerValues, "\n")
}

// buildCanonicalString assembles the canonical request: method, URI, query,
// canonical headers, signed header names and the body digest, joined by
// newlines. It must run after buildCanonicalHeaders.
//
// Side effects: the request's RawQuery is rewritten to the re-encoded query
// with "+" replaced by "%20", and bodyDigest may add X-Amz-Content-Sha256.
func (s *signer) buildCanonicalString() {
	s.Request.URL.RawQuery = strings.Replace(s.Request.URL.Query().Encode(), "+", "%20", -1)
	uri := s.Request.URL.Opaque
	if uri != "" {
		// Drops the first three "/"-separated segments of the opaque
		// URL — presumably the "//host" prefix; TODO confirm. Panics
		// if Opaque has fewer than three segments.
		uri = "/" + strings.Join(strings.Split(uri, "/")[3:], "/")
	} else {
		uri = s.Request.URL.EscapedPath()
	}
	if uri == "" {
		uri = "/"
	}

	// "@" and ":" are additionally percent-encoded in the signed URI.
	uri = strings.Replace(uri, "@", "%40", -1)
	uri = strings.Replace(uri, ":", "%3A", -1)

	s.canonicalString = strings.Join([]string{
		s.Request.Method,
		uri,
		s.Request.URL.RawQuery,
		s.canonicalHeaders + "\n",
		s.signedHeaders,
		s.bodyDigest(),
	}, "\n")
}

// buildStringToSign combines the algorithm label, timestamp, credential
// scope and the hex SHA-256 of the canonical request.
func (s *signer) buildStringToSign() {
	s.stringToSign = strings.Join([]string{
		prefix,
		s.Time.UTC().Format(isoFormat),
		s.credentialString,
		hex.EncodeToString(sha([]byte(s.canonicalString))),
	}, "\n")
}

// buildSignature derives the signing key by chaining HMACs over the date,
// region, service ("s3") and "aws4_request", then signs stringToSign with it
// and stores the hex-encoded result.
func (s *signer) buildSignature() {
	secret := s.Keys.SecretKey
	date := hmacSign([]byte("AWS4"+secret), []byte(s.Time.UTC().Format(shortDate)))
	region := hmacSign(date, []byte(s.Region))
	service := hmacSign(region, []byte("s3"))
	credentials := hmacSign(service, []byte("aws4_request"))
	signature := hmacSign(credentials, []byte(s.stringToSign))
	s.signature = hex.EncodeToString(signature)
}

// bodyDigest returns the hex SHA-256 of the request body. An existing
// X-Amz-Content-Sha256 header is trusted and returned as is; otherwise the
// body is read fully into memory, hashed, replaced by an in-memory copy so
// it can still be sent, and the header is added.
//
// NOTE(review): a read error from the body is ignored; the digest is then
// computed over whatever bytes were read.
func (s *signer) bodyDigest() string {
	hash := s.Request.Header.Get("X-Amz-Content-Sha256")
	if hash == "" {
		if s.Request.Body == nil {
			hash = hex.EncodeToString(sha([]byte{}))
		} else {
			body, _ := ioutil.ReadAll(s.Request.Body)
			s.Request.Body = ioutil.NopCloser(bytes.NewReader(body))
			hash = hex.EncodeToString(sha(body))
		}
		s.Request.Header.Add("X-Amz-Content-Sha256", hash)
	}
	return hash
}
// hmacSign returns the HMAC-SHA256 of data under key.
func hmacSign(key []byte, data []byte) []byte {
	mac := hmac.New(sha256.New, key)
	_, _ = mac.Write(data) // hash.Hash writes never return an error
	return mac.Sum(nil)
}

// sha returns the SHA-256 digest of data as a 32-byte slice.
func sha(data []byte) []byte {
	sum := sha256.Sum256(data)
	return sum[:]
}
Streaming support allows for usage like: 6 | 7 | ``` 8 | $ tar -czf - <my_dir/> | gof3r put -b <s3_bucket> -k <s3_object> 9 | $ gof3r get -b <s3_bucket> -k <s3_object> | tar -zx 10 | ``` 11 | 12 | 13 | **Speed Benchmarks** 14 | 15 | On an EC2 instance, gof3r can exceed 1 Gbps for both puts and gets: 16 | 17 | ``` 18 | $ gof3r get -b test-bucket -k 8_GB_tar | pv -a | tar -x 19 | Duration: 53.201632211s 20 | [ 167MB/s] 21 | 22 | 23 | $ tar -cf - test_dir/ | pv -a | gof3r put -b test-bucket -k 8_GB_tar 24 | Duration: 1m16.080800315s 25 | [ 119MB/s] 26 | ``` 27 | 28 | These tests were performed on an m1.xlarge EC2 instance with a virtualized 1 Gigabit ethernet interface. See [Amazon EC2 Instance Details](http://aws.amazon.com/ec2/instance-types/instance-details/) for more information. 29 | 30 | 31 | **Features** 32 | 33 | - *Speed:* Especially for larger s3 objects where parallelism can be exploited, s3gof3r will saturate the bandwidth of an EC2 instance. See the Benchmarks above. 34 | 35 | - *Streaming Uploads and Downloads:* As the above examples illustrate, streaming allows the gof3r command-line tool to be used with linux/unix pipes. This allows transformation of the data in parallel as it is uploaded or downloaded from S3. 36 | 37 | - *End-to-end Integrity Checking:* s3gof3r calculates the md5 hash of the stream in parallel while uploading and downloading. On upload, a file containing the md5 hash is saved in s3. This is checked against the calculated md5 on download. On upload, the content-md5 of each part is calculated and sent with the header to be checked by AWS. s3gof3r also checks the 'hash of hashes' returned by S3 in the `Etag` field on completion of a multipart upload. See the [S3 API Reference](http://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html) for details. 38 | 39 | - *Retry Everything:* Every HTTP request and every part is retried on both uploads and downloads. Requests to S3 frequently time out, especially under high load, so this is essential to complete large uploads or downloads.
40 | 41 | - *Memory Efficiency:* Memory used to upload and download parts is recycled. For an upload or download with the default concurrency of 10 and part size of 20 MB, the maximum memory usage is less than 300 MB. Memory footprint can be further reduced by reducing part size or concurrency. 42 | 43 | 44 | 45 | ## Installation 46 | 47 | s3gof3r is written in Go and requires go 1.5 or later. It can be installed with `go get` to download and compile it from source. To install the command-line tool, `gof3r`, set `GO15VENDOREXPERIMENT=1` in your environment: 48 | 49 | $ go get github.com/rlmcpherson/s3gof3r/gof3r 50 | 51 | To install just the package for use in other Go programs: 52 | 53 | $ go get github.com/rlmcpherson/s3gof3r 54 | 55 | ### Release Binaries 56 | 57 | To try the latest release of the gof3r command-line interface without installing go, download the statically-linked binary for your architecture from **[Github Releases](https://github.com/rlmcpherson/s3gof3r/releases).** 58 | 59 | 60 | 61 | ## gof3r (command-line interface) usage: 62 | 63 | ``` 64 | To stream up to S3: 65 | $ <input_stream> | gof3r put -b <bucket> -k <s3_path> 66 | To stream down from S3: 67 | $ gof3r get -b <bucket> -k <s3_path> | <output_stream> 68 | To upload a file to S3: 69 | $ gof3r cp <local_path> s3://<bucket>/<s3_path> 70 | To download a file from S3: 71 | $ gof3r cp s3://<bucket>/<s3_path> <local_path> 72 | ``` 73 | 74 | Set AWS keys as environment variables: 75 | 76 | ``` 77 | $ export AWS_ACCESS_KEY_ID=<access_key> 78 | $ export AWS_SECRET_ACCESS_KEY=<secret_key> 79 | ``` 80 | 81 | gof3r also supports [IAM role](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)-based keys from EC2 instance metadata. If available and environment variables are not set, these keys are used automatically.
82 | 83 | Examples: 84 | 85 | ``` 86 | $ tar -cf - /foo_dir/ | gof3r put -b my_s3_bucket -k bar_dir/s3_object -m x-amz-meta-custom-metadata:abc123 -m x-amz-server-side-encryption:AES256 87 | $ gof3r get -b my_s3_bucket -k bar_dir/s3_object | tar -x 88 | ``` 89 | **see the [gof3r man page](http://randallmcpherson.com/gof3r.html) for complete usage** 90 | 91 | 92 | 93 | ## Testing 94 | 95 | In order to test this module, you will need an S3 bucket that you can write to. This can be arranged roughly by following these guides: 96 | 97 | 1. [Set up S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/setting-up-s3.html) 98 | 99 | 2. [Create a bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/creating-bucket.html) to be used for testing. _The tests seem to currently require the bucket to be in the `us-east-1` region_, so place your bucket there. 100 | 101 | 3. (Optional) Create a restricted IAM user with programmatic read/write access to the bucket. The required permissions are described [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-policies-s3.html), though obviously replace `awsexamplebucket1` with the name of your bucket. Record the user's access key ID and secret access key. 102 | 103 | 4. Set up a shell file that exports the bucket name and credentials into the environment: 104 | 105 | ```shell 106 | export TEST_BUCKET= 107 | export AWS_ACCESS_KEY_ID= 108 | export AWS_SECRET_ACCESS_KEY= 109 | ``` 110 | 111 | The bucket name should be the short name; i.e., not the full URL. I'll assume that the file is called `test-creds.sh`. This file contains secrets, so be sure to set restrictive permissions on it and don't commit it to Git. 112 | 113 | 5. Run the tests, first loading the credentials temporarily into the environment: 114 | 115 | ```shell 116 | ( . test-creds.sh && go test ./... 
) 117 | ``` 118 | 119 | Note that the tests will read and write smallish amounts of data to the bucket, so this might incur some costs (though the free tier is probably enough for a reasonable amount of testing). 120 | 121 | 122 | 123 | ## Documentation 124 | 125 | **s3gof3r package:** See the [godocs](http://godoc.org/github.com/rlmcpherson/s3gof3r) for api documentation. 126 | 127 | **gof3r cli :** [godoc](http://godoc.org/github.com/rlmcpherson/s3gof3r/gof3r) and [gof3r man page](http://randallmcpherson.com/gof3r.html) 128 | 129 | 130 | Have a question? Ask it on the [s3gof3r Mailing List](https://groups.google.com/forum/#!forum/s3gof3r) 131 | -------------------------------------------------------------------------------- /list_objects.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "context" 5 | "encoding/xml" 6 | "math" 7 | "net/http" 8 | "strconv" 9 | "time" 10 | 11 | "github.com/github/s3gof3r/internal/s3client" 12 | "golang.org/x/sync/errgroup" 13 | ) 14 | 15 | func newObjectLister(c *Config, b *Bucket, prefixes []string, maxKeys int) (*ObjectLister, error) { 16 | cCopy := *c 17 | cCopy.NTry = max(c.NTry, 1) 18 | cCopy.Concurrency = max(c.Concurrency, 1) 19 | 20 | bCopy := *b 21 | 22 | ctx, cancel := context.WithCancel(context.TODO()) 23 | 24 | l := ObjectLister{ 25 | cancel: cancel, 26 | b: &bCopy, 27 | c: &cCopy, 28 | prefixCh: make(chan string, len(prefixes)), 29 | resultCh: make(chan []string, 1), 30 | maxKeys: maxKeys, 31 | } 32 | 33 | // Enqueue all of the prefixes that we were given. This won't 34 | // block because we have initialized `prefixCh` to be long enough 35 | // to hold all of them. This has the added benefit that there is 36 | // no data race if the caller happens to modify the contents of 37 | // the slice after this call returns. 
38 | for _, p := range prefixes { 39 | l.prefixCh <- p 40 | } 41 | close(l.prefixCh) 42 | 43 | eg, ctx := errgroup.WithContext(ctx) 44 | 45 | for i := 0; i < min(l.c.Concurrency, len(prefixes)); i++ { 46 | eg.Go(func() error { 47 | return l.worker(ctx) 48 | }) 49 | } 50 | 51 | go func() { 52 | l.finalErr = eg.Wait() 53 | close(l.resultCh) 54 | l.cancel() 55 | }() 56 | 57 | return &l, nil 58 | } 59 | 60 | type ObjectLister struct { 61 | cancel context.CancelFunc 62 | 63 | b *Bucket 64 | c *Config 65 | maxKeys int 66 | 67 | prefixCh chan string 68 | resultCh chan []string 69 | 70 | // finalErr is set before closing `resultCh` if any of the workers 71 | // returned errors. Any subsequent calls to `Next()` report this 72 | // error. 73 | finalErr error 74 | 75 | // currentValue and currentErr are the "results" of the most 76 | // recent call to `Next()`. 77 | currentValue []string 78 | currentErr error 79 | } 80 | 81 | func (l *ObjectLister) worker(ctx context.Context) error { 82 | for p := range l.prefixCh { 83 | var continuation string 84 | retries: 85 | for { 86 | res, err := l.retryListObjects(ctx, p, continuation) 87 | if err != nil { 88 | select { 89 | case <-ctx.Done(): 90 | return ctx.Err() 91 | default: 92 | return err 93 | } 94 | } 95 | 96 | keys := make([]string, 0, len(res.Contents)) 97 | for _, c := range res.Contents { 98 | keys = append(keys, c.Key) 99 | } 100 | 101 | select { 102 | case <-ctx.Done(): 103 | return ctx.Err() 104 | case l.resultCh <- keys: 105 | continuation = res.NextContinuationToken 106 | if continuation != "" { 107 | continue 108 | } 109 | 110 | // Break from this prefix and grab the next one 111 | break retries 112 | } 113 | } 114 | } 115 | 116 | return nil 117 | } 118 | 119 | func (l *ObjectLister) retryListObjects( 120 | ctx context.Context, p, continuation string, 121 | ) (*listBucketResult, error) { 122 | var err error 123 | var res *listBucketResult 124 | var timer *time.Timer 125 | for i := 0; i < l.c.NTry; i++ { 126 | opts := 
listObjectsOptions{MaxKeys: l.maxKeys, Prefix: p, ContinuationToken: continuation} 127 | res, err = listObjects(l.c, l.b, opts) 128 | if err == nil { 129 | return res, nil 130 | } 131 | 132 | // Exponential back-off, reusing the timer if possible: 133 | duration := time.Duration(math.Exp2(float64(i))) * 100 * time.Millisecond 134 | if timer == nil { 135 | timer = time.NewTimer(duration) 136 | } else { 137 | // The only way to get here is if the timer was created 138 | // during an earlier iteration of the loop, in which case 139 | // the select below must have gone through the `<-timer.C` 140 | // branch, which drained the timer. So it is safe to call 141 | // `Reset()`: 142 | timer.Reset(duration) 143 | } 144 | 145 | select { 146 | case <-timer.C: 147 | // Timer has fired and been drained, so it is ready for reuse. 148 | case <-ctx.Done(): 149 | // Stop the timer to prevent a resource leak: 150 | timer.Stop() 151 | return nil, ctx.Err() 152 | } 153 | } 154 | 155 | return nil, err 156 | } 157 | 158 | // Next moves the iterator to the next set of results. It returns true if there 159 | // are more results, or false if there are no more results or there was an 160 | // error. 
// listObjectsOptions specifies the options for a ListObjects operation on a S3
// bucket
type listObjectsOptions struct {
	// Maximum number of keys to return per request
	MaxKeys int
	// Only list those keys that start with the given prefix
	Prefix string
	// Continuation token from the previous request
	ContinuationToken string
}

// listBucketResult is the decoded XML body of a list-type=2 listing
// response.
type listBucketResult struct {
	Name     string `xml:"Name"`
	Prefix   string `xml:"Prefix"`
	KeyCount int    `xml:"KeyCount"`
	MaxKeys  int    `xml:"MaxKeys"`
	// IsTruncated reports whether more pages follow. The tag must match
	// the element name exactly, otherwise the field is never populated.
	IsTruncated           bool                 `xml:"IsTruncated"`
	NextContinuationToken string               `xml:"NextContinuationToken"`
	Contents              []listBucketContents `xml:"Contents"`
}

// listBucketContents is one "Contents" element of a listing response.
type listBucketContents struct {
	Key            string         `xml:"Key"`
	LastModified   time.Time      `xml:"LastModified"`
	ETag           string         `xml:"ETag"`
	Size           int64          `xml:"Size"`
	StorageClass   string         `xml:"StorageClass"`
	CommonPrefixes []CommonPrefix `xml:"CommonPrefixes"`
}

// CommonPrefix is one "CommonPrefixes" entry of a listing response.
type CommonPrefix struct {
	Prefix string `xml:"Prefix"`
}

// ListObjectsResult wraps a decoded listing response.
type ListObjectsResult struct {
	result *listBucketResult
}
q.Set("list-type", "2") 232 | if opts.MaxKeys > 0 { 233 | q.Set("max-keys", strconv.Itoa(opts.MaxKeys)) 234 | } 235 | if opts.Prefix != "" { 236 | q.Set("prefix", opts.Prefix) 237 | } 238 | if opts.ContinuationToken != "" { 239 | q.Set("continuation-token", opts.ContinuationToken) 240 | } 241 | u.RawQuery = q.Encode() 242 | 243 | r := http.Request{ 244 | Method: "GET", 245 | URL: u, 246 | } 247 | b.Sign(&r) 248 | 249 | resp, err := b.conf().Do(&r) 250 | if err != nil { 251 | return nil, err 252 | } 253 | if resp.StatusCode != 200 { 254 | return nil, s3client.NewRespError(resp) 255 | } 256 | 257 | err = xml.NewDecoder(resp.Body).Decode(result) 258 | closeErr := resp.Body.Close() 259 | if err != nil { 260 | return nil, err 261 | } 262 | if closeErr != nil { 263 | return nil, closeErr 264 | } 265 | 266 | return result, nil 267 | } 268 | -------------------------------------------------------------------------------- /getter.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "crypto/md5" 5 | "fmt" 6 | "hash" 7 | "io" 8 | "io/ioutil" 9 | "math" 10 | "net/http" 11 | "net/url" 12 | "sync" 13 | "syscall" 14 | "time" 15 | 16 | "github.com/github/s3gof3r/internal/pool" 17 | "github.com/github/s3gof3r/internal/s3client" 18 | ) 19 | 20 | const qWaitMax = 2 21 | 22 | type getter struct { 23 | url *url.URL 24 | b *Bucket 25 | bufsz int64 26 | err error 27 | 28 | chunkID int 29 | rChunk *chunk 30 | contentLen int64 31 | bytesRead int64 32 | chunkTotal int 33 | 34 | readCh chan *chunk 35 | getCh chan *chunk 36 | quit chan struct{} 37 | 38 | workerAborted chan struct{} 39 | abortOnce sync.Once 40 | 41 | qWait map[int]*chunk 42 | qWaitLen uint 43 | cond sync.Cond 44 | 45 | sp *pool.BufferPool 46 | 47 | closed bool 48 | c *Config 49 | 50 | md5 hash.Hash 51 | cIdx int64 52 | } 53 | 54 | type chunk struct { 55 | id int 56 | header http.Header 57 | start int64 58 | size int64 59 | b []byte 60 | } 61 | 62 | func 
newGetter(getURL *url.URL, c *Config, b *Bucket) (io.ReadCloser, http.Header, error) { 63 | g := new(getter) 64 | g.url = getURL 65 | g.b = new(Bucket) 66 | *g.b = *b 67 | g.c = c.safeCopy(1) 68 | g.bufsz = g.c.PartSize 69 | 70 | g.getCh = make(chan *chunk) 71 | g.readCh = make(chan *chunk) 72 | g.quit = make(chan struct{}) 73 | g.workerAborted = make(chan struct{}) 74 | g.qWait = make(map[int]*chunk) 75 | g.b = b 76 | g.md5 = md5.New() 77 | g.cond = sync.Cond{L: &sync.Mutex{}} 78 | 79 | // use get instead of head for error messaging 80 | resp, err := g.retryRequest("GET", g.url.String(), nil) 81 | if err != nil { 82 | return nil, nil, err 83 | } 84 | if resp.StatusCode != 200 { 85 | return nil, nil, s3client.NewRespError(resp) 86 | } 87 | // Otherwise, we don't need the body: 88 | _ = resp.Body.Close() 89 | 90 | // Golang changes content-length to -1 when chunked transfer encoding / EOF close response detected 91 | if resp.ContentLength == -1 { 92 | return nil, nil, fmt.Errorf("Retrieving objects with undefined content-length " + 93 | " responses (chunked transfer encoding / EOF close) is not supported") 94 | } 95 | 96 | g.contentLen = resp.ContentLength 97 | g.chunkTotal = int((g.contentLen + g.bufsz - 1) / g.bufsz) // round up, integer division 98 | logger.debugPrintf("object size: %3.2g MB", float64(g.contentLen)/float64((1*mb))) 99 | 100 | g.sp = pool.NewBufferPool(bufferPoolLogger{}, g.bufsz) 101 | 102 | for i := 0; i < g.c.Concurrency; i++ { 103 | go g.worker() 104 | } 105 | go g.initChunks() 106 | return g, resp.Header, nil 107 | } 108 | 109 | func (g *getter) retryRequest(method, urlStr string, body io.ReadSeeker) (resp *http.Response, err error) { 110 | for i := 0; i < g.c.NTry; i++ { 111 | var req *http.Request 112 | req, err = http.NewRequest(method, urlStr, body) 113 | if err != nil { 114 | return 115 | } 116 | 117 | if body != nil { 118 | req.Header.Set(s3client.SHA256Header, s3client.SHA256Reader(body)) 119 | } 120 | 121 | g.b.Sign(req) 122 | resp, 
err = g.c.Client.Do(req) 123 | if err == nil && resp.StatusCode == 500 { 124 | time.Sleep(time.Duration(math.Exp2(float64(i))) * 100 * time.Millisecond) // exponential back-off 125 | continue 126 | } 127 | if err == nil { 128 | return 129 | } 130 | logger.debugPrintln(err) 131 | if body != nil { 132 | if _, err = body.Seek(0, 0); err != nil { 133 | return 134 | } 135 | } 136 | } 137 | return 138 | } 139 | 140 | func (g *getter) initChunks() { 141 | id := 0 142 | for i := int64(0); i < g.contentLen; { 143 | size := min64(g.bufsz, g.contentLen-i) 144 | c := &chunk{ 145 | id: id, 146 | header: http.Header{ 147 | "Range": {fmt.Sprintf("bytes=%d-%d", 148 | i, i+size-1)}, 149 | }, 150 | start: i, 151 | size: size, 152 | b: nil, 153 | } 154 | i += size 155 | id++ 156 | g.getCh <- c 157 | } 158 | close(g.getCh) 159 | } 160 | 161 | func (g *getter) worker() { 162 | for c := range g.getCh { 163 | g.retryGetChunk(c) 164 | if g.err != nil { 165 | // tell Read() caller that 1 or more chunks can't be read; abort 166 | g.abortOnce.Do(func() { close(g.workerAborted) }) 167 | break 168 | } 169 | } 170 | } 171 | 172 | func (g *getter) retryGetChunk(c *chunk) { 173 | var err error 174 | c.b = g.sp.Get() 175 | for i := 0; i < g.c.NTry; i++ { 176 | err = g.getChunk(c) 177 | if err == nil { 178 | return 179 | } 180 | logger.debugPrintf("error on attempt %d: retrying chunk: %v, error: %s", i, c.id, err) 181 | time.Sleep(time.Duration(math.Exp2(float64(i))) * 100 * time.Millisecond) // exponential back-off 182 | } 183 | select { 184 | case <-g.quit: // check for closed quit channel before setting error 185 | return 186 | default: 187 | g.err = err 188 | } 189 | } 190 | 191 | func (g *getter) getChunk(c *chunk) error { 192 | // ensure buffer is empty 193 | r, err := http.NewRequest("GET", g.url.String(), nil) 194 | if err != nil { 195 | return err 196 | } 197 | r.Header = c.header 198 | g.b.Sign(r) 199 | resp, err := g.c.Client.Do(r) 200 | if err != nil { 201 | return err 202 | } 203 | if 
resp.StatusCode != 206 && resp.StatusCode != 200 { 204 | return s3client.NewRespError(resp) 205 | } 206 | 207 | n, err := io.ReadAtLeast(resp.Body, c.b, int(c.size)) 208 | if err != nil { 209 | _ = resp.Body.Close() 210 | return err 211 | } 212 | if err := resp.Body.Close(); err != nil { 213 | return err 214 | } 215 | if int64(n) != c.size { 216 | return fmt.Errorf("chunk %d: Expected %d bytes, received %d", 217 | c.id, c.size, n) 218 | } 219 | g.readCh <- c 220 | 221 | // wait for qWait to drain before starting next chunk 222 | g.cond.L.Lock() 223 | for g.qWaitLen >= qWaitMax { 224 | if g.closed { 225 | return nil 226 | } 227 | g.cond.Wait() 228 | } 229 | g.cond.L.Unlock() 230 | return nil 231 | } 232 | 233 | func (g *getter) Read(p []byte) (int, error) { 234 | var err error 235 | if g.closed { 236 | return 0, syscall.EINVAL 237 | } 238 | if g.err != nil { 239 | return 0, g.err 240 | } 241 | nw := 0 242 | for nw < len(p) { 243 | if g.bytesRead == g.contentLen { 244 | return nw, io.EOF 245 | } else if g.bytesRead > g.contentLen { 246 | // Here for robustness / completeness 247 | // Should not occur as golang uses LimitedReader up to content-length 248 | return nw, fmt.Errorf("Expected %d bytes, received %d (too many bytes)", 249 | g.contentLen, g.bytesRead) 250 | } 251 | 252 | // If for some reason no more chunks to be read and bytes are off, error, incomplete result 253 | if g.chunkID >= g.chunkTotal { 254 | return nw, fmt.Errorf("Expected %d bytes, received %d and chunkID %d >= chunkTotal %d (no more chunks remaining)", 255 | g.contentLen, g.bytesRead, g.chunkID, g.chunkTotal) 256 | } 257 | 258 | if g.rChunk == nil { 259 | g.rChunk, err = g.nextChunk() 260 | if err != nil { 261 | return 0, err 262 | } 263 | g.cIdx = 0 264 | } 265 | 266 | n := copy(p[nw:], g.rChunk.b[g.cIdx:g.rChunk.size]) 267 | g.cIdx += int64(n) 268 | nw += n 269 | g.bytesRead += int64(n) 270 | 271 | if g.cIdx >= g.rChunk.size { // chunk complete 272 | g.sp.Put(g.rChunk.b) 273 | g.chunkID++ 274 
| g.rChunk = nil 275 | } 276 | } 277 | return nw, nil 278 | } 279 | 280 | func (g *getter) nextChunk() (*chunk, error) { 281 | for { 282 | // first check qWait 283 | c := g.qWait[g.chunkID] 284 | if c != nil { 285 | delete(g.qWait, g.chunkID) 286 | g.cond.L.Lock() 287 | g.qWaitLen-- 288 | g.cond.L.Unlock() 289 | g.cond.Signal() // wake up waiting worker goroutine 290 | if g.c.Md5Check { 291 | if _, err := g.md5.Write(c.b[:c.size]); err != nil { 292 | return nil, err 293 | } 294 | } 295 | return c, nil 296 | } 297 | // if next chunk not in qWait, read from channel 298 | select { 299 | case c := <-g.readCh: 300 | g.qWait[c.id] = c 301 | g.cond.L.Lock() 302 | g.qWaitLen++ 303 | g.cond.L.Unlock() 304 | case <-g.workerAborted: 305 | return nil, g.err // worker aborted, quit 306 | case <-g.quit: 307 | return nil, g.err // fatal error, quit. 308 | } 309 | } 310 | } 311 | 312 | func (g *getter) Close() error { 313 | if g.closed { 314 | return syscall.EINVAL 315 | } 316 | g.closed = true 317 | g.sp.Close() 318 | close(g.quit) 319 | g.cond.Broadcast() 320 | if g.err != nil { 321 | return g.err 322 | } 323 | if g.bytesRead != g.contentLen { 324 | return fmt.Errorf("read error: %d bytes read. 
expected: %d", g.bytesRead, g.contentLen) 325 | } 326 | if g.c.Md5Check { 327 | if err := g.checkMd5(); err != nil { 328 | return err 329 | } 330 | } 331 | return nil 332 | } 333 | 334 | func (g *getter) checkMd5() error { 335 | calcMd5 := fmt.Sprintf("%x", g.md5.Sum(nil)) 336 | md5Path := fmt.Sprint(".md5", g.url.Path, ".md5") 337 | md5Url, err := g.b.url(md5Path, g.c) 338 | if err != nil { 339 | return err 340 | } 341 | 342 | logger.debugPrintln("md5: ", calcMd5) 343 | logger.debugPrintln("md5Path: ", md5Path) 344 | resp, err := g.retryRequest("GET", md5Url.String(), nil) 345 | if err != nil { 346 | return err 347 | } 348 | if resp.StatusCode != 200 { 349 | return fmt.Errorf( 350 | "MD5 check failed: %s not found: %s", md5Url.String(), s3client.NewRespError(resp), 351 | ) 352 | } 353 | 354 | givenMd5, err := ioutil.ReadAll(resp.Body) 355 | closeErr := resp.Body.Close() 356 | if err != nil { 357 | return err 358 | } 359 | if closeErr != nil { 360 | return closeErr 361 | } 362 | if calcMd5 != string(givenMd5) { 363 | return fmt.Errorf("MD5 mismatch. given:%s calculated:%s", givenMd5, calcMd5) 364 | } 365 | return nil 366 | } 367 | -------------------------------------------------------------------------------- /s3gof3r.go: -------------------------------------------------------------------------------- 1 | // Package s3gof3r provides fast, parallelized, streaming access to Amazon S3. It includes a command-line interface: `gof3r`. 2 | package s3gof3r 3 | 4 | import ( 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "log" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | "path" 14 | "regexp" 15 | "strings" 16 | "time" 17 | 18 | "github.com/github/s3gof3r/internal/s3client" 19 | ) 20 | 21 | const versionParam = "versionId" 22 | 23 | var regionMatcher = regexp.MustCompile(`s3[-.]([a-z0-9-]+)\.amazonaws\.com([.a-z0-9]*)`) 24 | 25 | // S3 contains the domain or endpoint of an S3-compatible service and 26 | // the authentication keys for that service. 
27 | type S3 struct { 28 | Domain string // The s3-compatible endpoint. Defaults to "s3.amazonaws.com" 29 | Keys 30 | } 31 | 32 | // Region returns the service region infering it from S3 domain. 33 | func (s *S3) Region() string { 34 | region := os.Getenv("AWS_REGION") 35 | switch s.Domain { 36 | case "s3.amazonaws.com", "s3-external-1.amazonaws.com": 37 | return "us-east-1" 38 | case "s3-accelerate.amazonaws.com": 39 | if region == "" { 40 | panic("can't find endpoint region") 41 | } 42 | return region 43 | default: 44 | regions := regionMatcher.FindStringSubmatch(s.Domain) 45 | if len(regions) < 2 { 46 | if region == "" { 47 | panic("can't find endpoint region") 48 | } 49 | return region 50 | } 51 | return regions[1] 52 | } 53 | } 54 | 55 | // A Bucket for an S3 service. 56 | type Bucket struct { 57 | *S3 58 | Name string 59 | *Config 60 | } 61 | 62 | // Config includes configuration parameters for s3gof3r 63 | type Config struct { 64 | *http.Client // http client to use for requests 65 | Concurrency int // number of parts to get or put concurrently 66 | PartSize int64 // initial part size in bytes to use for multipart gets or puts 67 | NTry int // maximum attempts for each part 68 | Md5Check bool // The md5 hash of the object is stored in /.md5/.md5 69 | // When true, it is stored on puts and verified on gets 70 | Scheme string // url scheme, defaults to 'https' 71 | PathStyle bool // use path style bucket addressing instead of virtual host style 72 | } 73 | 74 | // DefaultConfig contains defaults used if *Config is nil 75 | var DefaultConfig = &Config{ 76 | Concurrency: 10, 77 | PartSize: 20 * mb, 78 | NTry: 10, 79 | Md5Check: true, 80 | Scheme: "https", 81 | Client: ClientWithTimeout(clientTimeout), 82 | } 83 | 84 | // safeCopy returns a pointer to a fresh copy of `c`, with some 85 | // parameters adjusted to be within allowable limits. 
86 | func (c *Config) safeCopy(minPartSize int64) *Config { 87 | cCopy := *c 88 | cCopy.Concurrency = max(c.Concurrency, 1) 89 | cCopy.NTry = max(c.NTry, 1) 90 | cCopy.PartSize = max64(minPartSize, cCopy.PartSize) 91 | return &cCopy 92 | } 93 | 94 | // http client timeout 95 | const clientTimeout = 5 * time.Second 96 | 97 | // DefaultDomain is set to the endpoint for the U.S. S3 service. 98 | var DefaultDomain = "s3.amazonaws.com" 99 | 100 | // New Returns a new S3 101 | // domain defaults to DefaultDomain if empty 102 | func New(domain string, keys Keys) *S3 { 103 | if domain == "" { 104 | domain = DefaultDomain 105 | } 106 | return &S3{domain, keys} 107 | } 108 | 109 | // Bucket returns a bucket on s3 110 | // Bucket Config is initialized to DefaultConfig 111 | func (s *S3) Bucket(name string) *Bucket { 112 | return &Bucket{ 113 | S3: s, 114 | Name: name, 115 | Config: DefaultConfig, 116 | } 117 | } 118 | 119 | // GetReader provides a reader and downloads data using parallel ranged get requests. 120 | // Data from the requests are ordered and written sequentially. 121 | // 122 | // Data integrity is verified via the option specified in c. 123 | // Header data from the downloaded object is also returned, useful for reading object metadata. 124 | // DefaultConfig is used if c is nil 125 | // Callers should call Close on r to ensure that all resources are released. 126 | // 127 | // To specify an object version in a versioned bucket, the version ID may be included in the path as a url parameter. 
See http://docs.aws.amazon.com/AmazonS3/latest/dev/RetrievingObjectVersions.html 128 | func (b *Bucket) GetReader(path string, c *Config) (r io.ReadCloser, h http.Header, err error) { 129 | if path == "" { 130 | return nil, nil, errors.New("empty path requested") 131 | } 132 | if c == nil { 133 | c = b.conf() 134 | } 135 | u, err := b.url(path, c) 136 | if err != nil { 137 | return nil, nil, err 138 | } 139 | return newGetter(u, c, b) 140 | } 141 | 142 | // PutWriter provides a writer to upload data as multipart upload requests. 143 | // 144 | // Each header in h is added to the HTTP request header. This is useful for specifying 145 | // options such as server-side encryption in metadata as well as custom user metadata. 146 | // DefaultConfig is used if c is nil. 147 | // Callers should call Close on w to ensure that all resources are released. 148 | func (b *Bucket) PutWriter(path string, h http.Header, c *Config) (w io.WriteCloser, err error) { 149 | if c == nil { 150 | c = b.conf() 151 | } 152 | blobURL, err := b.url(path, c) 153 | if err != nil { 154 | return nil, err 155 | } 156 | 157 | var md5URL *url.URL 158 | if c.Md5Check { 159 | md5Path := fmt.Sprint(".md5", blobURL.Path, ".md5") 160 | var err error 161 | md5URL, err = b.url(md5Path, c) 162 | if err != nil { 163 | return nil, err 164 | } 165 | } 166 | 167 | client := s3client.New(blobURL, md5URL, b, c.Client, c.NTry, bufferPoolLogger{}) 168 | 169 | return newPutter(client, h, c) 170 | } 171 | 172 | // url returns a parsed url to the given path. 
c must not be nil 173 | func (b *Bucket) url(bPath string, c *Config) (*url.URL, error) { 174 | 175 | // parse versionID parameter from path, if included 176 | // See https://github.com/rlmcpherson/s3gof3r/issues/84 for rationale 177 | purl, err := url.Parse(bPath) 178 | if err != nil { 179 | return nil, err 180 | } 181 | var vals url.Values 182 | if v := purl.Query().Get(versionParam); v != "" { 183 | vals = make(url.Values) 184 | vals.Add(versionParam, v) 185 | bPath = strings.Split(bPath, "?")[0] // remove versionID from path 186 | } 187 | 188 | // handling for bucket names containing periods / explicit PathStyle addressing 189 | // http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html for details 190 | if strings.Contains(b.Name, ".") || c.PathStyle { 191 | return &url.URL{ 192 | Host: b.S3.Domain, 193 | Scheme: c.Scheme, 194 | Path: path.Clean(fmt.Sprintf("/%s/%s", b.Name, bPath)), 195 | RawQuery: vals.Encode(), 196 | }, nil 197 | } else { 198 | return &url.URL{ 199 | Scheme: c.Scheme, 200 | Path: path.Clean(fmt.Sprintf("/%s", bPath)), 201 | Host: path.Clean(fmt.Sprintf("%s.%s", b.Name, b.S3.Domain)), 202 | RawQuery: vals.Encode(), 203 | }, nil 204 | } 205 | } 206 | 207 | func (b *Bucket) conf() *Config { 208 | c := b.Config 209 | if c == nil { 210 | c = DefaultConfig 211 | } 212 | return c 213 | } 214 | 215 | // Delete deletes the key at path 216 | // If the path does not exist, Delete returns nil (no error). 
217 | func (b *Bucket) Delete(path string) error { 218 | if err := b.delete(path); err != nil { 219 | return err 220 | } 221 | // try to delete md5 file 222 | if b.Md5Check { 223 | if err := b.delete(fmt.Sprintf("/.md5/%s.md5", path)); err != nil { 224 | return err 225 | } 226 | } 227 | 228 | logger.Printf("%s deleted from %s\n", path, b.Name) 229 | return nil 230 | } 231 | 232 | func (b *Bucket) delete(path string) error { 233 | u, err := b.url(path, b.conf()) 234 | if err != nil { 235 | return err 236 | } 237 | r := http.Request{ 238 | Method: "DELETE", 239 | URL: u, 240 | } 241 | b.Sign(&r) 242 | resp, err := b.conf().Do(&r) 243 | if err != nil { 244 | return err 245 | } 246 | if resp.StatusCode != 204 { 247 | return s3client.NewRespError(resp) 248 | } 249 | if err := resp.Body.Close(); err != nil { 250 | return err 251 | } 252 | 253 | return nil 254 | } 255 | 256 | // ListObjects returns a list of objects under the given prefixes using parallel 257 | // requests for each prefix and any continuations. 258 | // 259 | // maxKeys indicates how many keys should be returned per request 260 | func (b *Bucket) ListObjects(prefixes []string, maxKeys int, c *Config) (*ObjectLister, error) { 261 | if c == nil { 262 | c = b.conf() 263 | } 264 | 265 | return newObjectLister(c, b, prefixes, maxKeys) 266 | } 267 | 268 | // DeleteMultiple deletes multiple keys in a single request. 269 | // 270 | // If 'quiet' is false, the result includes the requested paths and whether they 271 | // were deleted. 272 | func (b *Bucket) DeleteMultiple(quiet bool, keys ...string) (DeleteResult, error) { 273 | // We also want to try to delete the corresponding md5 files 274 | if b.Md5Check { 275 | md5Keys := make([]string, 0, len(keys)) 276 | for _, key := range keys { 277 | md5Keys = append(md5Keys, fmt.Sprintf("/.md5/%s.md", key)) 278 | } 279 | keys = append(keys, md5Keys...) 
280 | } 281 | 282 | return deleteMultiple(b.conf(), b, quiet, keys) 283 | } 284 | 285 | // SetLogger wraps the standard library log package. 286 | // 287 | // It allows the internal logging of s3gof3r to be set to a desired output and format. 288 | // Setting debug to true enables debug logging output. s3gof3r does not log output by default. 289 | func SetLogger(out io.Writer, prefix string, flag int, debug bool) { 290 | logger = internalLogger{ 291 | log.New(out, prefix, flag), 292 | debug, 293 | } 294 | } 295 | 296 | type internalLogger struct { 297 | *log.Logger 298 | debug bool 299 | } 300 | 301 | var logger internalLogger 302 | 303 | func (l *internalLogger) debugPrintln(v ...interface{}) { 304 | if logger.debug { 305 | logger.Println(v...) 306 | } 307 | } 308 | 309 | func (l *internalLogger) debugPrintf(format string, v ...interface{}) { 310 | if logger.debug { 311 | logger.Printf(format, v...) 312 | } 313 | } 314 | 315 | // Initialize internal logger to log to no-op (ioutil.Discard) by default. 316 | func init() { 317 | logger = internalLogger{ 318 | log.New(ioutil.Discard, "", log.LstdFlags), 319 | false, 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /internal/s3client/s3client.go: -------------------------------------------------------------------------------- 1 | package s3client 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "encoding/hex" 7 | "encoding/xml" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "math" 12 | "net/http" 13 | "net/url" 14 | "strconv" 15 | "strings" 16 | "time" 17 | ) 18 | 19 | const ( 20 | MD5Header = "content-md5" 21 | SHA256Header = "X-Amz-Content-Sha256" 22 | ) 23 | 24 | // Client is a Client that encapsules low-level interactions with a 25 | // specific, single blob. 
26 | type Client struct { 27 | url *url.URL 28 | md5URL *url.URL 29 | signer signer 30 | httpClient *http.Client // http client to use for requests 31 | nTry int 32 | logger logger 33 | } 34 | 35 | func New( 36 | url, md5URL *url.URL, signer signer, httpClient *http.Client, nTry int, logger logger, 37 | ) *Client { 38 | c := Client{ 39 | url: url, 40 | md5URL: md5URL, 41 | signer: signer, 42 | httpClient: httpClient, 43 | nTry: nTry, 44 | logger: logger, 45 | } 46 | return &c 47 | } 48 | 49 | func (c *Client) StartMultipartUpload(h http.Header) (string, error) { 50 | resp, err := c.retryRequest("POST", c.url.String()+"?uploads", nil, h) 51 | if err != nil { 52 | return "", err 53 | } 54 | if resp.StatusCode != 200 { 55 | return "", NewRespError(resp) 56 | } 57 | 58 | var r struct { 59 | UploadID string `xml:"UploadId"` 60 | } 61 | 62 | err = xml.NewDecoder(resp.Body).Decode(&r) 63 | closeErr := resp.Body.Close() 64 | if err != nil { 65 | return "", err 66 | } 67 | if closeErr != nil { 68 | _ = c.AbortMultipartUpload(r.UploadID) 69 | return "", closeErr 70 | } 71 | return r.UploadID, nil 72 | } 73 | 74 | type Part struct { 75 | Data []byte 76 | 77 | // Read by xml encoder 78 | PartNumber int 79 | ETag string 80 | 81 | // Checksums 82 | MD5 string 83 | SHA256 string 84 | } 85 | 86 | // UploadPart uploads a part of a multipart upload, checking the etag 87 | // returned by S3 against the calculated value, including retries. 
88 | func (c *Client) UploadPart(uploadID string, part *Part) error { 89 | var err error 90 | for i := 0; i < c.nTry; i++ { 91 | err = c.uploadPartAttempt(uploadID, part) 92 | if err == nil { 93 | return nil 94 | } 95 | c.logger.Printf( 96 | "Error on attempt %d: Retrying part: %d, Error: %s", i, part.PartNumber, err, 97 | ) 98 | // Exponential back-off: 99 | time.Sleep(time.Duration(math.Exp2(float64(i))) * 100 * time.Millisecond) 100 | } 101 | return err 102 | } 103 | 104 | // uploadPartAttempt makes one attempt to upload a part of a multipart 105 | // upload, checking the etag returned by S3 against the calculated 106 | // value. 107 | func (c *Client) uploadPartAttempt(uploadID string, part *Part) error { 108 | v := url.Values{} 109 | v.Set("partNumber", strconv.Itoa(part.PartNumber)) 110 | v.Set("uploadId", uploadID) 111 | req, err := http.NewRequest("PUT", c.url.String()+"?"+v.Encode(), bytes.NewReader(part.Data)) 112 | if err != nil { 113 | return err 114 | } 115 | req.ContentLength = int64(len(part.Data)) 116 | req.Header.Set(MD5Header, part.MD5) 117 | req.Header.Set(SHA256Header, part.SHA256) 118 | c.signer.Sign(req) 119 | resp, err := c.httpClient.Do(req) 120 | if err != nil { 121 | return err 122 | } 123 | if resp.StatusCode != 200 { 124 | return NewRespError(resp) 125 | } 126 | if err := resp.Body.Close(); err != nil { 127 | return err 128 | } 129 | 130 | s := resp.Header.Get("etag") 131 | if len(s) < 2 { 132 | return fmt.Errorf("Got Bad etag:%s", s) 133 | } 134 | s = s[1 : len(s)-1] // includes quote chars for some reason 135 | if part.ETag != s { 136 | return fmt.Errorf("Response etag does not match. Remote:%s Calculated:%s", s, part.ETag) 137 | } 138 | return nil 139 | } 140 | 141 | // CompleteMultipartUpload completes a multiline upload, using 142 | // `parts`, which have been uploaded already. Retry on errors. On 143 | // success, return the etag that was returned by S3. 
144 | func (c *Client) CompleteMultipartUpload(uploadID string, parts []*Part) (string, error) { 145 | attemptsLeft := 5 146 | for { 147 | eTag, retryable, err := c.completeMultipartUpload(uploadID, parts) 148 | if err == nil { 149 | // Success! 150 | return eTag, nil 151 | } 152 | 153 | attemptsLeft-- 154 | if !retryable || attemptsLeft == 0 { 155 | return "", err 156 | } 157 | } 158 | } 159 | 160 | // completeMultipartUpload makes one attempt at completing a multiline 161 | // upload, using `parts`, which have been uploaded already. Return: 162 | // 163 | // * `eTag, false, nil` on success; 164 | // * `"", true, err` if there was a retryable error; 165 | // * `"", false, err` if there was an unretryable error. 166 | func (c *Client) completeMultipartUpload(uploadID string, parts []*Part) (string, bool, error) { 167 | type xmlPart struct { 168 | PartNumber int 169 | ETag string 170 | } 171 | 172 | var xmlParts struct { 173 | XMLName string `xml:"CompleteMultipartUpload"` 174 | Part []xmlPart 175 | } 176 | xmlParts.Part = make([]xmlPart, len(parts)) 177 | for i, part := range parts { 178 | xmlParts.Part[i] = xmlPart{ 179 | PartNumber: part.PartNumber, 180 | ETag: part.ETag, 181 | } 182 | } 183 | 184 | body, err := xml.Marshal(xmlParts) 185 | if err != nil { 186 | return "", false, err 187 | } 188 | 189 | b := bytes.NewReader(body) 190 | v := url.Values{} 191 | v.Set("uploadId", uploadID) 192 | 193 | resp, err := c.retryRequest("POST", c.url.String()+"?"+v.Encode(), b, nil) 194 | if err != nil { 195 | // If the connection got closed (firwall, proxy, etc.) 196 | // we should also retry, just like if we'd had a 500. 197 | if err == io.ErrUnexpectedEOF { 198 | return "", true, err 199 | } 200 | 201 | return "", false, err 202 | } 203 | if resp.StatusCode != 200 { 204 | return "", false, NewRespError(resp) 205 | } 206 | 207 | // S3 will return an error under a 200 as well. 
Instead of the 208 | // CompleteMultipartUploadResult that we expect below, we might be 209 | // getting an Error, e.g. with InternalError under it. We should behave 210 | // in that case as though we received a 500 and try again. 211 | 212 | var r struct { 213 | ETag string 214 | Code string 215 | } 216 | 217 | err = xml.NewDecoder(resp.Body).Decode(&r) 218 | closeErr := resp.Body.Close() 219 | if err != nil { 220 | // The decoder unfortunately returns string error 221 | // instead of specific errors. 222 | if err.Error() == "unexpected EOF" { 223 | return "", true, err 224 | } 225 | 226 | return "", false, err 227 | } 228 | if closeErr != nil { 229 | return "", true, closeErr 230 | } 231 | 232 | // This is what S3 returns instead of a 500 when we should try 233 | // to complete the multipart upload again 234 | if r.Code == "InternalError" { 235 | return "", true, errors.New("S3 internal error") 236 | } 237 | // Some other generic error 238 | if r.Code != "" { 239 | return "", false, fmt.Errorf("CompleteMultipartUpload error: %s", r.Code) 240 | } 241 | 242 | return strings.Trim(r.ETag, "\""), false, nil 243 | } 244 | 245 | // AbortMultipartUpload aborts a multipart upload, discarding any 246 | // partly-uploaded contents. 247 | func (c *Client) AbortMultipartUpload(uploadID string) error { 248 | v := url.Values{} 249 | v.Set("uploadId", uploadID) 250 | s := c.url.String() + "?" + v.Encode() 251 | resp, err := c.retryRequest("DELETE", s, nil, nil) 252 | if err != nil { 253 | return err 254 | } 255 | if resp.StatusCode != 204 { 256 | return NewRespError(resp) 257 | } 258 | _ = resp.Body.Close() 259 | 260 | return nil 261 | } 262 | 263 | // PutMD5 attempts to write an md5 file in a ".md5" subdirectory of 264 | // the directory where the blob is stored, with retries. For example, 265 | // the md5 for blob https://mybucket.s3.amazonaws.com/gof3r will be 266 | // stored in https://mybucket.s3.amazonaws.com/.md5/gof3r.md5. 
267 | func (c *Client) PutMD5(sum string) error { 268 | c.logger.Printf("md5: %s", sum) 269 | c.logger.Printf("md5Path: %s", c.md5URL.Path) 270 | var err error 271 | for i := 0; i < c.nTry; i++ { 272 | err = c.putMD5(sum) 273 | if err == nil { 274 | break 275 | } 276 | } 277 | return err 278 | } 279 | 280 | // putMD5 makes one attempt to write an md5 file in a ".md5" 281 | // subdirectory of the directory where the blob is stored; e.g., the 282 | // md5 for blob https://mybucket.s3.amazonaws.com/gof3r will be stored 283 | // in https://mybucket.s3.amazonaws.com/.md5/gof3r.md5. 284 | func (c *Client) putMD5(md5 string) error { 285 | md5Reader := strings.NewReader(md5) 286 | 287 | r, err := http.NewRequest("PUT", c.md5URL.String(), md5Reader) 288 | if err != nil { 289 | return err 290 | } 291 | c.signer.Sign(r) 292 | resp, err := c.httpClient.Do(r) 293 | if err != nil { 294 | return err 295 | } 296 | if resp.StatusCode != 200 { 297 | return NewRespError(resp) 298 | } 299 | if err := resp.Body.Close(); err != nil { 300 | return err 301 | } 302 | 303 | return nil 304 | } 305 | 306 | var err500 = errors.New("received 500 from server") 307 | 308 | func (c *Client) retryRequest( 309 | method, urlStr string, body io.ReadSeeker, h http.Header, 310 | ) (*http.Response, error) { 311 | attempt := 0 312 | for { 313 | req, err := http.NewRequest(method, urlStr, body) 314 | if err != nil { 315 | return nil, err 316 | } 317 | for k := range h { 318 | for _, v := range h[k] { 319 | req.Header.Add(k, v) 320 | } 321 | } 322 | 323 | if body != nil { 324 | req.Header.Set(SHA256Header, SHA256Reader(body)) 325 | } 326 | 327 | c.signer.Sign(req) 328 | resp, err := c.httpClient.Do(req) 329 | if err == nil && resp.StatusCode == 500 { 330 | _ = resp.Body.Close() 331 | err = err500 332 | // Exponential back-off: 333 | time.Sleep(time.Duration(math.Exp2(float64(attempt))) * 100 * time.Millisecond) 334 | } 335 | if err == nil { 336 | // Success! 
337 | return resp, nil 338 | } 339 | 340 | c.logger.Printf("%v", err) 341 | 342 | attempt++ 343 | if attempt >= c.nTry { 344 | return nil, err 345 | } 346 | 347 | // Rewind the body so that it can be replayed for the next 348 | // attempt: 349 | if body != nil { 350 | if _, err = body.Seek(0, 0); err != nil { 351 | return nil, err 352 | } 353 | } 354 | } 355 | } 356 | 357 | type signer interface { 358 | Sign(*http.Request) 359 | } 360 | 361 | type logger interface { 362 | Printf(format string, a ...interface{}) 363 | } 364 | 365 | // Return the SHA-256 checksum of the contents of `r` in hex format, 366 | // then seek back to the original location in `r`. 367 | func SHA256Reader(r io.ReadSeeker) string { 368 | hash := sha256.New() 369 | start, _ := r.Seek(0, 1) 370 | defer r.Seek(start, 0) 371 | 372 | io.Copy(hash, r) 373 | sum := hash.Sum(nil) 374 | return hex.EncodeToString(sum) 375 | } 376 | -------------------------------------------------------------------------------- /putter.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/md5" 7 | "crypto/sha256" 8 | "encoding/base64" 9 | "encoding/hex" 10 | "errors" 11 | "fmt" 12 | "hash" 13 | "io" 14 | "net/http" 15 | "runtime" 16 | "strings" 17 | "sync" 18 | "syscall" 19 | 20 | "github.com/github/s3gof3r/internal/pool" 21 | "github.com/github/s3gof3r/internal/s3client" 22 | "golang.org/x/sync/errgroup" 23 | ) 24 | 25 | // defined by amazon 26 | const ( 27 | minPartSize = 5 * mb 28 | maxPartSize = 5 * gb 29 | maxObjSize = 5 * tb 30 | maxNPart = 10000 31 | ) 32 | 33 | type s3Putter interface { 34 | StartMultipartUpload(h http.Header) (string, error) 35 | UploadPart(uploadID string, part *s3client.Part) error 36 | CompleteMultipartUpload(uploadID string, parts []*s3client.Part) (string, error) 37 | AbortMultipartUpload(uploadID string) error 38 | PutMD5(md5 string) error 39 | } 40 | 41 | // putter is an `io.Writer` 
that uploads the data written to it to an 42 | // S3 blob. 43 | // 44 | // Data flow for data written via `putter.Write()`: 45 | // 46 | // receive putPart() 47 | // +----------+ +----+ 48 | // > | worker() | --> | S3 | 49 | // p.pw.Write() pr.Read() send / +----------+ +----+ 50 | // +--------+ +---------+ +--------------+ +------+ / +----------+ +----+ 51 | // | caller | --> | io.Pipe | --> | queueParts() | --> | ch | --> | worker() | --> | S3 | 52 | // +--------+ +---------+ +--------------+ +------+ \ +----------+ +----+ 53 | // | \ +----------+ +----+ 54 | // | > | worker() | --> | S3 | 55 | // hashContent() +----------+ +----+ 56 | // | 57 | // v 58 | // +-------+ Close() +----+ 59 | // | p.xml | -------------------------------------> | S3 | 60 | // +-------+ +----+ 61 | // 62 | // The normal shutdown sequence: 63 | // 64 | // * The caller invokes `p.Close()`. 65 | // 66 | // * This closes `p.pw`, the write end of the pipe, which causes 67 | // `queueParts()` to read an EOF and return. 68 | // 69 | // * The `queueParts()` goroutine closes the read end of the pipe 70 | // (which makes any future calls to `p.Write()` fail) and closes 71 | // `ch`. 72 | // 73 | // * The closure of `ch` causes the `worker()` invocations to 74 | // return. 75 | // 76 | // * When all of the above goroutines finish, `p.eg.Wait()` returns, 77 | // allowing the `CompleteMultipartUpload` step to proceed. 78 | // 79 | // If an error occurs in one of the goroutines, the goroutine returns 80 | // an error, which causes the `errgroup.Group` to cancel its context, 81 | // causing most of the other goroutines to exit promptly. The only 82 | // tricky one is `queueParts()`, which might be blocked reading from 83 | // the read end of the pipe. So an extra goroutine waits on 84 | // `ctx.Done()` and then closes the write end of the pipe if it hasn't 85 | // already been closed by `p.Close()`. 
86 | type putter struct { 87 | cancel context.CancelFunc 88 | 89 | c *Config 90 | client s3Putter 91 | 92 | pw *io.PipeWriter 93 | 94 | bufsz int64 95 | closeOnce sync.Once 96 | eg *errgroup.Group 97 | md5OfParts hash.Hash 98 | md5 hash.Hash 99 | eTag string 100 | 101 | sp *pool.BufferPool 102 | 103 | uploadID string 104 | parts []*s3client.Part 105 | putsz int64 106 | } 107 | 108 | // Sends an S3 multipart upload initiation request. 109 | // See http://docs.amazonwebservices.com/AmazonS3/latest/dev/mpuoverview.html. 110 | // The initial request returns an UploadId that we use to identify 111 | // subsequent PUT requests. 112 | func newPutter(client s3Putter, h http.Header, c *Config) (*putter, error) { 113 | ctx, cancel := context.WithCancel(context.Background()) 114 | eg, ctx := errgroup.WithContext(ctx) 115 | 116 | c = c.safeCopy(minPartSize) 117 | p := putter{ 118 | cancel: cancel, 119 | c: c, 120 | client: client, 121 | bufsz: c.PartSize, 122 | eg: eg, 123 | md5OfParts: md5.New(), 124 | md5: md5.New(), 125 | } 126 | 127 | var err error 128 | p.uploadID, err = p.client.StartMultipartUpload(h) 129 | if err != nil { 130 | p.cancel() 131 | return nil, err 132 | } 133 | 134 | p.sp = pool.NewBufferPool(bufferPoolLogger{}, p.bufsz) 135 | pr, pw := io.Pipe() 136 | p.pw = pw 137 | 138 | ch := make(chan *s3client.Part) 139 | 140 | p.eg.Go(func() error { 141 | err = p.queueParts(ctx, pr, ch) 142 | p.closeOnce.Do(func() { pr.Close() }) 143 | close(ch) 144 | return err 145 | }) 146 | 147 | for i := 0; i < p.c.Concurrency; i++ { 148 | p.eg.Go( 149 | func() error { 150 | err := p.worker(ch) 151 | if err != nil { 152 | // If a worker finishes with an error, close `pr` 153 | // with the same error, so that writers are 154 | // unblocked and also return that error. 
155 | p.closeOnce.Do(func() { pr.CloseWithError(err) }) 156 | } 157 | return err 158 | }, 159 | ) 160 | } 161 | 162 | // If the context is cancelled before `p.Close()` is called, close 163 | // `p.pw` to unblock `queueParts()` (which might be waiting on 164 | // `pr.Read()`). This also prevents any more successful calls to 165 | // `Write()`. 166 | go func() { 167 | <-ctx.Done() 168 | p.closeOnce.Do(func() { 169 | p.pw.CloseWithError(errors.New("upload aborted")) 170 | }) 171 | }() 172 | 173 | return &p, nil 174 | } 175 | 176 | func (p *putter) Write(b []byte) (int, error) { 177 | n, err := p.pw.Write(b) 178 | if err == io.ErrClosedPipe { 179 | // For backwards compatibility: 180 | err = syscall.EINVAL 181 | } 182 | return n, err 183 | } 184 | 185 | // queueParts reads from `r`, breaks the input into parts of size (at 186 | // most) `p.bufsz`, adds the data to the hash, and passes each part to 187 | // `p.ch` to be uploaded by the workers. It terminates when it has 188 | // exausted the input or experiences a read error. 189 | func (p *putter) queueParts(ctx context.Context, r io.Reader, ch chan<- *s3client.Part) error { 190 | for { 191 | buf := p.sp.Get() 192 | if int64(cap(buf)) != p.bufsz { 193 | buf = make([]byte, p.bufsz) 194 | runtime.GC() 195 | } 196 | n, err := io.ReadFull(r, buf) 197 | lastPart := false 198 | switch err { 199 | case nil: 200 | // No error. Send this part then continue looping. 201 | case io.EOF: 202 | if len(p.parts) > 0 { 203 | // There was an EOF immediately after the previous 204 | // part. This new part would be empty, so we don't 205 | // have to send it. 206 | return nil 207 | } 208 | // The file was zero length. In this case, we have to 209 | // upload the zero-length part, but then we're done: 210 | lastPart = true 211 | case io.ErrUnexpectedEOF: 212 | // The input was exhausted but only partly filled this 213 | // part. Send what we have, then we're done. 
214 | lastPart = true 215 | default: 216 | // There was some other kind of error: 217 | return err 218 | } 219 | 220 | part, err := p.addPart(buf[:n]) 221 | if err != nil { 222 | return err 223 | } 224 | 225 | select { 226 | case ch <- part: 227 | case <-ctx.Done(): 228 | return ctx.Err() 229 | } 230 | 231 | if lastPart { 232 | return nil 233 | } 234 | 235 | // if necessary, double buffer size every 2000 parts due to the 10000-part AWS limit 236 | // to reach the 5 Terabyte max object size, initial part size must be ~85 MB 237 | count := len(p.parts) 238 | if count%2000 == 0 && count < maxNPart && growPartSize(count, p.bufsz, p.putsz) { 239 | p.bufsz = min64(p.bufsz*2, maxPartSize) 240 | p.sp.SetBufferSize(p.bufsz) // update pool buffer size 241 | logger.debugPrintf("part size doubled to %d", p.bufsz) 242 | } 243 | } 244 | } 245 | 246 | // newPart creates a new "multipart upload" part containing the bytes 247 | // in `buf`, assigns it a part number, hashes its contents into 248 | // `p.md5`, adds it to `p.xml.Part`, and returns it. It does not do 249 | // anything to cause the part to get uploaded. FIXME: the part is 250 | // returned even if there is an error hashing the data. 251 | func (p *putter) addPart(buf []byte) (*s3client.Part, error) { 252 | p.putsz += int64(len(buf)) 253 | part := &s3client.Part{ 254 | Data: buf, 255 | PartNumber: len(p.parts) + 1, 256 | } 257 | var err error 258 | part.MD5, part.SHA256, part.ETag, err = p.hashContent(part.Data) 259 | 260 | p.parts = append(p.parts, part) 261 | 262 | return part, err 263 | } 264 | 265 | // worker receives parts from `p.ch` that are ready to upload, and 266 | // uploads them to S3 as file parts. Then it recycles the part's 267 | // buffer back to the buffer pool. 
268 | func (p *putter) worker(ch <-chan *s3client.Part) error { 269 | for part := range ch { 270 | err := p.client.UploadPart(p.uploadID, part) 271 | if err != nil { 272 | return err 273 | } 274 | 275 | // Give the buffer back to the pool, first making sure 276 | // that its length is set to its full capacity: 277 | p.sp.Put(part.Data[:cap(part.Data)]) 278 | part.Data = nil 279 | } 280 | return nil 281 | } 282 | 283 | func (p *putter) Close() error { 284 | defer p.cancel() 285 | defer p.sp.Close() 286 | 287 | cleanup := func() { 288 | p.cancel() 289 | p.eg.Wait() 290 | if p.uploadID != "" { 291 | err := p.client.AbortMultipartUpload(p.uploadID) 292 | if err != nil { 293 | logger.Printf("Error aborting multipart upload: %v\n", err) 294 | } 295 | } 296 | } 297 | 298 | // Closing `p.pw` prevents any future `Write()` calls from 299 | // succeeding and tells `queueParts()` that no more data is 300 | // coming: 301 | var err error 302 | p.closeOnce.Do(func() { 303 | err = p.pw.Close() 304 | }) 305 | if err != nil { 306 | cleanup() 307 | return errors.New("unexpected error closing internal pipe") 308 | } 309 | 310 | err = p.eg.Wait() 311 | if err != nil { 312 | cleanup() 313 | return err 314 | } 315 | 316 | eTag, err := p.client.CompleteMultipartUpload(p.uploadID, p.parts) 317 | if err != nil { 318 | cleanup() 319 | return err 320 | } 321 | p.eTag = eTag 322 | 323 | if err := p.checkMd5sOfParts(); err != nil { 324 | cleanup() 325 | return err 326 | } 327 | 328 | if p.c.Md5Check { 329 | sum := fmt.Sprintf("%x", p.md5.Sum(nil)) 330 | // FIXME: should this error really be ignored? 331 | _ = p.client.PutMD5(sum) 332 | } 333 | return nil 334 | } 335 | 336 | // checkMd5sOfParts checks the md5 hash of the concatenated part md5 337 | // hashes against the returned ETag. 
More info: 338 | // https://forums.aws.amazon.com/thread.jspa?messageID=456442񯛺 339 | func (p *putter) checkMd5sOfParts() error { 340 | // Get the MD5 of the part checksums that we've been computing as 341 | // parts were added: 342 | calculatedMd5ofParts := fmt.Sprintf("%x", p.md5OfParts.Sum(nil)) 343 | 344 | // Find the comparable hash in the ETag returned from S3: 345 | remoteMd5ofParts := p.eTag 346 | remoteMd5ofParts = strings.Split(remoteMd5ofParts, "-")[0] 347 | if len(remoteMd5ofParts) == 0 { 348 | return fmt.Errorf("Nil ETag") 349 | } 350 | 351 | if calculatedMd5ofParts != remoteMd5ofParts { 352 | return fmt.Errorf("MD5 hash of part hashes comparison failed. Hash from multipart complete header: %s."+ 353 | " Calculated multipart hash: %s.", remoteMd5ofParts, calculatedMd5ofParts) 354 | } 355 | 356 | return nil 357 | } 358 | 359 | // Md5 functions 360 | func (p *putter) hashContent(buf []byte) (string, string, string, error) { 361 | m := md5.New() 362 | s := sha256.New() 363 | mw := io.MultiWriter(m, s, p.md5) 364 | if _, err := io.Copy(mw, bytes.NewReader(buf)); err != nil { 365 | return "", "", "", err 366 | } 367 | md5Sum := m.Sum(nil) 368 | shaSum := hex.EncodeToString(s.Sum(nil)) 369 | etag := hex.EncodeToString(md5Sum) 370 | // add to checksum of all parts for verification on upload completion 371 | if _, err := p.md5OfParts.Write(md5Sum); err != nil { 372 | return "", "", "", err 373 | } 374 | return base64.StdEncoding.EncodeToString(md5Sum), shaSum, etag, nil 375 | } 376 | 377 | // returns true unless partSize is large enough 378 | // to achieve maxObjSize with remaining parts 379 | func growPartSize(partIndex int, partSize, putsz int64) bool { 380 | return (maxObjSize-putsz)/(maxNPart-int64(partIndex)) > partSize 381 | } 382 | -------------------------------------------------------------------------------- /s3gof3r_test.go: -------------------------------------------------------------------------------- 1 | package s3gof3r 2 | 3 | import ( 4 | 
"bytes" 5 | "context" 6 | "crypto/rand" 7 | "errors" 8 | "flag" 9 | "fmt" 10 | "io" 11 | "io/ioutil" 12 | "log" 13 | "net/http" 14 | "os" 15 | "strings" 16 | "sync" 17 | "syscall" 18 | "testing" 19 | "time" 20 | 21 | "github.com/github/s3gof3r/internal/s3client" 22 | ) 23 | 24 | var b *tB 25 | 26 | func TestMain(m *testing.M) { 27 | flag.Parse() 28 | if testing.Verbose() { 29 | SetLogger(os.Stderr, "test: ", (log.LstdFlags | log.Lshortfile), true) 30 | } 31 | 32 | var err error 33 | b, err = testBucket() 34 | if err != nil { 35 | log.Fatalf("creating test bucket: %v", err) 36 | } 37 | uploadTestFiles() 38 | os.Exit(m.Run()) 39 | } 40 | 41 | func uploadTestFiles() { 42 | var wg sync.WaitGroup 43 | for _, tt := range getTests { 44 | if tt.rSize >= 0 { 45 | wg.Add(1) 46 | go func(path string, rSize int64) { 47 | err := b.putReader(path, &randSrc{Size: int(rSize)}) 48 | if err != nil { 49 | log.Fatalf("creating test file '%s': %v", path, err) 50 | } 51 | wg.Done() 52 | }(tt.path, tt.rSize) 53 | } 54 | } 55 | wg.Wait() 56 | } 57 | 58 | var getTests = []struct { 59 | path string 60 | config *Config 61 | rSize int64 62 | err error 63 | }{ 64 | {"t1.test", nil, 1 * kb, nil}, 65 | {"no-md5", &Config{Scheme: "https", Client: ClientWithTimeout(clientTimeout), Md5Check: false}, 1, nil}, 66 | {"NoKey", nil, -1, &s3client.RespError{StatusCode: 404, Message: "The specified key does not exist."}}, 67 | {"", nil, -1, fmt.Errorf("empty path requested")}, 68 | {"1_mb_test", 69 | &Config{Concurrency: 2, PartSize: 5 * mb, NTry: 2, Md5Check: true, Scheme: "https", Client: ClientWithTimeout(2 * time.Second)}, 70 | 1 * mb, 71 | nil}, 72 | {"b1", nil, 1, nil}, 73 | {"0byte", &Config{Scheme: "https", Client: ClientWithTimeout(clientTimeout), Md5Check: false}, 0, nil}, 74 | } 75 | 76 | func TestGetReader(t *testing.T) { 77 | t.Parallel() 78 | 79 | for _, tt := range getTests { 80 | r, h, err := b.GetReader(tt.path, tt.config) 81 | if err != nil { 82 | errComp(tt.err, err, t, tt) 83 | 
continue 84 | } 85 | t.Logf("headers for %s: %v\n", tt.path, h) 86 | w := ioutil.Discard 87 | 88 | n, err := io.Copy(w, r) 89 | if err != nil { 90 | t.Error(err) 91 | } 92 | if n != tt.rSize { 93 | t.Errorf("Expected size: %d. Actual: %d", tt.rSize, n) 94 | 95 | } 96 | err = r.Close() 97 | errComp(tt.err, err, t, tt) 98 | 99 | } 100 | } 101 | 102 | func TestPutWriter(t *testing.T) { 103 | t.Parallel() 104 | var putTests = []struct { 105 | path string 106 | data []byte 107 | header http.Header 108 | config *Config 109 | wSize int64 110 | err error 111 | }{ 112 | {"testfile", []byte("test_data"), nil, nil, 9, nil}, 113 | {"", []byte("test_data"), nil, nil, 114 | 9, &s3client.RespError{StatusCode: 400, Message: "A key must be specified"}}, 115 | {"test0byte", []byte(""), nil, nil, 0, nil}, 116 | {"testhg", []byte("foo"), goodHeader(), nil, 3, nil}, 117 | {"testhb", []byte("foo"), badHeader(), nil, 3, 118 | &s3client.RespError{StatusCode: 400, Message: "The encryption method specified is not supported"}}, 119 | {"nomd5", []byte("foo"), goodHeader(), 120 | &Config{Concurrency: 1, PartSize: 5 * mb, NTry: 1, Md5Check: false, Scheme: "http", Client: http.DefaultClient}, 3, nil}, 121 | {"noconc", []byte("foo"), nil, 122 | &Config{Concurrency: 0, PartSize: 5 * mb, NTry: 1, Md5Check: true, Scheme: "https", Client: ClientWithTimeout(5 * time.Second)}, 3, nil}, 123 | {"enc test", []byte("test_data"), nil, nil, 9, nil}, 124 | } 125 | 126 | for _, tt := range putTests { 127 | w, err := b.PutWriter(tt.path, tt.header, tt.config) 128 | if err != nil { 129 | errComp(tt.err, err, t, tt) 130 | continue 131 | } 132 | r := bytes.NewReader(tt.data) 133 | 134 | n, err := io.Copy(w, r) 135 | if err != nil { 136 | t.Error(err) 137 | } 138 | if n != tt.wSize { 139 | t.Errorf("Expected size: %d. 
Actual: %d", tt.wSize, n) 140 | 141 | } 142 | err = w.Close() 143 | errComp(tt.err, err, t, tt) 144 | } 145 | } 146 | 147 | type multiTest struct { 148 | path string 149 | data io.Reader 150 | header http.Header 151 | config *Config 152 | wSize int64 153 | err error 154 | } 155 | 156 | // Tests of multipart puts and gets 157 | // Since the minimum part size is 5 mb, these take longer to run 158 | // These tests can be skipped by running test with the short flag 159 | func TestMulti(t *testing.T) { 160 | if testing.Short() { 161 | t.Skip("skipping, short mode") 162 | } 163 | 164 | t.Parallel() 165 | var putMultiTests = []multiTest{ 166 | {"1mb_test.test", &randSrc{Size: int(1 * mb)}, goodHeader(), nil, 1 * mb, nil}, 167 | {"21mb_test.test", &randSrc{Size: int(21 * mb)}, goodHeader(), 168 | &Config{Concurrency: 3, PartSize: 5 * mb, NTry: 2, Md5Check: true, Scheme: "https", 169 | Client: ClientWithTimeout(5 * time.Second)}, 21 * mb, nil}, 170 | {"timeout.test1", &randSrc{Size: int(1 * mb)}, goodHeader(), 171 | &Config{Concurrency: 1, PartSize: 5 * mb, NTry: 1, Md5Check: false, Scheme: "https", 172 | Client: ClientWithTimeout(1 * time.Millisecond)}, 1 * mb, 173 | errors.New("timeout")}, 174 | {"timeout.test2", &randSrc{Size: int(1 * mb)}, goodHeader(), 175 | &Config{Concurrency: 1, PartSize: 5 * mb, NTry: 1, Md5Check: true, Scheme: "https", 176 | Client: ClientWithTimeout(1 * time.Millisecond)}, 1 * mb, 177 | errors.New("timeout")}, 178 | {"toosmallpart", &randSrc{Size: int(6 * mb)}, goodHeader(), 179 | &Config{Concurrency: 4, PartSize: 5 * mb, NTry: 3, Md5Check: false, Scheme: "https", 180 | Client: ClientWithTimeout(2 * time.Second)}, 6 * mb, nil}, 181 | } 182 | var wg sync.WaitGroup 183 | for _, tt := range putMultiTests { 184 | w, err := b.PutWriter(tt.path, tt.header, tt.config) 185 | if err != nil { 186 | errComp(tt.err, err, t, tt) 187 | continue 188 | } 189 | wg.Add(1) 190 | 191 | go func(w io.WriteCloser, tt multiTest) { 192 | n, err := io.Copy(w, tt.data) 193 
| if err != nil { 194 | t.Error(err) 195 | } 196 | if n != tt.wSize { 197 | t.Errorf("Expected size: %d. Actual: %d", tt.wSize, n) 198 | 199 | } 200 | err = w.Close() 201 | errComp(tt.err, err, t, tt) 202 | r, h, err := b.GetReader(tt.path, tt.config) 203 | if err != nil { 204 | errComp(tt.err, err, t, tt) 205 | //return 206 | } 207 | t.Logf("headers %v\n", h) 208 | gw := ioutil.Discard 209 | 210 | n, err = io.Copy(gw, r) 211 | if err != nil { 212 | t.Error(err) 213 | } 214 | if n != tt.wSize { 215 | t.Errorf("Expected size: %d. Actual: %d", tt.wSize, n) 216 | 217 | } 218 | t.Logf("got %s", tt.path) 219 | err = r.Close() 220 | errComp(tt.err, err, t, tt) 221 | wg.Done() 222 | }(w, tt) 223 | } 224 | wg.Wait() 225 | } 226 | 227 | type tB struct { 228 | *Bucket 229 | } 230 | 231 | func testBucket() (*tB, error) { 232 | k, err := InstanceKeys() 233 | if err != nil { 234 | k, err = EnvKeys() 235 | if err != nil { 236 | return nil, err 237 | } 238 | } 239 | bucket := os.Getenv("TEST_BUCKET") 240 | if bucket == "" { 241 | return nil, errors.New("TEST_BUCKET must be set in environment") 242 | 243 | } 244 | s3 := New("", k) 245 | b := tB{s3.Bucket(bucket)} 246 | 247 | return &b, err 248 | } 249 | 250 | func (b *tB) putReader(path string, r io.Reader) error { 251 | if r == nil { 252 | return nil // special handling for nil case 253 | } 254 | 255 | w, err := b.PutWriter(path, nil, nil) 256 | if err != nil { 257 | return err 258 | } 259 | _, err = io.Copy(w, r) 260 | if err != nil { 261 | return err 262 | } 263 | err = w.Close() 264 | if err != nil { 265 | return err 266 | } 267 | 268 | return nil 269 | } 270 | 271 | func errComp(expect, actual error, t *testing.T, tt interface{}) bool { 272 | if expect == nil && actual == nil { 273 | return true 274 | } 275 | 276 | if expect == nil || actual == nil { 277 | t.Errorf("called with %v\n Expected: %v\n Actual: %v\n", tt, expect, actual) 278 | return false 279 | } 280 | if !strings.Contains(actual.Error(), expect.Error()) { 281 | 
t.Errorf("called with %v\n Expected: %v\n Actual: %v\n", tt, expect, actual) 282 | return false 283 | } 284 | return true 285 | 286 | } 287 | 288 | func goodHeader() http.Header { 289 | header := make(http.Header) 290 | header.Add("x-amz-server-side-encryption", "AES256") 291 | header.Add("x-amz-meta-foometadata", "testmeta") 292 | return header 293 | } 294 | 295 | func badHeader() http.Header { 296 | header := make(http.Header) 297 | header.Add("x-amz-server-side-encryption", "AES512") 298 | return header 299 | } 300 | 301 | type randSrc struct { 302 | Size int 303 | total int 304 | } 305 | 306 | func (r *randSrc) Read(p []byte) (int, error) { 307 | n, err := rand.Read(p) 308 | r.total = r.total + n 309 | if r.total >= r.Size { 310 | return n - (r.total - r.Size), io.EOF 311 | } 312 | return n, err 313 | } 314 | 315 | func ExampleBucket_PutWriter() error { 316 | k, err := EnvKeys() // get S3 keys from environment 317 | if err != nil { 318 | return err 319 | } 320 | // Open bucket to put file into 321 | s3 := New("", k) 322 | b := s3.Bucket("bucketName") 323 | 324 | // open file to upload 325 | file, err := os.Open("fileName") 326 | if err != nil { 327 | return err 328 | } 329 | 330 | // Open a PutWriter for upload 331 | w, err := b.PutWriter(file.Name(), nil, nil) 332 | if err != nil { 333 | return err 334 | } 335 | if _, err = io.Copy(w, file); err != nil { // Copy into S3 336 | return err 337 | } 338 | if err = w.Close(); err != nil { 339 | return err 340 | } 341 | return nil 342 | } 343 | 344 | func ExampleBucket_GetReader() error { 345 | k, err := EnvKeys() // get S3 keys from environment 346 | if err != nil { 347 | return err 348 | } 349 | 350 | // Open bucket to put file into 351 | s3 := New("", k) 352 | b := s3.Bucket("bucketName") 353 | 354 | r, h, err := b.GetReader("keyName", nil) 355 | if err != nil { 356 | return err 357 | } 358 | // stream to standard output 359 | if _, err = io.Copy(os.Stdout, r); err != nil { 360 | return err 361 | } 362 | err = 
r.Close() 363 | if err != nil { 364 | return err 365 | } 366 | fmt.Println(h) // print key header data 367 | return nil 368 | } 369 | 370 | func TestDelete(t *testing.T) { 371 | 372 | var deleteTests = []struct { 373 | path string 374 | exist bool 375 | err error 376 | }{ 377 | {"delete1", true, nil}, 378 | {"delete 2", false, nil}, 379 | {"/delete 2", false, nil}, 380 | } 381 | 382 | for _, tt := range deleteTests { 383 | if tt.exist { 384 | err := b.putReader(tt.path, &randSrc{Size: 1}) 385 | 386 | if err != nil { 387 | t.Fatal(err) 388 | } 389 | } 390 | err := b.Delete(tt.path) 391 | t.Log(err) 392 | errComp(tt.err, err, t, tt) 393 | } 394 | } 395 | 396 | func TestDeleteMultiple(t *testing.T) { 397 | 398 | var filesToDelete = []struct { 399 | path string 400 | exist bool 401 | }{ 402 | {"multi-delete1", true}, 403 | {"multi-delete 2", false}, 404 | {"/mutli-delete 2", false}, 405 | } 406 | 407 | keys := make([]string, 0, len(filesToDelete)) 408 | for _, tt := range filesToDelete { 409 | if tt.exist { 410 | err := b.putReader(tt.path, &randSrc{Size: 1}) 411 | 412 | if err != nil { 413 | t.Fatal(err) 414 | } 415 | } 416 | keys = append(keys, tt.path) 417 | } 418 | 419 | res, err := b.DeleteMultiple(false, keys...) 
420 | t.Log(res.Errors) 421 | if len(res.Errors) != 0 { 422 | t.Errorf("mutiple delete included errors") 423 | } 424 | // Twice because of the md5 files 425 | if len(res.Deleted) != 2*len(filesToDelete) { 426 | t.Errorf("Wrong number of deleted objects") 427 | } 428 | t.Log(err) 429 | errComp(nil, err, t, keys) 430 | } 431 | 432 | func TestGetVersion(t *testing.T) { 433 | t.Parallel() 434 | 435 | var versionTests = []struct { 436 | path string 437 | err error 438 | }{ 439 | {"key1", nil}, 440 | } 441 | for _, tt := range versionTests { 442 | if err := b.putReader(tt.path, &randSrc{Size: 1}); err != nil { 443 | t.Fatal(err) 444 | } 445 | // get version id 446 | r, h, err := b.GetReader(tt.path, nil) 447 | if err != nil { 448 | t.Fatal(err) 449 | } 450 | r.Close() 451 | v := h.Get("x-amz-version-id") 452 | if v == "" { 453 | t.Logf("versioning not enabled on %s\n", b.Name) 454 | t.SkipNow() 455 | } 456 | // upload again for > 1 version 457 | if err := b.putReader(tt.path, &randSrc{Size: 1}); err != nil { 458 | t.Fatal(err) 459 | } 460 | 461 | // request first uploaded version 462 | t.Logf("version id: %s", v) 463 | p := fmt.Sprintf("%s?versionId=%s", tt.path, v) 464 | r, _, err = b.GetReader(p, nil) 465 | if err != nil { 466 | t.Fatal(err) 467 | } 468 | r.Close() 469 | errComp(tt.err, err, t, tt) 470 | } 471 | } 472 | 473 | func TestPutWriteAfterClose(t *testing.T) { 474 | t.Parallel() 475 | 476 | w, err := b.PutWriter("test", nil, nil) 477 | if err != nil { 478 | t.Fatal(err) 479 | } 480 | err = w.Close() 481 | if err != nil { 482 | t.Fatal(err) 483 | } 484 | 485 | _, err = w.Write(make([]byte, 10)) 486 | if err != syscall.EINVAL { 487 | t.Errorf("expected %v on write after close, got %v", syscall.EINVAL, err) 488 | } 489 | } 490 | 491 | func TestGetReadAfterClose(t *testing.T) { 492 | t.Parallel() 493 | 494 | r, _, err := b.GetReader("test", nil) 495 | if err != nil { 496 | t.Fatal(err) 497 | } 498 | err = r.Close() 499 | if err != nil { 500 | t.Fatal(err) 501 | } 
502 | 503 | _, err = r.Read(make([]byte, 10)) 504 | if err != syscall.EINVAL { 505 | t.Errorf("expected %v on read after close, got %v", syscall.EINVAL, err) 506 | } 507 | } 508 | 509 | // Test Close when downloading of parts still in progress 510 | func TestGetCloseBeforeRead(t *testing.T) { 511 | r, _, err := b.GetReader(getTests[4].path, getTests[4].config) 512 | if err != nil { 513 | t.Fatal(err) 514 | } 515 | //terr := fmt.Errorf("read error: 0 bytes read. expected: %d", getTests[4].rSize) 516 | terr := fmt.Errorf("read error: %d bytes read. expected: %d", 0, getTests[4].rSize) 517 | tmr := time.NewTimer(100 * time.Millisecond) 518 | defer tmr.Stop() 519 | closed := make(chan struct{}) 520 | go func() { 521 | err = r.Close() 522 | close(closed) 523 | if err != nil && err.Error() != terr.Error() || err == nil { 524 | t.Errorf("expected error %v on Close, got %v", terr, err) 525 | } 526 | }() 527 | 528 | // fail test if close does not return before timeout 529 | select { 530 | case <-closed: 531 | tmr.Stop() 532 | case <-tmr.C: 533 | t.Fatal("getter close did not return before timeout") 534 | } 535 | } 536 | 537 | // fakePutter implements `s3Putter` but its `UploadPart()` method 538 | // always fails. 
539 | type fakePutter struct { 540 | ctx context.Context 541 | uploadCh chan error 542 | completeCh chan error 543 | } 544 | 545 | func (p *fakePutter) StartMultipartUpload(_ http.Header) (string, error) { 546 | return "fakeUploadID", nil 547 | } 548 | 549 | func (p *fakePutter) UploadPart(_ string, _ *s3client.Part) error { 550 | select { 551 | case err, ok := <-p.uploadCh: 552 | if !ok { 553 | return errors.New("upload called too many times") 554 | } 555 | return err 556 | case <-p.ctx.Done(): 557 | return errors.New("upload context expired") 558 | } 559 | } 560 | 561 | func (p *fakePutter) CompleteMultipartUpload(_ string, _ []*s3client.Part) (string, error) { 562 | select { 563 | case err, ok := <-p.completeCh: 564 | if !ok { 565 | return "", errors.New("complete called too many times") 566 | } 567 | return "fakeETag", err 568 | case <-p.ctx.Done(): 569 | return "", errors.New("complete context expired") 570 | } 571 | } 572 | 573 | func (p *fakePutter) AbortMultipartUpload(_ string) error { 574 | return errors.New("AbortMultipartUpload not implemented") 575 | } 576 | 577 | func (p *fakePutter) PutMD5(_ string) error { 578 | return errors.New("PutMD5 not implemented") 579 | } 580 | 581 | func TestPutterUploadError(t *testing.T) { 582 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 583 | defer cancel() 584 | 585 | uploadErr := errors.New("upload error") 586 | client := fakePutter{ 587 | ctx: ctx, 588 | uploadCh: make(chan error, 1), 589 | completeCh: make(chan error, 1), 590 | } 591 | p, err := newPutter(&client, nil, b.conf()) 592 | if err != nil { 593 | t.Errorf("error instantiating putter: %v", err) 594 | } 595 | 596 | client.uploadCh <- uploadErr 597 | close(client.uploadCh) 598 | 599 | _, err = p.Write([]byte("foo")) 600 | // We don't insist that this return an error, but if it does it 601 | // has to be `uploadErr`. 
602 | if err != nil && err != uploadErr { 603 | t.Errorf("unexpected error on Write: %v", err) 604 | } 605 | 606 | client.completeCh <- nil 607 | err = p.Close() 608 | if err != uploadErr { 609 | t.Errorf("expected error %v on Close, got %v", uploadErr, err) 610 | } 611 | } 612 | 613 | func TestBulkPutterUploadError(t *testing.T) { 614 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 615 | defer cancel() 616 | 617 | uploadErr := errors.New("upload error") 618 | client := fakePutter{ 619 | ctx: ctx, 620 | uploadCh: make(chan error, 1), 621 | } 622 | p, err := newPutter(&client, nil, b.conf()) 623 | if err != nil { 624 | t.Errorf("error instantiating putter: %v", err) 625 | } 626 | 627 | client.uploadCh <- uploadErr 628 | 629 | data := []byte("longish string to fill the buffer sooner") 630 | for { 631 | // After the asynchronous attempt to write the first part is 632 | // seen to have failed, but before the second attempt is 633 | // allowed to complete, this must return an error: 634 | _, err = p.Write(data) 635 | if err != nil { 636 | if err != uploadErr { 637 | t.Errorf("unexpected error on Write: %v", err) 638 | } 639 | break 640 | } 641 | } 642 | 643 | // After the first error has occurred, we should continue to get 644 | // the same error: 645 | _, err = p.Write(data) 646 | switch err { 647 | case uploadErr: 648 | // OK. 
649 | case nil: 650 | t.Errorf("missing error on Write") 651 | default: 652 | t.Errorf("unexpected error on Write: %v", err) 653 | } 654 | 655 | // We should get the same error again from `Close()`, and 656 | // `CompleteMultipartUpload()` should never be called (which it 657 | // can't be, because `client.completeCh` can't be received from): 658 | err = p.Close() 659 | if err != uploadErr { 660 | t.Errorf("expected error %v on Close, got %v", uploadErr, err) 661 | } 662 | } 663 | 664 | func TestBulkPutterSecondUploadError(t *testing.T) { 665 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 666 | defer cancel() 667 | 668 | uploadErr := errors.New("upload error") 669 | client := fakePutter{ 670 | ctx: ctx, 671 | uploadCh: make(chan error, 1), 672 | } 673 | p, err := newPutter(&client, nil, b.conf()) 674 | if err != nil { 675 | t.Errorf("error instantiating putter: %v", err) 676 | } 677 | 678 | go func() { 679 | // Let the first upload succeed: 680 | client.uploadCh <- nil 681 | // and all subsequent ones fail: 682 | for { 683 | select { 684 | case client.uploadCh <- uploadErr: 685 | case <-ctx.Done(): 686 | break 687 | } 688 | } 689 | }() 690 | 691 | data := []byte("longish string to fill the buffer sooner") 692 | for { 693 | _, err = p.Write(data) 694 | if err != nil { 695 | if err != uploadErr { 696 | t.Errorf("unexpected error on Write: %v", err) 697 | } 698 | break 699 | } 700 | } 701 | 702 | // After the first error has occurred, we should continue to get 703 | // the same error: 704 | _, err = p.Write(data) 705 | switch err { 706 | case uploadErr: 707 | // OK. 
708 | case nil: 709 | t.Errorf("missing error on Write") 710 | default: 711 | t.Errorf("unexpected error on Write: %v", err) 712 | } 713 | 714 | // We should get the same error again from `Close()`, and 715 | // `CompleteMultipartUpload()` should never be called (which it 716 | // can't be, because `client.completeCh` can't be received from): 717 | err = p.Close() 718 | if err != uploadErr { 719 | t.Errorf("expected error %v on Close, got %v", uploadErr, err) 720 | } 721 | } 722 | 723 | func TestPutterCompleteError(t *testing.T) { 724 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 725 | defer cancel() 726 | 727 | completeErr := errors.New("complete error") 728 | client := fakePutter{ 729 | ctx: ctx, 730 | uploadCh: make(chan error), 731 | completeCh: make(chan error, 1), 732 | } 733 | p, err := newPutter(&client, nil, b.conf()) 734 | if err != nil { 735 | t.Errorf("error instantiating putter: %v", err) 736 | } 737 | 738 | go func() { 739 | // Let all uploads succeed: 740 | for { 741 | select { 742 | case client.uploadCh <- nil: 743 | case <-ctx.Done(): 744 | break 745 | } 746 | } 747 | }() 748 | 749 | _, err = p.Write([]byte("foo")) 750 | if err != nil { 751 | t.Errorf("unexpected error on Write: %v", err) 752 | } 753 | 754 | client.completeCh <- completeErr 755 | err = p.Close() 756 | if err != completeErr { 757 | t.Errorf("expected error %v on Close, got %v", completeErr, err) 758 | } 759 | } 760 | 761 | func TestGetterAfterError(t *testing.T) { 762 | r, _, err := b.GetReader("test", nil) 763 | if err != nil { 764 | t.Fatal(err) 765 | } 766 | g, ok := r.(*getter) 767 | if !ok { 768 | t.Fatal("getter type cast failed") 769 | } 770 | terr := fmt.Errorf("test error") 771 | g.err = terr 772 | _, err = r.Read([]byte("foo")) 773 | if err != terr { 774 | t.Errorf("expected error %v on Read, got %v", terr, err) 775 | } 776 | err = r.Close() 777 | if err != terr { 778 | t.Errorf("expected error %v on Close, got %v", terr, err) 779 | } 780 | } 
781 | 782 | var goodRegionsTests = []struct { 783 | domain string 784 | region string 785 | err error 786 | }{ 787 | {domain: "s3.amazonaws.com", region: "us-east-1"}, 788 | {domain: "s3-external-1.amazonaws.com", region: "us-east-1"}, 789 | {domain: "s3-sa-east-1.amazonaws.com", region: "sa-east-1"}, 790 | } 791 | 792 | func TestGoodRegion(t *testing.T) { 793 | for _, tt := range goodRegionsTests { 794 | s3 := &S3{Domain: tt.domain} 795 | region := s3.Region() 796 | if region != tt.region { 797 | t.Errorf("wrong region detected, got '%s', expected '%s'", region, tt.region) 798 | } 799 | } 800 | } 801 | 802 | var badRegionsTests = []struct { 803 | domain string 804 | region string 805 | err error 806 | }{ 807 | {domain: "bad-amazonaws.com", region: "sa-east-1"}, 808 | {domain: "s3-sss.amazonaws#com", region: "sa-east-1"}, 809 | } 810 | 811 | // Given some bad domains as input 812 | // When the Region method is called on the domain 813 | // Then the method should panic because the domain is not recognized 814 | func TestBadRegion(t *testing.T) { 815 | defer func() { 816 | if r := recover(); r == nil { 817 | t.Errorf("The code did not panic") 818 | } 819 | }() 820 | 821 | for _, tt := range badRegionsTests { 822 | s3 := &S3{Domain: tt.domain} 823 | s3.Region() 824 | } 825 | } 826 | 827 | func TestBucketURL(t *testing.T) { 828 | var urlTests = []struct { 829 | bucket string 830 | path string 831 | config *Config 832 | url string 833 | }{ 834 | {"bucket1", "path", DefaultConfig, "https://bucket1.s3.amazonaws.com/path"}, 835 | {"bucket1", "#path", DefaultConfig, `https://bucket1.s3.amazonaws.com/%23path`}, 836 | {"bucket1", "#path ", DefaultConfig, `https://bucket1.s3.amazonaws.com/%23path%20`}, 837 | {"bucket.2", "path", DefaultConfig, "https://s3.amazonaws.com/bucket.2/path"}, 838 | {"bucket.2", "#path", DefaultConfig, `https://s3.amazonaws.com/bucket.2/%23path`}, 839 | {"bucket.2", "#path?versionId=seQK1YwRAy6Ex25YHb_yJHbo94jSDnpu", DefaultConfig, 
`https://s3.amazonaws.com/bucket.2/%23path%3FversionId=seQK1YwRAy6Ex25YHb_yJHbo94jSDnpu`}, // versionId-specific handling 840 | } 841 | 842 | for _, tt := range urlTests { 843 | s3 := New("", Keys{}) 844 | b := s3.Bucket(tt.bucket) 845 | u, err := b.url(tt.path, tt.config) 846 | if err != nil { 847 | t.Error(err) 848 | } 849 | if u.String() != tt.url { 850 | t.Errorf("got '%s', expected '%s'", u.String(), tt.url) 851 | } 852 | 853 | } 854 | 855 | } 856 | 857 | // reduce parallelism and part size to benchmark 858 | // memory pool reuse 859 | func benchConfig() *Config { 860 | var conf Config 861 | conf = *DefaultConfig 862 | conf.Concurrency = 4 863 | conf.PartSize = 5 * mb 864 | return &conf 865 | } 866 | 867 | func BenchmarkPut(k *testing.B) { 868 | r := &randSrc{Size: int(300 * mb)} 869 | k.ReportAllocs() 870 | for i := 0; i < k.N; i++ { 871 | w, _ := b.PutWriter("bench_test", nil, benchConfig()) 872 | n, err := io.Copy(w, r) 873 | if err != nil { 874 | k.Fatal(err) 875 | } 876 | k.SetBytes(n) 877 | w.Close() 878 | } 879 | } 880 | 881 | func BenchmarkGet(k *testing.B) { 882 | k.ReportAllocs() 883 | for i := 0; i < k.N; i++ { 884 | r, _, _ := b.GetReader("bench_test", benchConfig()) 885 | n, err := io.Copy(ioutil.Discard, r) 886 | if err != nil { 887 | k.Fatal(err) 888 | } 889 | k.SetBytes(n) 890 | r.Close() 891 | } 892 | } 893 | --------------------------------------------------------------------------------