├── .gitignore ├── README.md ├── filecache_suite_test.go ├── .travis.yml ├── LICENSE ├── Gopkg.toml ├── dropbox.go ├── s3_test.go ├── dropbox_test.go ├── s3.go ├── Gopkg.lock ├── filecache.go └── filecache_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | foo.pdf 4 | junk 5 | /vendor -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Filecache 2 | ========= 3 | 4 | [![](https://travis-ci.org/Nitro/filecache.svg?branch=master)](https://travis-ci.org/Nitro/filecache) 5 | 6 | Implements a local, rudimentary file cache backed by remote stores (Amazon S3, and optionally Dropbox). 7 | Downloaded files are kept on local disk and tracked in an LRU cache, so repeated requests for the same file can be served without fetching it again. -------------------------------------------------------------------------------- /filecache_suite_test.go: -------------------------------------------------------------------------------- 1 | package filecache 2 | 3 | import ( 4 | . "github.com/onsi/ginkgo" 5 | . "github.com/onsi/gomega" 6 | 7 | "testing" 8 | ) 9 | 10 | func TestFilecache(t *testing.T) { 11 | RegisterFailHandler(Fail) 12 | RunSpecs(t, "Filecache Suite") 13 | } 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.10.x 5 | 6 | sudo: required 7 | 8 | before_install: 9 | - sudo apt-get install -y ca-certificates 10 | - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $GOPATH/bin v1.10.2 11 | 12 | install: 13 | - go get github.com/golang/dep/cmd/dep && dep ensure 14 | 15 | script: 16 | - golangci-lint run 17 | - go test -v -race --timeout 1m 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Nitro Software 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://golang.github.io/dep/docs/Gopkg.toml.html 4 | # for detailed Gopkg.toml documentation. 
5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | # 22 | # [prune] 23 | # non-go = false 24 | # go-tests = true 25 | # unused-packages = true 26 | 27 | 28 | [[constraint]] 29 | name = "github.com/aws/aws-sdk-go" 30 | version = "1.15.46" 31 | 32 | [[constraint]] 33 | name = "github.com/djherbis/times" 34 | version = "1.0.1" 35 | 36 | [[constraint]] 37 | name = "github.com/hashicorp/golang-lru" 38 | version = "0.5.0" 39 | 40 | [[constraint]] 41 | name = "github.com/onsi/ginkgo" 42 | version = "1.6.0" 43 | 44 | [[constraint]] 45 | name = "github.com/onsi/gomega" 46 | version = "1.4.2" 47 | 48 | [[constraint]] 49 | name = "github.com/sirupsen/logrus" 50 | version = "1.1.0" 51 | 52 | [prune] 53 | go-tests = true 54 | unused-packages = true 55 | -------------------------------------------------------------------------------- /dropbox.go: -------------------------------------------------------------------------------- 1 | package filecache 2 | 3 | import ( 4 | "context" 5 | "encoding/base64" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "strings" 10 | "time" 11 | 12 | log "github.com/sirupsen/logrus" 13 | ) 14 | 15 | // DropboxDownload will download a file from the specified Dropbox location into localFile 16 | func DropboxDownload(dr *DownloadRecord, localFile io.Writer, downloadTimeout time.Duration) error { 17 | // In the case of Dropbox files, the path will contain the base64-encoded file URL after dropbox/ 18 | fileURL, err := base64.RawURLEncoding.DecodeString(strings.TrimPrefix(dr.Path, "dropbox/")) 19 | 20 | if err != nil { 21 | return fmt.Errorf("could not base64 decode file URL: %s", err) 22 | } 23 | 24 | startTime := time.Now() 25 | ctx, cancelFunc := context.WithTimeout(context.Background(), downloadTimeout) 26 | defer cancelFunc() 27 | 28 | req, err := http.NewRequest(http.MethodGet, string(fileURL), nil) 29 | if err != nil { 30 | return fmt.Errorf("could not create HTTP request for URL %q: %s", fileURL, err) 31 | } 32 | 33 | resp, err := http.DefaultClient.Do(req.WithContext(ctx)) 34 | if err != nil { 35 | return fmt.Errorf("failed to download file %q: %s", fileURL, err) 36 | } 37 | defer resp.Body.Close() 38 | 39 | numBytes, err := io.Copy(localFile, resp.Body) 40 | if err != nil { 41 | return fmt.Errorf("failed to write local file: %s", err) 42 | } 43 | 44 | log.Debugf("Took %.2fms to download %d bytes from Dropbox for %s", time.Since(startTime).Seconds()*1000, numBytes, dr.Path) 45 | 46 | return nil 47 | } 48 | -------------------------------------------------------------------------------- /s3_test.go: -------------------------------------------------------------------------------- 1 | package filecache_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "time" 7 | 8 | . "github.com/Nitro/filecache" 9 | 10 | . "github.com/onsi/ginkgo" 11 | . 
"github.com/onsi/gomega" 12 | ) 13 | 14 | var _ = Describe("S3", func() { 15 | var ( 16 | manager *S3RegionManagedDownloader 17 | 18 | localFile *os.File 19 | ) 20 | 21 | BeforeEach(func() { 22 | // Reset between runs 23 | manager = NewS3RegionManagedDownloader("us-west-2") 24 | 25 | var err error 26 | localFile, err = os.Create("foo.pdf") 27 | Expect(err).To(BeNil()) 28 | }) 29 | 30 | AfterEach(func() { localFile.Close() }) 31 | 32 | Describe("NewS3RegionManagedDownloader()", func() { 33 | It("returns a properly configured instance", func() { 34 | Expect(manager).NotTo(BeNil()) 35 | Expect(manager.DefaultRegion).To(Equal("us-west-2")) 36 | Expect(manager.DownloaderCache).NotTo(BeNil()) 37 | }) 38 | }) 39 | 40 | // This test will actually contact S3... not in love with that 41 | // but don't want to mock it out, either. Could be mocked out: 42 | // https://docs.aws.amazon.com/sdk-for-go/api/service/s3/s3iface/#S3API 43 | Describe("GetDownloader()", func() { 44 | It("returns a newly created downloader", func() { 45 | dLoader, err := manager.GetDownloader(context.Background(), "nitro-public") 46 | 47 | Expect(err).To(BeNil()) 48 | Expect(dLoader).NotTo(BeNil()) 49 | Expect(dLoader.S3).NotTo(BeNil()) 50 | }) 51 | 52 | It("returns a cached downloader", func() { 53 | dLoader1, err := manager.GetDownloader(context.Background(), "nitro-public") 54 | Expect(err).To(BeNil()) 55 | 56 | dLoader2, err := manager.GetDownloader(context.Background(), "nitro-public") 57 | Expect(err).To(BeNil()) 58 | 59 | Expect(dLoader1).To(Equal(dLoader2)) 60 | }) 61 | 62 | It("returns an error when trying to fetch a file from a non-existent bucket", func() { 63 | err := manager.Download(&DownloadRecord{Path: "non-existent-bucket/foo.pdf"}, localFile, 10*time.Second) 64 | Expect(err.Error()).To(ContainSubstring("Unable to get downloader for non-existent-bucket: Region for non-existent-bucket not found")) 65 | }) 66 | 67 | It("returns an error when trying to fetch a file which doesn't exist", func() { 68 | err := manager.Download(&DownloadRecord{Path: "nitro-junk/non-existent-foo.pdf"}, localFile, 10*time.Second) 69 | Expect(err.Error()).To(ContainSubstring("Could not fetch from S3")) 70 | }) 71 | 72 | It("returns an error when getting a 0 length file", func() { 73 | err := manager.Download(&DownloadRecord{Path: "nitro-junk/foo.pdf"}, localFile, 10*time.Second) 74 | Expect(err.Error()).NotTo(BeNil()) 75 | }) 76 | }) 77 | }) 78 | -------------------------------------------------------------------------------- /dropbox_test.go: -------------------------------------------------------------------------------- 1 | package filecache_test 2 | 3 | import ( 4 | "encoding/base64" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "net/http/httptest" 9 | "time" 10 | 11 | . "github.com/Nitro/filecache" 12 | 13 | . "github.com/onsi/ginkgo" 14 | . 
"github.com/onsi/gomega" 15 | ) 16 | 17 | type dummyWriter struct { 18 | receivedData string 19 | writeError error 20 | } 21 | 22 | func (dw *dummyWriter) Write(p []byte) (n int, err error) { 23 | if dw.writeError != nil { 24 | return 0, dw.writeError 25 | } 26 | 27 | dw.receivedData = string(p) 28 | return len(p), nil 29 | } 30 | 31 | var _ = Describe("DropboxDownload", func() { 32 | It("downloads a file successfully", func() { 33 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 34 | _, err := w.Write([]byte("dummy_content")) 35 | Expect(err).To(BeNil()) 36 | })) 37 | defer ts.Close() 38 | url := fmt.Sprintf( 39 | "dropbox/%s", 40 | base64.RawURLEncoding.EncodeToString([]byte(ts.URL)), 41 | ) 42 | 43 | dr, err := NewDownloadRecord(url, nil) 44 | Expect(err).To(BeNil()) 45 | 46 | writer := &dummyWriter{} 47 | err = DropboxDownload(dr, writer, 100*time.Millisecond) 48 | Expect(err).ShouldNot(HaveOccurred()) 49 | Expect(writer.receivedData).To(ContainSubstring("dummy_content")) 50 | }) 51 | 52 | It("fails to decode an invalid base64-encoded Dropbox URL", func() { 53 | dr, err := NewDownloadRecord("dropbox/foo.bar", nil) 54 | Expect(err).To(BeNil()) 55 | 56 | err = DropboxDownload(dr, &dummyWriter{}, 100*time.Millisecond) 57 | Expect(err).Should(HaveOccurred()) 58 | }) 59 | 60 | It("fails to decode an URL encoded with an invalid base64-encoding", func() { 61 | url := fmt.Sprintf( 62 | "dropbox/%s", 63 | base64.StdEncoding.EncodeToString([]byte("http://dropbox.com/foo.bar")), 64 | ) 65 | dr, err := NewDownloadRecord(url, nil) 66 | Expect(err).To(BeNil()) 67 | 68 | err = DropboxDownload(dr, &dummyWriter{}, 100*time.Millisecond) 69 | Expect(err).Should(HaveOccurred()) 70 | }) 71 | 72 | It("fails to create a HTTP request for an invalid URL", func() { 73 | url := fmt.Sprintf( 74 | "dropbox/%s", 75 | base64.RawURLEncoding.EncodeToString([]byte("ht$tp://invalid_url")), 76 | ) 77 | 78 | dr, err := NewDownloadRecord(url, nil) 79 | Expect(err).To(BeNil()) 80 | 81 | err = DropboxDownload(dr, &dummyWriter{}, 100*time.Millisecond) 82 | Expect(err).Should(HaveOccurred()) 83 | }) 84 | 85 | It("returns an error when trying to download from an unreachable domain", func() { 86 | url := fmt.Sprintf( 87 | "dropbox/%s", 88 | base64.RawURLEncoding.EncodeToString([]byte("http://some_dummy_domain.com")), 89 | ) 90 | 91 | dr, err := NewDownloadRecord(url, nil) 92 | Expect(err).To(BeNil()) 93 | 94 | err = DropboxDownload(dr, &dummyWriter{}, 100*time.Millisecond) 95 | Expect(err).Should(HaveOccurred()) 96 | }) 97 | 98 | It("returns an error when streaming the file to disk fails", func() { 99 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 100 | _, err := w.Write([]byte("dummy_content")) 101 | Expect(err).To(BeNil()) 102 | })) 103 | defer ts.Close() 104 | url := fmt.Sprintf( 105 | "dropbox/%s", 106 | base64.RawURLEncoding.EncodeToString([]byte(ts.URL)), 107 | ) 108 | 109 | dr, err := NewDownloadRecord(url, nil) 110 | Expect(err).To(BeNil()) 111 | 112 | writer := &dummyWriter{writeError: errors.New("dummy_error")} 113 | err = DropboxDownload(dr, writer, 100*time.Millisecond) 114 | Expect(err).Should(HaveOccurred()) 115 | Expect(err.Error()).To(ContainSubstring("dummy_error")) 116 | }) 117 | 118 | It("fails to download when timing out", func() { 119 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 120 | _, err := w.Write([]byte("dummy_content")) 121 | Expect(err).To(BeNil()) 122 | })) 123 | defer 
ts.Close() 124 | url := fmt.Sprintf( 125 | "dropbox/%s", 126 | base64.RawURLEncoding.EncodeToString([]byte(ts.URL)), 127 | ) 128 | 129 | dr, err := NewDownloadRecord(url, nil) 130 | Expect(err).To(BeNil()) 131 | 132 | writer := &dummyWriter{} 133 | err = DropboxDownload(dr, writer, 0*time.Millisecond) 134 | Expect(err).Should(HaveOccurred()) 135 | Expect(err.Error()).To(ContainSubstring("context deadline exceeded")) 136 | }) 137 | }) 138 | -------------------------------------------------------------------------------- /s3.go: -------------------------------------------------------------------------------- 1 | package filecache 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "strings" 9 | "sync" 10 | "time" 11 | 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/aws/awserr" 14 | "github.com/aws/aws-sdk-go/aws/request" 15 | "github.com/aws/aws-sdk-go/aws/session" 16 | "github.com/aws/aws-sdk-go/service/s3" 17 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 18 | log "github.com/sirupsen/logrus" 19 | ) 20 | 21 | // S3RegionManagedDownloader manages a cache of s3manager.Downloaders that 22 | // have been configured for their correct region. 23 | type S3RegionManagedDownloader struct { 24 | sync.RWMutex 25 | DefaultRegion string 26 | DownloaderCache map[string]*s3manager.Downloader // Maps bucket names to downloaders 27 | } 28 | 29 | // NewS3RegionManagedDownloader returns a configured instance where the default 30 | // bucket region will be as passed. This means bucket lookups from the cache 31 | // will prefer that region when hinting to S3 which region they believe a bucket 32 | // lives in. 33 | func NewS3RegionManagedDownloader(defaultRegion string) *S3RegionManagedDownloader { 34 | return &S3RegionManagedDownloader{ 35 | DefaultRegion: defaultRegion, 36 | DownloaderCache: make(map[string]*s3manager.Downloader), 37 | } 38 | } 39 | 40 | // GetDownloader looks up a bucket in the cache and returns a configured 41 | // s3manager.Downloader for it or provisions a new one and returns that. 42 | // NOTE! This is never flushed and so should not be used with an unlimited 43 | // number of buckets! The first few requests will incur an additional 44 | // penalty of roundtrips to Amazon to look up the region of the requested 45 | // S3 bucket. 
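//
// A minimal usage sketch (the region and bucket name here are illustrative,
// not values used by this project):
//
//	mgr := NewS3RegionManagedDownloader("us-east-1")
//	dl, err := mgr.GetDownloader(context.Background(), "example-bucket")
//	if err != nil {
//		// the bucket's region could not be resolved
//	}
//	_ = dl // a *s3manager.Downloader pinned to the bucket's region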
46 | func (m *S3RegionManagedDownloader) GetDownloader(ctx context.Context, bucket string) (*s3manager.Downloader, error) { 47 | 48 | m.RLock() 49 | // Look it up in the cache first 50 | if dLoader, ok := m.DownloaderCache[bucket]; ok { 51 | m.RUnlock() 52 | return dLoader, nil 53 | } 54 | m.RUnlock() 55 | 56 | // We need an arbitrary, region-less session 57 | sess := session.Must(session.NewSession()) 58 | 59 | region, err := s3manager.GetBucketRegion(ctx, sess, bucket, m.DefaultRegion) 60 | if err != nil { 61 | if aerr, ok := err.(awserr.Error); ok && aerr.Code() == "NotFound" { 62 | return nil, fmt.Errorf("Region for %s not found", bucket) 63 | } 64 | return nil, err 65 | } 66 | log.Debugf("Bucket '%s' is in region: %s", bucket, region) 67 | 68 | sess, err = session.NewSession(&aws.Config{Region: aws.String(region)}) 69 | if err != nil { 70 | return nil, fmt.Errorf("Could not create S3 session for region '%s': %s", region, err) 71 | } 72 | 73 | // Configure and then cache the downloader 74 | dLoader := s3manager.NewDownloader(sess) 75 | m.Lock() 76 | m.DownloaderCache[bucket] = dLoader 77 | m.Unlock() 78 | 79 | return dLoader, nil 80 | } 81 | 82 | // Download will download a file from the specified S3 bucket into localFile 83 | func (m *S3RegionManagedDownloader) Download(dr *DownloadRecord, localFile *os.File, downloadTimeout time.Duration) error { 84 | fname := dr.Path 85 | 86 | // The S3 bucket is the first part of the path, everything else is filename 87 | parts := strings.Split(fname, "/") 88 | if len(parts) < 2 { 89 | return fmt.Errorf("Not enough path to fetch a file! Expected <bucket>/<filename>") 90 | } 91 | bucket := parts[0] 92 | fname = strings.Join(parts[1:], "/") 93 | 94 | ctx, cancelFunc := context.WithTimeout(context.Background(), downloadTimeout) 95 | defer cancelFunc() 96 | 97 | log.Debugf("Getting downloader for %s", bucket) 98 | downloader, err := m.GetDownloader(ctx, bucket) 99 | if err != nil { 100 | return fmt.Errorf("Unable to get downloader for %s: %s", bucket, err) 101 | } 102 | 103 | var requestID, hostID string 104 | requestInspectorFunc := func(r *request.Request) { 105 | r.Handlers.Complete.PushBack(func(req *request.Request) { 106 | requestID = req.RequestID 107 | if req.HTTPResponse != nil && req.HTTPResponse.Header != nil { 108 | hostID = req.HTTPResponse.Header.Get("X-Amz-Id-2") 109 | } 110 | }) 111 | } 112 | 113 | startTime := time.Now() 114 | numBytes, err := downloader.DownloadWithContext( 115 | ctx, 116 | localFile, 117 | &s3.GetObjectInput{ 118 | Bucket: aws.String(bucket), 119 | Key: aws.String(fname), 120 | }, 121 | s3manager.WithDownloaderRequestOptions( 122 | requestInspectorFunc, 123 | ), 124 | ) 125 | if err != nil { 126 | errMessage := err.Error() 127 | if s3Err, ok := err.(s3.RequestFailure); ok { 128 | errMessage = fmt.Sprintf( 129 | "Request ID %q on host %q failed: %s", s3Err.RequestID(), s3Err.HostID(), errMessage, 130 | ) 131 | } 132 | return fmt.Errorf("Could not fetch from S3: %s", errMessage) 133 | } 134 | 135 | log.Infof( 136 | "Took %.2fms to download s3://%s/%s (%d bytes) with request ID %q and host ID %q", 137 | time.Since(startTime).Seconds()*1000, bucket, fname, numBytes, requestID, hostID, 138 | ) 139 | 140 | if numBytes < 1 { 141 | return errors.New("0 length file received from S3") 142 | } 143 | 144 | return nil 145 | } 146 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not 
edit; changes may be undone by the next 'dep ensure'. 2 | 3 | 4 | [[projects]] 5 | digest = "1:924818e2217379333e0ba0f0010d296c5c7cf21168e50d82aee5cd6f5c3603b1" 6 | name = "github.com/aws/aws-sdk-go" 7 | packages = [ 8 | "aws", 9 | "aws/awserr", 10 | "aws/awsutil", 11 | "aws/client", 12 | "aws/client/metadata", 13 | "aws/corehandlers", 14 | "aws/credentials", 15 | "aws/credentials/ec2rolecreds", 16 | "aws/credentials/endpointcreds", 17 | "aws/credentials/stscreds", 18 | "aws/csm", 19 | "aws/defaults", 20 | "aws/ec2metadata", 21 | "aws/endpoints", 22 | "aws/request", 23 | "aws/session", 24 | "aws/signer/v4", 25 | "internal/sdkio", 26 | "internal/sdkrand", 27 | "internal/sdkuri", 28 | "internal/shareddefaults", 29 | "private/protocol", 30 | "private/protocol/eventstream", 31 | "private/protocol/eventstream/eventstreamapi", 32 | "private/protocol/query", 33 | "private/protocol/query/queryutil", 34 | "private/protocol/rest", 35 | "private/protocol/restxml", 36 | "private/protocol/xml/xmlutil", 37 | "service/s3", 38 | "service/s3/s3iface", 39 | "service/s3/s3manager", 40 | "service/sts", 41 | ] 42 | pruneopts = "UT" 43 | revision = "53e36ebb6b2c26d228ba9563d06caf306227f110" 44 | version = "v1.15.46" 45 | 46 | [[projects]] 47 | digest = "1:aca0109d4521779dc155378582ae8ecf1e6cb59b97e385eb17e1f363e2b799c0" 48 | name = "github.com/djherbis/times" 49 | packages = ["."] 50 | pruneopts = "UT" 51 | revision = "95292e44976d1217cf3611dc7c8d9466877d3ed5" 52 | version = "v1.0.1" 53 | 54 | [[projects]] 55 | digest = "1:b98e7574fc27ec166fb31195ec72c3bd0bffd73926d3612eb4c929bc5236f75b" 56 | name = "github.com/go-ini/ini" 57 | packages = ["."] 58 | pruneopts = "UT" 59 | revision = "7b294651033cd7d9e7f0d9ffa1b75ed1e198e737" 60 | version = "v1.38.3" 61 | 62 | [[projects]] 63 | digest = "1:8ec8d88c248041a6df5f6574b87bc00e7e0b493881dad2e7ef47b11dc69093b5" 64 | name = "github.com/hashicorp/golang-lru" 65 | packages = [ 66 | ".", 67 | "simplelru", 68 | ] 69 | pruneopts = "UT" 70 | revision = "20f1fb78b0740ba8c3cb143a61e86ba5c8669768" 71 | version = "v0.5.0" 72 | 73 | [[projects]] 74 | branch = "master" 75 | digest = "1:59392ed8afb901aab4287d4894df8191722e34f3957716f4350c8c133ce99046" 76 | name = "github.com/hpcloud/tail" 77 | packages = [ 78 | ".", 79 | "ratelimiter", 80 | "util", 81 | "watch", 82 | "winfile", 83 | ] 84 | pruneopts = "UT" 85 | revision = "a1dbeea552b7c8df4b542c66073e393de198a800" 86 | 87 | [[projects]] 88 | digest = "1:e22af8c7518e1eab6f2eab2b7d7558927f816262586cd6ed9f349c97a6c285c4" 89 | name = "github.com/jmespath/go-jmespath" 90 | packages = ["."] 91 | pruneopts = "UT" 92 | revision = "0b12d6b5" 93 | 94 | [[projects]] 95 | branch = "master" 96 | digest = "1:f44d34fda864bed6d6c71514cd40b2ee097e6e67f745d5d014113e1faa5af8b7" 97 | name = "github.com/konsorten/go-windows-terminal-sequences" 98 | packages = ["."] 99 | pruneopts = "UT" 100 | revision = "b729f2633dfe35f4d1d8a32385f6685610ce1cb5" 101 | 102 | [[projects]] 103 | digest = "1:42e29deef12327a69123b9cb2cb45fee4af5c12c2a23c6e477338279a052703f" 104 | name = "github.com/onsi/ginkgo" 105 | packages = [ 106 | ".", 107 | "config", 108 | "internal/codelocation", 109 | "internal/containernode", 110 | "internal/failer", 111 | "internal/leafnodes", 112 | "internal/remote", 113 | "internal/spec", 114 | "internal/spec_iterator", 115 | "internal/specrunner", 116 | "internal/suite", 117 | "internal/testingtproxy", 118 | "internal/writer", 119 | "reporters", 120 | "reporters/stenographer", 121 | "reporters/stenographer/support/go-colorable", 122 | 
"reporters/stenographer/support/go-isatty", 123 | "types", 124 | ] 125 | pruneopts = "UT" 126 | revision = "3774a09d95489ccaa16032e0770d08ea77ba6184" 127 | version = "v1.6.0" 128 | 129 | [[projects]] 130 | digest = "1:ab54eea8d482272009e9e4af07d4d9b5236c27b4d8c54a3f2c99d163be883eca" 131 | name = "github.com/onsi/gomega" 132 | packages = [ 133 | ".", 134 | "format", 135 | "internal/assertion", 136 | "internal/asyncassertion", 137 | "internal/oraclematcher", 138 | "internal/testingtsupport", 139 | "matchers", 140 | "matchers/support/goraph/bipartitegraph", 141 | "matchers/support/goraph/edge", 142 | "matchers/support/goraph/node", 143 | "matchers/support/goraph/util", 144 | "types", 145 | ] 146 | pruneopts = "UT" 147 | revision = "7615b9433f86a8bdf29709bf288bc4fd0636a369" 148 | version = "v1.4.2" 149 | 150 | [[projects]] 151 | digest = "1:dc2d85c13ac22c22a1f3170a41a8e1b897fa05134aaf533f16df44f66a25b4a1" 152 | name = "github.com/sirupsen/logrus" 153 | packages = ["."] 154 | pruneopts = "UT" 155 | revision = "a67f783a3814b8729bd2dac5780b5f78f8dbd64d" 156 | version = "v1.1.0" 157 | 158 | [[projects]] 159 | branch = "master" 160 | digest = "1:3f3a05ae0b95893d90b9b3b5afdb79a9b3d96e4e36e099d841ae602e4aca0da8" 161 | name = "golang.org/x/crypto" 162 | packages = ["ssh/terminal"] 163 | pruneopts = "UT" 164 | revision = "e3636079e1a4c1f337f212cc5cd2aca108f6c900" 165 | 166 | [[projects]] 167 | branch = "master" 168 | digest = "1:2394b7d142aba21cd69bdb719c1588dae1aebf115d8f4c033ec0338f30684309" 169 | name = "golang.org/x/net" 170 | packages = [ 171 | "html", 172 | "html/atom", 173 | "html/charset", 174 | ] 175 | pruneopts = "UT" 176 | revision = "4dfa2610cdf3b287375bbba5b8f2a14d3b01d8de" 177 | 178 | [[projects]] 179 | branch = "master" 180 | digest = "1:6f82ed211591ecb407897ca46ff6149d618223088aecad72675804f106033629" 181 | name = "golang.org/x/sys" 182 | packages = [ 183 | "unix", 184 | "windows", 185 | ] 186 | pruneopts = "UT" 187 | revision = "e4b3c5e9061176387e7cea65e4dc5853801f3fb7" 188 | 189 | [[projects]] 190 | digest = "1:aa4d6967a3237f8367b6bf91503964a77183ecf696f1273e8ad3551bb4412b5f" 191 | name = "golang.org/x/text" 192 | packages = [ 193 | "encoding", 194 | "encoding/charmap", 195 | "encoding/htmlindex", 196 | "encoding/internal", 197 | "encoding/internal/identifier", 198 | "encoding/japanese", 199 | "encoding/korean", 200 | "encoding/simplifiedchinese", 201 | "encoding/traditionalchinese", 202 | "encoding/unicode", 203 | "internal/gen", 204 | "internal/tag", 205 | "internal/utf8internal", 206 | "language", 207 | "runes", 208 | "transform", 209 | "unicode/cldr", 210 | ] 211 | pruneopts = "UT" 212 | revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" 213 | version = "v0.3.0" 214 | 215 | [[projects]] 216 | digest = "1:abeb38ade3f32a92943e5be54f55ed6d6e3b6602761d74b4aab4c9dd45c18abd" 217 | name = "gopkg.in/fsnotify/fsnotify.v1" 218 | packages = ["."] 219 | pruneopts = "UT" 220 | revision = "c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9" 221 | version = "v1.4.7" 222 | 223 | [[projects]] 224 | digest = "1:3c839a777de0e6da035c9de900b60cbec463b0a89351192c1ea083eaf9e0fce0" 225 | name = "gopkg.in/tomb.v1" 226 | packages = ["."] 227 | pruneopts = "UT" 228 | revision = "c131134a1947e9afd9cecfe11f4c6dff0732ae58" 229 | 230 | [[projects]] 231 | digest = "1:342378ac4dcb378a5448dd723f0784ae519383532f5e70ade24132c4c8693202" 232 | name = "gopkg.in/yaml.v2" 233 | packages = ["."] 234 | pruneopts = "UT" 235 | revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183" 236 | version = "v2.2.1" 237 | 238 | [solve-meta] 
239 | analyzer-name = "dep" 240 | analyzer-version = 1 241 | input-imports = [ 242 | "github.com/aws/aws-sdk-go/aws", 243 | "github.com/aws/aws-sdk-go/aws/awserr", 244 | "github.com/aws/aws-sdk-go/aws/request", 245 | "github.com/aws/aws-sdk-go/aws/session", 246 | "github.com/aws/aws-sdk-go/service/s3", 247 | "github.com/aws/aws-sdk-go/service/s3/s3manager", 248 | "github.com/djherbis/times", 249 | "github.com/hashicorp/golang-lru", 250 | "github.com/onsi/ginkgo", 251 | "github.com/onsi/gomega", 252 | "github.com/sirupsen/logrus", 253 | ] 254 | solver-name = "gps-cdcl" 255 | solver-version = 1 256 | -------------------------------------------------------------------------------- /filecache.go: -------------------------------------------------------------------------------- 1 | package filecache 2 | 3 | import ( 4 | "crypto/md5" 5 | "errors" 6 | "fmt" 7 | "hash/fnv" 8 | "os" 9 | "path" 10 | "path/filepath" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/djherbis/times" 16 | "github.com/hashicorp/golang-lru" 17 | log "github.com/sirupsen/logrus" 18 | ) 19 | 20 | const ( 21 | DownloadMangerS3 = iota 22 | DownloadMangerDropbox 23 | ) 24 | 25 | var ( 26 | errInvalidURLPath = errors.New("invalid URL path") 27 | // HashableArgs allows us to support various authentication headers in the future 28 | HashableArgs = map[string]struct{}{} 29 | ) 30 | 31 | type DownloadManager int 32 | 33 | // DownloadRecord contains information about a file which will be downloaded 34 | type DownloadRecord struct { 35 | Manager DownloadManager 36 | Path string 37 | Args map[string]string 38 | HashedArgs string 39 | } 40 | 41 | type RecordDownloaderFunc = func(dr *DownloadRecord, localFile *os.File) error 42 | 43 | // FileCache is a wrapper for hashicorp/golang-lru 44 | type FileCache struct { 45 | BaseDir string 46 | Cache *lru.Cache 47 | Waiting map[string]chan struct{} 48 | WaitLock sync.Mutex 49 | DownloadFunc func(dr *DownloadRecord, localPath string) error 50 | OnEvict func(key interface{}, value interface{}) 51 | DefaultExtension string 52 | DownloadTimeout time.Duration 53 | downloaders map[DownloadManager]RecordDownloaderFunc 54 | } 55 | 56 | type option func(*FileCache) error 57 | 58 | func setSize(size int) option { 59 | return func(c *FileCache) error { 60 | cache, err := lru.NewWithEvict(size, c.onEvictDelete) 61 | if err != nil { 62 | return fmt.Errorf("invalid size: %s", err) 63 | } 64 | 65 | c.Cache = cache 66 | 67 | return nil 68 | } 69 | } 70 | 71 | func setBaseDir(baseDir string) option { 72 | return func(c *FileCache) error { 73 | if baseDir == "" { 74 | return errors.New("empty baseDir") 75 | } 76 | 77 | c.BaseDir = baseDir 78 | 79 | return nil 80 | } 81 | } 82 | 83 | // DownloadTimeout sets the file download timeout 84 | func DownloadTimeout(timeout time.Duration) option { 85 | return func(c *FileCache) error { 86 | c.DownloadTimeout = timeout 87 | 88 | return nil 89 | } 90 | } 91 | 92 | // DefaultExtension sets the default extension which will be appended to 93 | // cached files in the local directory 94 | func DefaultExtension(ext string) option { 95 | return func(c *FileCache) error { 96 | c.DefaultExtension = ext 97 | 98 | return nil 99 | } 100 | } 101 | 102 | // S3Downloader allows the DownloadFunc to pull files from S3 buckets. 103 | // Bucket names are passed as the first part of the path in files requested 104 | // from the cache. Bubbles up errors from the Hashicorp LRU library 105 | // when something goes wrong there. 
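//
// A minimal wiring sketch (the size, directory and region below are
// illustrative values, not project defaults):
//
//	cache, err := New(100, "/tmp/filecache", S3Downloader("us-west-2"))
//	if err != nil {
//		log.Fatal(err)
//	}
//	cache.Fetch(&DownloadRecord{Path: "example-bucket/some-file.pdf"})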
106 | func S3Downloader(awsRegion string) option { 107 | return func(c *FileCache) error { 108 | // Reuse a single manager so that its per-bucket downloader cache is effective 109 | manager := NewS3RegionManagedDownloader(awsRegion) 110 | c.downloaders[DownloadMangerS3] = func(dr *DownloadRecord, localFile *os.File) error { 111 | return manager.Download(dr, localFile, c.DownloadTimeout) 112 | } 113 | 114 | return nil 115 | } 116 | } 117 | 118 | // DropboxDownloader allows the DownloadFunc to pull files from Dropbox 119 | // accounts. Bubbles up errors from the Hashicorp LRU library when 120 | // something goes wrong there. 121 | func DropboxDownloader() option { 122 | return func(c *FileCache) error { 123 | c.downloaders[DownloadMangerDropbox] = func(dr *DownloadRecord, localFile *os.File) error { 124 | return DropboxDownload(dr, localFile, c.DownloadTimeout) 125 | } 126 | 127 | return nil 128 | } 129 | } 130 | 131 | // download is a generic wrapper which performs common actions before delegating to the 132 | // specific downloader implementations 133 | func (c *FileCache) download(dr *DownloadRecord, localPath string) error { 134 | directory := filepath.Dir(localPath) 135 | if directory != "." { 136 | // Make sure the path to the local file exists 137 | log.Debugf("MkdirAll() on %s", filepath.Dir(localPath)) 138 | err := os.MkdirAll(filepath.Dir(localPath), 0755) 139 | if err != nil { 140 | return fmt.Errorf("could not create local directory: %s", err) 141 | } 142 | } 143 | 144 | localFile, err := os.Create(localPath) 145 | if err != nil { 146 | return fmt.Errorf("could not create local file: %s", err) 147 | } 148 | defer localFile.Close() 149 | 150 | if downloader, ok := c.downloaders[dr.Manager]; ok { 151 | return downloader(dr, localFile) 152 | } 153 | 154 | return fmt.Errorf("no downloader found for %q", dr.Path) 155 | } 156 | 157 | // New returns a properly configured cache. Bubbles up errors from the Hashicorp 158 | // LRU library when something goes wrong there. The configured cache will have a 159 | // noop DownloadFunc, which should be replaced if you want to actually get files 160 | // from somewhere, e.g. via the S3Downloader or DropboxDownloader options. 161 | func New(size int, baseDir string, opts ...option) (*FileCache, error) { 162 | fCache := &FileCache{ 163 | Waiting: make(map[string]chan struct{}), 164 | downloaders: make(map[DownloadManager]RecordDownloaderFunc), 165 | } 166 | fCache.DownloadFunc = fCache.download 167 | 168 | if err := setSize(size)(fCache); err != nil { 169 | return nil, err 170 | } 171 | 172 | if err := setBaseDir(baseDir)(fCache); err != nil { 173 | return nil, err 174 | } 175 | 176 | for _, opt := range opts { 177 | err := opt(fCache) 178 | if err != nil { 179 | return nil, fmt.Errorf("invalid option: %s", err) 180 | } 181 | } 182 | 183 | return fCache, nil 184 | } 185 | 186 | // FetchNewerThan will look in the cache for a file, make sure it's newer than 187 | // timestamp, and if so return true. Otherwise it will possibly download the file 188 | // and only return false if it's unable to do so. 189 | func (c *FileCache) FetchNewerThan(dr *DownloadRecord, timestamp time.Time) bool { 190 | if !c.Contains(dr) { 191 | return c.Fetch(dr) 192 | } 193 | 194 | storagePath := c.GetFileName(dr) 195 | stat, err := times.Stat(storagePath) 196 | if err != nil { 197 | return c.Fetch(dr) 198 | } 199 | 200 | // We use mtime because the file could have been overwritten with new data. 201 | // Compare the timestamp, and check the cache again as well, since it 
could have changed 202 | if c.Contains(dr) && timestamp.Before(stat.ModTime()) { 203 | return true 204 | } 205 | 206 | return c.Reload(dr) 207 | } 208 | 209 | // Fetch will return true if we have the file, or will go download the file and 210 | // return true if we can. It will return false only if it's unable to fetch the 211 | // file from the backing store (S3). 212 | func (c *FileCache) Fetch(dr *DownloadRecord) bool { 213 | if c.Contains(dr) { 214 | return true 215 | } 216 | 217 | err := c.MaybeDownload(dr) 218 | if err != nil { 219 | log.Errorf("Tried to fetch file %s, got '%s'", dr.Path, err) 220 | return false 221 | } 222 | 223 | return true 224 | } 225 | 226 | // Reload will remove a file from the cache and attempt to reload from the 227 | // backing store, calling MaybeDownload(). 228 | func (c *FileCache) Reload(dr *DownloadRecord) bool { 229 | c.Cache.Remove(dr.GetUniqueName()) 230 | 231 | err := c.MaybeDownload(dr) 232 | if err != nil { 233 | log.Errorf("Tried to fetch file %s, got '%s'", dr.Path, err) 234 | return false 235 | } 236 | 237 | return true 238 | } 239 | 240 | // Contains looks to see if we have an entry in the cache for this file. 241 | func (c *FileCache) Contains(dr *DownloadRecord) bool { 242 | return c.Cache.Contains(dr.GetUniqueName()) 243 | } 244 | 245 | // MaybeDownload might go out to the backing store (S3) and get the file if the 246 | // file isn't already being downloaded in another routine. In both cases it will 247 | // block until the download is completed either by this goroutine or another one. 248 | func (c *FileCache) MaybeDownload(dr *DownloadRecord) error { 249 | // See if someone is already downloading 250 | c.WaitLock.Lock() 251 | if waitChan, ok := c.Waiting[dr.GetUniqueName()]; ok { 252 | c.WaitLock.Unlock() 253 | 254 | log.Debugf("Awaiting download of %s", dr.Path) 255 | <-waitChan 256 | return nil 257 | } 258 | 259 | // The file could have arrived while we were getting here 260 | if c.Contains(dr) { 261 | c.WaitLock.Unlock() 262 | return nil 263 | } 264 | 265 | // Still don't have it, let's fetch it. 266 | // This tells other goroutines that we're fetching, and 267 | // lets us signal completion. 268 | log.Debugf("Making channel for %s", dr.Path) 269 | c.Waiting[dr.GetUniqueName()] = make(chan struct{}) 270 | c.WaitLock.Unlock() 271 | 272 | // Ensure we don't leave the channel open when leaving this function 273 | defer func() { 274 | c.WaitLock.Lock() 275 | log.Debugf("Deleting channel for %s", dr.Path) 276 | close(c.Waiting[dr.GetUniqueName()]) // Notify anyone waiting on us 277 | delete(c.Waiting, dr.GetUniqueName()) // Remove it from the waiting map 278 | c.WaitLock.Unlock() 279 | }() 280 | 281 | storagePath := c.GetFileName(dr) 282 | err := c.DownloadFunc(dr, storagePath) 283 | if err != nil { 284 | return err 285 | } 286 | 287 | c.Cache.Add(dr.GetUniqueName(), storagePath) 288 | 289 | return nil 290 | } 291 | 292 | // onEvictDelete is a callback that is triggered when the LRU cache expires an 293 | // entry. 
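//
// Callers who need to observe evictions can hook in through the public
// OnEvict field before handing the cache any traffic; a small sketch
// (the logging here is illustrative):
//
//	cache.OnEvict = func(key interface{}, value interface{}) {
//		log.Debugf("evicted %v (local file %v)", key, value)
//	}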
294 | func (c *FileCache) onEvictDelete(key interface{}, value interface{}) { 295 | filename := key.(string) 296 | storagePath := value.(string) 297 | 298 | if c.OnEvict != nil { 299 | c.OnEvict(key, value) 300 | } 301 | 302 | log.Debugf("Got eviction notice for '%s', removing", key) 303 | 304 | err := os.Remove(storagePath) 305 | if err != nil { 306 | log.Errorf("Unable to evict '%s' at local path '%s': %s", filename, storagePath, err) 307 | return 308 | } 309 | } 310 | 311 | // Purge clears all the files from the cache (via the onEvict callback for each key). 312 | func (c *FileCache) Purge() { 313 | c.Cache.Purge() 314 | } 315 | 316 | // PurgeAsync clears all the files from the cache and takes an optional channel 317 | // to close when the purge has completed. 318 | func (c *FileCache) PurgeAsync(doneChan chan struct{}) { 319 | go func() { 320 | c.Purge() 321 | if doneChan != nil { 322 | close(doneChan) 323 | } 324 | }() 325 | } 326 | 327 | // GetFileName returns the full storage path and file name for a file, if it were 328 | // in the cache. This does _not_ check to see if the file is actually _in_ the 329 | // cache. This builds a cache structure of up to 256 directories, each beginning 330 | // with the first 2 letters of the FNV32 hash of the filename. This is then joined 331 | // to the base dir and MD5 hashed filename to form the cache path for each file. 332 | // It preserves the file extension (if present) 333 | // 334 | // e.g. /base_dir/2b/b0804ec967f48520697662a204f5fe72 335 | // 336 | func (c *FileCache) GetFileName(dr *DownloadRecord) string { 337 | hashedFilename := md5.Sum([]byte(dr.Path)) 338 | fnvHasher := fnv.New32() 339 | // The current implementation of fnv.New32().Write never returns a non-nil error 340 | _, err := fnvHasher.Write([]byte(dr.Path)) 341 | if err != nil { 342 | log.Errorf("Failed to compute the fnv hash: %s", err) 343 | } 344 | hashedDir := fnvHasher.Sum(nil) 345 | 346 | // If we don't find an original file extension, we'll default to this one 347 | extension := c.DefaultExtension 348 | 349 | // Look in the last 5 characters for a . 
and extension 350 | lastDot := strings.LastIndexByte(dr.Path, '.') 351 | if lastDot != -1 && lastDot > len(dr.Path)-6 { 352 | extension = dr.Path[lastDot:] 353 | } 354 | 355 | var fileName string 356 | if len(dr.Args) != 0 { 357 | // In order to avoid cache collisions on the same filename, when the 358 | // dr.Args contain recognised HTTP headers, append their 359 | // hashed value between the hashedFilename and extension with an _ prefix 360 | fileName = fmt.Sprintf("%x_%s%s", hashedFilename, dr.HashedArgs, extension) 361 | } else { 362 | fileName = fmt.Sprintf("%x%s", hashedFilename, extension) 363 | } 364 | 365 | dir := fmt.Sprintf("%x", hashedDir[:1]) 366 | return filepath.Join(c.BaseDir, dir, filepath.FromSlash(path.Clean("/"+fileName))) 367 | } 368 | 369 | // getHashedArgs computes the MD5 sum of the arguments of a DownloadRecord which 370 | // match the HashableArgs set and returns the hash as a hex-encoded string 371 | func getHashedArgs(args map[string]string) string { 372 | if len(args) == 0 { 373 | return "" 374 | } 375 | 376 | var builder strings.Builder 377 | for hashableArg := range HashableArgs { 378 | if arg, ok := args[hashableArg]; ok { 379 | // WriteString on a strings.Builder is documented never to return 380 | // a non-nil error, so there is no failure case to handle when 381 | // concatenating the matching argument values 382 | _, _ = builder.WriteString(arg) 383 | } 384 | } 385 | 386 | if builder.Len() == 0 { 387 | return "" 388 | } 389 | 390 | hashedArgs := md5.Sum([]byte(builder.String())) 391 | 392 | return fmt.Sprintf("%x", hashedArgs[:]) 393 | } 394 | 395 | // bucketToDownloadManager matches the given bucket to a suitable download manager 396 | // TODO: Implement this in a more robust / generic way 397 | func bucketToDownloadManager(bucket string) DownloadManager { 398 | switch bucket { 399 | case "dropbox": 400 | return DownloadMangerDropbox 401 | default: 402 | return DownloadMangerS3 403 | } 404 | } 405 | 406 | // NewDownloadRecord converts the incoming URL path into a download record containing a cached 407 | // filename (this is the filename on the backing store, not the cached filename locally) 408 | // together with the args needed for authentication 409 | func NewDownloadRecord(url string, args map[string]string) (*DownloadRecord, error) { 410 | pathParts := strings.Split(strings.TrimPrefix(url, "/documents/"), "/") 411 | 412 | // We need at least a bucket and filename 413 | if len(pathParts) < 2 { 414 | return nil, errInvalidURLPath 415 | } 416 | 417 | path := strings.Join(pathParts, "/") 418 | 419 | if path == "" || path == "/" { 420 | return nil, errInvalidURLPath 421 | } 422 | 423 | // Make sure all arg names are lower case and contain only the ones we recognise 424 | normalisedArgs := make(map[string]string, len(args)) 425 | for arg, value := range args { 426 | normalisedArg := strings.ToLower(arg) 427 | if _, ok := HashableArgs[normalisedArg]; !ok { 428 | continue 429 | } 430 | normalisedArgs[normalisedArg] = value 431 | } 432 | 433 | return &DownloadRecord{ 434 | Manager: bucketToDownloadManager(pathParts[0]), 435 | Path: path, 436 | Args: normalisedArgs, 437 | HashedArgs: getHashedArgs(normalisedArgs), 438 | }, nil 439 | } 440 | 441 | // GetUniqueName returns a *HOPEFULLY* unique name for the download record 442 | func (dr *DownloadRecord) GetUniqueName() string { 443 | if len(dr.Args) > 0 { 444 | return fmt.Sprintf("%s_%s", dr.Path, dr.HashedArgs) 445 | } 446 | 447 | return dr.Path 448 | } 449 | -------------------------------------------------------------------------------- /filecache_test.go: -------------------------------------------------------------------------------- 1 | 
package filecache 2 | 3 | import ( 4 | "crypto/md5" 5 | "errors" 6 | "fmt" 7 | "io/ioutil" 8 | "os" 9 | "path/filepath" 10 | "strings" 11 | "sync" 12 | "time" 13 | 14 | . "github.com/onsi/ginkgo" 15 | . "github.com/onsi/gomega" 16 | ) 17 | 18 | var _ = Describe("Filecache", func() { 19 | var ( 20 | cache *FileCache 21 | err error 22 | 23 | didDownload bool 24 | downloadShouldSleep bool 25 | downloadShouldError bool 26 | downloadCount int 27 | countLock sync.Mutex 28 | cacheFile string 29 | s3FilePath = "/documents/test-bucket/foo.bar" 30 | dropboxFilePath = "/documents/dropbox/foo.bar" 31 | dropboxAccessToken = strings.ToLower("DropboxAccessToken") 32 | ) 33 | 34 | mockDownloader := func(dr *DownloadRecord, localPath string) error { 35 | if downloadShouldError { 36 | return errors.New("Oh no! Tragedy!") 37 | } 38 | if downloadShouldSleep { 39 | time.Sleep(10 * time.Millisecond) 40 | } 41 | countLock.Lock() 42 | downloadCount += 1 43 | countLock.Unlock() 44 | didDownload = true 45 | return nil 46 | } 47 | 48 | // Set the dummy dropboxAccessToken in the global HashableArgs map 49 | HashableArgs[dropboxAccessToken] = struct{}{} 50 | 51 | BeforeEach(func() { 52 | cache, err = New(10, ".", DownloadTimeout(1*time.Millisecond), S3Downloader("gondor-north-1")) 53 | Expect(err).To(BeNil()) 54 | 55 | // Reset between runs 56 | didDownload = false 57 | downloadShouldError = false 58 | downloadShouldSleep = false 59 | }) 60 | 61 | Describe("New()", func() { 62 | BeforeEach(func() { 63 | cache, err = New(10, ".") 64 | Expect(err).To(BeNil()) 65 | }) 66 | It("returns a properly configured instance", func() { 67 | Expect(cache.Waiting).NotTo(BeNil()) 68 | Expect(cache.Cache).NotTo(BeNil()) 69 | Expect(cache.Cache.Len()).To(Equal(0)) 70 | Expect(cache.BaseDir).To(Equal(".")) 71 | }) 72 | 73 | It("fails to download stuff", func() { 74 | Expect(cache.DownloadFunc(&DownloadRecord{Path: "junk"}, "junk")).Should(Not(Succeed())) 75 | }) 76 | }) 77 | 78 | Describe("New() with S3Downloader and DropboxDownloader", func() { 79 | It("returns a properly configured instance", func() { 80 | cache, err = New(10, ".", S3Downloader("gondor-north-1"), DropboxDownloader()) 81 | Expect(err).To(BeNil()) 82 | Expect(cache.downloaders[DownloadMangerS3]).To(Not(BeNil())) 83 | Expect(cache.downloaders[DownloadMangerDropbox]).To(Not(BeNil())) 84 | }) 85 | }) 86 | 87 | Describe("Contains()", func() { 88 | It("identifies keys that are not present", func() { 89 | Expect(cache.Contains(&DownloadRecord{Path: "gandalf"})).To(BeFalse()) 90 | }) 91 | 92 | It("identifies keys that are present", func() { 93 | cache.Cache.Add("gandalf", true) 94 | Expect(cache.Contains(&DownloadRecord{Path: "gandalf"})).To(BeTrue()) 95 | }) 96 | }) 97 | 98 | Describe("MaybeDownload()", func() { 99 | BeforeEach(func() { 100 | cache, err = New(10, ".", S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 101 | Expect(err).ShouldNot(HaveOccurred()) 102 | cache.DownloadFunc = mockDownloader 103 | 104 | downloadCount = 0 105 | }) 106 | 107 | It("downloads a file that's not in the cache", func() { 108 | err = cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 109 | 110 | Expect(err).To(BeNil()) 111 | Expect(didDownload).To(BeTrue()) 112 | Expect(cache.Contains(&DownloadRecord{Path: "bilbo"})).To(BeTrue()) 113 | }) 114 | 115 | It("returns an error when the backing downloader failed", func() { 116 | downloadShouldError = true 117 | 118 | err = cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 119 | Expect(err).To(HaveOccurred()) 120 | }) 121 | 122 
| It("does not leave garbage in 'Waiting'", func() { 123 | err = cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 124 | Expect(err).ShouldNot(HaveOccurred()) 125 | 126 | _, ok := cache.Waiting["bilbo"] 127 | Expect(ok).To(BeFalse()) 128 | }) 129 | 130 | It("adds entries to the cache after downloading", func() { 131 | Expect(cache.Contains(&DownloadRecord{Path: "bilbo"})).NotTo(BeTrue()) 132 | 133 | err = cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 134 | Expect(err).ShouldNot(HaveOccurred()) 135 | 136 | Expect(cache.Contains(&DownloadRecord{Path: "bilbo"})).To(BeTrue()) 137 | }) 138 | 139 | It("doesn't duplicate a download that started already", func() { 140 | // If the download doesn't take any time then we end up 141 | // falling back to the test case scenario "re-download on 142 | // a data race" below. 143 | downloadShouldSleep = true 144 | 145 | var wg sync.WaitGroup 146 | for i := 0; i < 10; i++ { 147 | wg.Add(1) 148 | go func() { 149 | err := cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 150 | Expect(err).ShouldNot(HaveOccurred()) 151 | 152 | wg.Done() 153 | }() 154 | } 155 | wg.Wait() 156 | 157 | Expect(didDownload).To(BeTrue()) 158 | Expect(downloadCount).To(Equal(1)) 159 | Expect(err).NotTo(HaveOccurred()) 160 | }) 161 | 162 | It("doesn't re-download on a data race", func() { 163 | var wg sync.WaitGroup 164 | for i := 0; i < 10; i++ { 165 | wg.Add(1) 166 | go func() { 167 | err := cache.MaybeDownload(&DownloadRecord{Path: "bilbo"}) 168 | Expect(err).ShouldNot(HaveOccurred()) 169 | 170 | wg.Done() 171 | }() 172 | } 173 | wg.Wait() 174 | 175 | Expect(didDownload).To(BeTrue()) 176 | Expect(downloadCount).To(Equal(1)) 177 | Expect(err).NotTo(HaveOccurred()) 178 | }) 179 | }) 180 | 181 | Describe("Fetch()", func() { 182 | BeforeEach(func() { 183 | cache, err = New(10, ".", S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 184 | cache.DownloadFunc = mockDownloader 185 | didDownload = false 186 | }) 187 | 188 | It("doesn't try to download files we already have", func() { 189 | cache.Cache.Add("aragorn", true) 190 | 191 | Expect(cache.Fetch(&DownloadRecord{Path: "aragorn"})).To(BeTrue()) 192 | Expect(didDownload).To(BeFalse()) 193 | }) 194 | 195 | It("downloads the file when we don't have it", func() { 196 | Expect(cache.Fetch(&DownloadRecord{Path: "aragorn"})).To(BeTrue()) 197 | Expect(didDownload).To(BeTrue()) 198 | }) 199 | 200 | It("downloads a new file for records with the same path but different args", func() { 201 | args := map[string]string{ 202 | dropboxAccessToken: "KnockKnock", 203 | } 204 | 205 | fooRec, _ := NewDownloadRecord(s3FilePath, args) 206 | Expect(cache.Fetch(fooRec)).To(BeTrue()) 207 | Expect(didDownload).To(BeTrue()) 208 | 209 | // It should be in the cache now 210 | didDownload = false 211 | Expect(cache.Fetch(fooRec)).To(BeTrue()) 212 | Expect(didDownload).To(BeFalse()) 213 | 214 | // Using different args should create a new cache entry 215 | didDownload = false 216 | args[dropboxAccessToken] = "ComeIn" 217 | fooRec, _ = NewDownloadRecord(dropboxFilePath, args) 218 | Expect(cache.Fetch(fooRec)).To(BeTrue()) 219 | Expect(didDownload).To(BeTrue()) 220 | }) 221 | }) 222 | 223 | Describe("FetchNewerThan()", func() { 224 | BeforeEach(func() { 225 | cache, err = New(10, os.TempDir(), S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 226 | cache.DownloadFunc = mockDownloader 227 | didDownload = false 228 | 229 | // Manually write the file to the cache 230 | cacheFile = filepath.Join(os.TempDir(), 
cache.GetFileName(&DownloadRecord{Path: "aragorn"})) 231 | err = os.MkdirAll(filepath.Dir(cacheFile), 0755) 232 | Expect(err).ShouldNot(HaveOccurred()) 233 | err = ioutil.WriteFile(cacheFile, []byte(`some bytes`), 0644) 234 | Expect(err).ShouldNot(HaveOccurred()) 235 | }) 236 | 237 | AfterEach(func() { 238 | os.RemoveAll(cacheFile) 239 | }) 240 | 241 | It("doesn't try to download files we already have if they are new enough", func() { 242 | cache.Cache.Add("aragorn", cache.GetFileName(&DownloadRecord{Path: "aragorn"})) 243 | err = os.MkdirAll(filepath.Dir(cache.GetFileName(&DownloadRecord{Path: "aragorn"})), 0755) 244 | Expect(err).ShouldNot(HaveOccurred()) 245 | err = ioutil.WriteFile(cache.GetFileName(&DownloadRecord{Path: "aragorn"}), []byte("aragorn"), 0644) 246 | Expect(err).ShouldNot(HaveOccurred()) 247 | 248 | Expect(cache.FetchNewerThan(&DownloadRecord{Path: "aragorn"}, time.Now().Add(-10*time.Minute))).To(BeTrue()) 249 | Expect(didDownload).To(BeFalse()) 250 | }) 251 | 252 | It("downloads the file when it's too old", func() { 253 | cache.Cache.Add("aragorn", cache.GetFileName(&DownloadRecord{Path: "aragorn"})) 254 | Expect(cache.FetchNewerThan(&DownloadRecord{Path: "aragorn"}, time.Now().Add(10*time.Minute))).To(BeTrue()) 255 | Expect(didDownload).To(BeTrue()) 256 | }) 257 | }) 258 | 259 | Describe("Reload()", func() { 260 | BeforeEach(func() { 261 | cache, err = New(10, os.TempDir(), S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 262 | cache.DownloadFunc = mockDownloader 263 | f, _ := os.OpenFile(cache.GetFileName(&DownloadRecord{Path: "aragorn"}), os.O_CREATE, 0644) 264 | f.Close() 265 | didDownload = false 266 | }) 267 | 268 | It("downloads the file even when we have it", func() { 269 | cache.Cache.Add("aragorn", cache.GetFileName(&DownloadRecord{Path: "aragorn"})) 270 | Expect(cache.Reload(&DownloadRecord{Path: "aragorn"})).To(BeTrue()) 271 | Expect(didDownload).To(BeTrue()) 272 | }) 273 | }) 274 | 275 | Describe("onEvictDelete()", func() { 276 | BeforeEach(func() { 277 | cache, _ = New(10, ".", S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 278 | }) 279 | 280 | It("calls the downstream eviction callback if it's configured", func() { 281 | var didRun bool 282 | 283 | cache.Cache.Add("test-entry", "cache-tmp") 284 | 285 | // We add a file here to the filesystem so we can delete it on purge 286 | file, err := os.Create("cache-tmp") 287 | Expect(err).To(BeNil()) 288 | err = file.Close() 289 | Expect(err).To(BeNil()) 290 | 291 | cache.OnEvict = func(key interface{}, value interface{}) { 292 | didRun = true 293 | } 294 | 295 | cache.Cache.Purge() 296 | 297 | Expect(didRun).To(BeTrue()) 298 | }) 299 | }) 300 | 301 | Describe("GetFileName()", func() { 302 | BeforeEach(func() { 303 | cache, _ = New(10, ".", S3Downloader("gondor-north-1"), DownloadTimeout(1*time.Millisecond)) 304 | }) 305 | 306 | It("fetches the expected file name for S3 downloads with nil args", func() { 307 | dr, _ := NewDownloadRecord(s3FilePath, nil) 308 | fname := cache.GetFileName(dr) 309 | 310 | Expect(fname).To(Equal("4f/a197d51bc70c732281b46e122ff7af17.bar")) 311 | }) 312 | 313 | It("fetches the expected file name for S3 downloads with non-nil args", func() { 314 | args := map[string]string{ 315 | "DummyHeader": "SomeValue", 316 | } 317 | dr, _ := NewDownloadRecord(s3FilePath, args) 318 | fname := cache.GetFileName(dr) 319 | 320 | Expect(fname).To(Equal("4f/a197d51bc70c732281b46e122ff7af17.bar")) 321 | }) 322 | 323 | It("fetches the expected file name for Dropbox 
downloads", func() { 324 | args := map[string]string{ 325 | dropboxAccessToken: "KnockKnock", 326 | "DummyHeader": "SomeValue", 327 | } 328 | dr, _ := NewDownloadRecord(dropboxFilePath, args) 329 | fname := cache.GetFileName(dr) 330 | 331 | Expect(fname).To(Equal("8b/5e92c8291b661710e0d1d25db4053f0d_1ff55f50db16da0ad21b8d68ce5aa8cb.bar")) 332 | }) 333 | 334 | It("appends a default extension when there is not one on the original file", func() { 335 | cache.DefaultExtension = ".foo" 336 | fname := cache.GetFileName(&DownloadRecord{Path: "missing-an-extension"}) 337 | 338 | Expect(fname).To(HaveSuffix(".foo")) 339 | }) 340 | 341 | It("doesn't append the default extension when the original has one", func() { 342 | cache.DefaultExtension = ".foo" 343 | fname := cache.GetFileName(&DownloadRecord{Path: "has-an-extension.asdf"}) 344 | 345 | Expect(fname).To(HaveSuffix(".asdf")) 346 | }) 347 | 348 | It("prepends a directory to the file path with its name being the first byte of the FNV32 hash of the file name", func() { 349 | fname1 := cache.GetFileName(&DownloadRecord{Path: "james_joyce.pdf"}) 350 | fname2 := cache.GetFileName(&DownloadRecord{Path: "oscar_wilde.pdf"}) 351 | 352 | dir1 := filepath.Dir(fname1) 353 | dir2 := filepath.Dir(fname2) 354 | 355 | Expect(dir1).To(Equal("d3")) 356 | Expect(dir2).To(Equal("dc")) 357 | }) 358 | 359 | Context("With DowloadRecord with existing Args", func() { 360 | It("should include the hashed arguments and extension with _ prefix", func() { 361 | cache, _ = New(10, "mordor-south-1", DropboxDownloader(), DownloadTimeout(1*time.Millisecond)) 362 | args := map[string]string{ 363 | "Location": "Mordor", 364 | "Character": "Gollum", 365 | } 366 | fname := cache.GetFileName(&DownloadRecord{Path: "golum-arrived.pub", Args: args}) 367 | 368 | Expect(fname).To(HavePrefix("mordor-south-1")) 369 | Expect(len(strings.Split(fname, "_"))).To(Equal(2)) 370 | Expect(fname).To(ContainSubstring("_")) 371 | Expect(fname).To(HaveSuffix(".pub")) 372 | 373 | }) 374 | 375 | It("should not included hashed arguments and _ when Args is nil", func() { 376 | cache, _ = New(10, "mordor-south-1", DropboxDownloader(), DownloadTimeout(1*time.Millisecond)) 377 | fname := cache.GetFileName(&DownloadRecord{Path: "golum-arrived.pub", Args: nil}) 378 | Expect(fname).To(HavePrefix("mordor-south-1")) 379 | Expect(fname).NotTo(ContainSubstring("_")) 380 | Expect(fname).To(HaveSuffix(".pub")) 381 | }) 382 | }) 383 | }) 384 | 385 | Describe("NewDownloadRecord()", func() { 386 | dr, err := NewDownloadRecord(s3FilePath, nil) 387 | 388 | It("should not return an error", func() { 389 | Expect(err).NotTo(HaveOccurred()) 390 | }) 391 | 392 | It("strips leading '/documents'", func() { 393 | Expect(dr.Path).To(Not(ContainSubstring("/documents"))) 394 | }) 395 | 396 | // TODO: Revisit this in the future! 
397 | It("doesn't strip the bucket name from the path", func() { 398 | Expect(dr.Path).To(ContainSubstring("test-bucket/")) 399 | }) 400 | 401 | It("doesn't return a leading slash", func() { 402 | Expect(dr.Path).To(Not(HavePrefix("/"))) 403 | }) 404 | 405 | It("returns an error if the filename doesn't have enough components", func() { 406 | dr, err = NewDownloadRecord("/documents/foo-file.pdf", nil) 407 | Expect(err).Should(HaveOccurred()) 408 | }) 409 | 410 | It("uses the dropbox downloader for documents with bucket = 'dropbox'", func() { 411 | dr, err = NewDownloadRecord(dropboxFilePath, nil) 412 | Expect(err).Should(Succeed()) 413 | Expect(dr.Manager).Should(BeEquivalentTo(DownloadMangerDropbox)) 414 | }) 415 | 416 | It("HashedArgs is empty if no HashableArgs args are passed in", func() { 417 | Expect(dr.HashedArgs).To(BeEmpty()) 418 | }) 419 | }) 420 | 421 | Describe("HashedArgs", func() { 422 | It("should hash only the HashableArgs", func() { 423 | args := map[string]string{ 424 | "DropboxAccessToken": "Frodo", 425 | "FoobarAccessToken": "Bilbo", 426 | } 427 | mockRecord, _ := NewDownloadRecord(dropboxFilePath, args) 428 | sum := md5.Sum([]byte(args["DropboxAccessToken"])) 429 | want := fmt.Sprintf("%x", sum[:]) 430 | 431 | Expect(mockRecord.HashedArgs).To(Equal(want)) 432 | }) 433 | 434 | It("should ignore header name casing", func() { 435 | args := map[string]string{ 436 | "Dropboxaccesstoken": "Frodo", 437 | } 438 | mockRecord, _ := NewDownloadRecord(dropboxFilePath, args) 439 | sum := md5.Sum([]byte(args["Dropboxaccesstoken"])) 440 | want := fmt.Sprintf("%x", sum[:]) 441 | 442 | Expect(mockRecord.HashedArgs).To(Equal(want)) 443 | }) 444 | }) 445 | }) 446 | --------------------------------------------------------------------------------