├── .gitignore ├── test.sh ├── go.test.sh ├── google ├── storeutils │ ├── get_test.go │ ├── utils.go │ ├── const.go │ ├── get.go │ ├── utils_test.go │ └── transfer.go ├── README.md ├── google_test.go ├── apistore.go ├── client.go └── store.go ├── file_helper_test.go ├── csbufio ├── reader_test.go ├── writer_test.go ├── reader.go └── writer.go ├── registry.go ├── registry_test.go ├── TODO.md ├── LICENSE ├── cachecleaner.go ├── doc.go ├── azure ├── example │ └── main.go ├── store_test.go ├── README.md └── store.go ├── updatepkgs.sh ├── query.go ├── store_test.go ├── sftp └── store_test.go ├── file_helper.go ├── awss3 ├── store_test.go └── store.go ├── localfs ├── emptydir_test.go ├── store_test.go └── store.go ├── iterator.go ├── go.mod ├── README.md ├── store.go └── go.sum /.gitignore: -------------------------------------------------------------------------------- 1 | google_jwt.json 2 | .env 3 | 4 | sniff* 5 | vendor/ 6 | coverage.* 7 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | go get -t -v ./... 4 | 5 | ./go.test.sh 6 | 7 | bash <(curl -s https://codecov.io/bash) 8 | -------------------------------------------------------------------------------- /go.test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... 
| grep -v vendor | sort -r); do 7 | go test -timeout=5m -race -coverprofile=profile.out -covermode=atomic $d 8 | if [ -f profile.out ]; then 9 | cat profile.out >> coverage.txt 10 | rm profile.out 11 | fi 12 | done -------------------------------------------------------------------------------- /google/storeutils/get_test.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestGetObject(t *testing.T) { 8 | if testGetFile == "" { 9 | t.Skip("TESTFILE EnvVar must be set to run test") 10 | } 11 | gsc := Setup(t) 12 | 13 | buff, err := GetObject(gsc, testBucket, testGetFile) 14 | if err != nil { 15 | t.Errorf("Error reading file %s: %v", testGetFile, err) 16 | } 17 | str := buff.String() 18 | if len(str) == 0 { 19 | t.Errorf("No bytes read from GCS") 20 | } 21 | t.Logf("%s", str) 22 | } 23 | -------------------------------------------------------------------------------- /google/storeutils/utils.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "github.com/lytics/cloudstorage" 5 | "golang.org/x/net/context" 6 | ) 7 | 8 | // GetAndOpen is a convenience method that combines Store.Get() and Object.Open() into 9 | // a single call. 
10 | func GetAndOpen(s cloudstorage.Store, o string, level cloudstorage.AccessLevel) (cloudstorage.Object, error) { 11 | obj, err := s.Get(context.Background(), o) 12 | if err != nil { 13 | return nil, err 14 | } 15 | 16 | _, err = obj.Open(level) 17 | if err != nil { 18 | return nil, err 19 | } 20 | return obj, nil 21 | } 22 | -------------------------------------------------------------------------------- /file_helper_test.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestETAG(t *testing.T) { 10 | require.Equal(t, "hello", CleanETag("hello")) 11 | require.Equal(t, "hello", CleanETag(`"hello"`)) 12 | require.Equal(t, "hello", CleanETag(`\"hello\"`)) 13 | require.Equal(t, "hello", CleanETag("\"hello\"")) 14 | } 15 | func TestContentType(t *testing.T) { 16 | require.Equal(t, "text/csv; charset=utf-8", ContentType("data.csv")) 17 | require.Equal(t, "application/json", ContentType("data.json")) 18 | require.Equal(t, "application/octet-stream", ContentType("data.unknown")) 19 | } 20 | -------------------------------------------------------------------------------- /csbufio/reader_test.go: -------------------------------------------------------------------------------- 1 | package csbufio 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/acomagu/bufpipe" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestReaderContextDone(t *testing.T) { 12 | t.Parallel() 13 | 14 | ctx, cancel := context.WithCancel(context.Background()) 15 | cancel() 16 | 17 | pr, pw := bufpipe.New([]byte("some-data")) 18 | pw.Close() 19 | rc := NewReader(ctx, pr) 20 | 21 | var p []byte 22 | n, err := rc.Read(p) 23 | require.ErrorIs(t, err, context.Canceled) 24 | require.Equal(t, 0, n) 25 | require.Len(t, p, 0) 26 | 27 | err = rc.Close() 28 | require.ErrorIs(t, err, context.Canceled) 29 | } 30 | 
-------------------------------------------------------------------------------- /registry.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | var ( 9 | // global registry lock 10 | registryMu sync.RWMutex 11 | // store provider registry 12 | storeProviders = make(map[string]StoreProvider) 13 | ) 14 | 15 | // StoreProvider a provider function for creating New Stores 16 | type StoreProvider func(*Config) (Store, error) 17 | 18 | // Register adds a store type provider. 19 | func Register(storeType string, provider StoreProvider) { 20 | registryMu.Lock() 21 | defer registryMu.Unlock() 22 | if _, ok := storeProviders[storeType]; ok { 23 | panic(fmt.Sprintf("Cannot provide duplicate store %q", storeType)) 24 | } 25 | storeProviders[storeType] = provider 26 | } 27 | -------------------------------------------------------------------------------- /registry_test.go: -------------------------------------------------------------------------------- 1 | package cloudstorage_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/lytics/cloudstorage" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestRegistry(t *testing.T) { 12 | cloudstorage.Register("teststore", fakeProvider) 13 | paniced := didPanic(func() { 14 | cloudstorage.Register("teststore", fakeProvider) 15 | }) 16 | require.True(t, paniced) 17 | } 18 | func didPanic(f func()) (dp bool) { 19 | defer func() { 20 | if r := recover(); r != nil { 21 | dp = true 22 | } 23 | }() 24 | f() 25 | return dp 26 | } 27 | 28 | func fakeProvider(conf *cloudstorage.Config) (cloudstorage.Store, error) { 29 | return nil, fmt.Errorf("Not Implemented") 30 | } 31 | -------------------------------------------------------------------------------- /csbufio/writer_test.go: -------------------------------------------------------------------------------- 1 | package csbufio 2 | 3 | import ( 4 | "context" 5 | 
"io" 6 | "testing" 7 | 8 | "github.com/acomagu/bufpipe" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestWriterContextDone(t *testing.T) { 13 | t.Parallel() 14 | 15 | ctx, cancel := context.WithCancel(context.Background()) 16 | cancel() 17 | 18 | pr, pw := bufpipe.New(nil) 19 | wc := NewWriter(ctx, pw) 20 | 21 | n, err := wc.Write([]byte("some-data")) 22 | require.ErrorIs(t, err, context.Canceled) 23 | require.Equal(t, 0, n) 24 | err = pw.Close() 25 | require.NoError(t, err) 26 | 27 | b, err := io.ReadAll(pr) 28 | require.NoError(t, err, "error reading") 29 | require.Equal(t, 0, len(b), "") 30 | 31 | err = wc.Close() 32 | require.ErrorIs(t, err, context.Canceled) 33 | } 34 | -------------------------------------------------------------------------------- /google/storeutils/const.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | // Copied from cloudstorage 10 | var GCSRetries = 10 11 | 12 | //backoff sleeps a random amount so we can. 13 | //retry failed requests using a randomized exponential backoff: 14 | //wait a random period between [0..1] seconds and retry; if that fails, 15 | //wait a random period between [0..2] seconds and retry; if that fails, 16 | //wait a random period between [0..4] seconds and retry, and so on, 17 | //with an upper bounds to the wait period being 16 seconds. 
18 | //http://play.golang.org/p/l9aUHgiR8J 19 | func backoff(try int) { 20 | nf := math.Pow(2, float64(try)) 21 | nf = math.Max(1, nf) 22 | nf = math.Min(nf, 16) 23 | r := rand.Int31n(int32(nf)) 24 | d := time.Duration(r) * time.Second 25 | time.Sleep(d) 26 | } 27 | -------------------------------------------------------------------------------- /csbufio/reader.go: -------------------------------------------------------------------------------- 1 | package csbufio 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "io" 7 | "os" 8 | ) 9 | 10 | func OpenReader(ctx context.Context, name string) (io.ReadCloser, error) { 11 | f, err := os.Open(name) 12 | if err != nil { 13 | return nil, err 14 | } 15 | return NewReader(ctx, f), nil 16 | } 17 | 18 | func NewReader(ctx context.Context, rc io.ReadCloser) io.ReadCloser { 19 | return &bufReadCloser{ctx, bufio.NewReader(rc), rc} 20 | } 21 | 22 | type bufReadCloser struct { 23 | ctx context.Context 24 | r io.Reader 25 | c io.Closer 26 | } 27 | 28 | func (b *bufReadCloser) Read(p []byte) (int, error) { 29 | if err := b.ctx.Err(); err != nil { 30 | return 0, err 31 | } 32 | return b.r.Read(p) 33 | } 34 | 35 | func (b *bufReadCloser) Close() error { 36 | if err := b.ctx.Err(); err != nil { 37 | return err 38 | } 39 | return b.c.Close() 40 | } 41 | -------------------------------------------------------------------------------- /csbufio/writer.go: -------------------------------------------------------------------------------- 1 | package csbufio 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "io" 7 | "os" 8 | ) 9 | 10 | type bufWriteCloser struct { 11 | ctx context.Context 12 | w *bufio.Writer 13 | c io.Closer 14 | } 15 | 16 | func OpenWriter(ctx context.Context, name string) (io.WriteCloser, error) { 17 | f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE, 0665) 18 | if err != nil { 19 | return nil, err 20 | } 21 | return NewWriter(ctx, f), nil 22 | } 23 | 24 | // NewWriter is a io.WriteCloser. 
25 | func NewWriter(ctx context.Context, rc io.WriteCloser) io.WriteCloser { 26 | return &bufWriteCloser{ctx, bufio.NewWriter(rc), rc} 27 | } 28 | 29 | func (b *bufWriteCloser) Write(p []byte) (int, error) { 30 | if err := b.ctx.Err(); err != nil { 31 | return 0, err 32 | } 33 | return b.w.Write(p) 34 | } 35 | 36 | func (b *bufWriteCloser) Close() error { 37 | if err := b.ctx.Err(); err != nil { 38 | return err 39 | } 40 | if err := b.w.Flush(); err != nil { 41 | return err 42 | } 43 | return b.c.Close() 44 | } 45 | -------------------------------------------------------------------------------- /google/storeutils/get.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "io" 7 | 8 | "cloud.google.com/go/storage" 9 | "golang.org/x/net/context" 10 | 11 | "github.com/lytics/cloudstorage" 12 | ) 13 | 14 | // GetObject Gets a single object's bytes based on bucket and name parameters 15 | func GetObject(gc *storage.Client, bucket, name string) (*bytes.Buffer, error) { 16 | return GetObjectWithContext(context.Background(), gc, bucket, name) 17 | } 18 | 19 | // GetObject Gets a single object's bytes based on bucket and name parameters 20 | func GetObjectWithContext(ctx context.Context, gc *storage.Client, bucket, name string) (*bytes.Buffer, error) { 21 | rc, err := gc.Bucket(bucket).Object(name).NewReader(ctx) 22 | if err != nil { 23 | if errors.Is(err, storage.ErrObjectNotExist) { 24 | return nil, cloudstorage.ErrObjectNotFound 25 | } 26 | return nil, err 27 | } 28 | by, err := io.ReadAll(rc) 29 | if err != nil { 30 | return nil, err 31 | } 32 | return bytes.NewBuffer(by), nil 33 | } 34 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | - [ ] Working some util for prefetching files as a background job (i.e. 
a continuous stream of files). The prefetched files would be handed a list of files (or query) and then it will begin prefetching the files as a consumer processes from the other end of the pipe(channel). Backpressuring if the consumer slows down. When downloading hundreds in a download->process->close loop, the loop will experience pauses as the fetching of files from the cloudstore takes time. Prefetching elements removes most of the pausing, especially if the files take a long time to process. 5 | 6 | - [ ] Add support for stream reading/writing without a local tmp file. Like https://golang.org/pkg/bufio/ 7 | 8 | - [ ] Research supporting multipart uploads to multiple files, then using the https://cloud.google.com/storage/docs/json_api/v1/objects/compose api to concatenate them into one. 9 | - https://cloud.google.com/storage/docs/composite-objects 10 | 11 | - [ ] Create a store to store functions. 12 | - [ ] Rsync: `store.Rsync(s *Store)`, Rsync could be used to sync all files between GCS and S3. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015,2016,2017 Lytics Inc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /cachecleaner.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "time" 7 | 8 | "github.com/araddon/gou" 9 | ) 10 | 11 | // CleanupCacheFiles cleans up old store cache files 12 | // if your process crashes, all of its old cache files, the local copies of the cloudfiles, 13 | // will be left behind. 14 | // This function is a convenience func to help clean up those old files. 15 | // 16 | // I suggest you call this behind a package var sync.Once struct, so it's only called at the 17 | // startup of your application.
18 | func CleanupCacheFiles(maxage time.Duration, TmpDir string) (err error) { 19 | defer func() { 20 | if r := recover(); r != nil { 21 | gou.Errorf("CleanupOldStoreCacheFiles cleanup old files: panic recovery %v\n %s", r, gou.PrettyStack(12)) 22 | } 23 | }() 24 | cleanoldfiles := func(path string, f os.FileInfo, err error) error { 25 | if filepath.Ext(path) == StoreCacheFileExt { 26 | if f.ModTime().Before(time.Now().Add(-(maxage))) { 27 | // delete if the files is older than 1 day 28 | err = os.Remove(path) 29 | if err != nil { 30 | gou.Errorf("CleanupOldStoreCacheFiles error removing an old files: %v", err) 31 | } 32 | } 33 | } 34 | return nil 35 | } 36 | filepath.Walk(TmpDir, cleanoldfiles) 37 | return err 38 | } 39 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package cloudstorage is an interface to make Local, Google, s3 file storage 3 | share a common interface to aid testing local as well as 4 | running in the cloud. 5 | 6 | The primary goal is to create a Store which is a common interface 7 | over each of the (google, s3, local-file-system, azure) etc file storage 8 | systems. Then the methods (Query, filter, get, put) are common, as are 9 | the Files (Objects) themselves. Writing code that supports multiple 10 | backends is now simple. 11 | 12 | 13 | Creating and iterating files 14 | 15 | In this example we are going to create a local-filesystem 16 | store. 
17 | 18 | // This is an example of a local-storage (local filesystem) provider: 19 | config := &cloudstorage.Config{ 20 | Type: localfs.StoreType, 21 | TokenSource: localfs.AuthFileSystem, 22 | LocalFS: "/tmp/mockcloud", 23 | TmpDir: "/tmp/localcache", 24 | } 25 | store, _ := cloudstorage.NewStore(config) 26 | 27 | // Create a query to define the search path 28 | q := cloudstorage.NewQuery("list-test/") 29 | 30 | // Create an Iterator to list files 31 | iter := store.Objects(context.Background(), q) 32 | for { 33 | o, err := iter.Next() 34 | if err == iterator.Done { 35 | break 36 | } 37 | log.Println("found object %v", o.Name()) 38 | } 39 | */ 40 | package cloudstorage 41 | -------------------------------------------------------------------------------- /google/README.md: -------------------------------------------------------------------------------- 1 | google cloud storage store 2 | -------------------------- 3 | Cloudstorage abstraction package for gcs. 4 | 5 | 6 | 7 | ```sh 8 | # the CloudStorage GCS JWT key is an env with full jwt token json encoded. 
9 | export CS_GCS_JWTKEY="{\"project_id\": \"lio-testing\", \"private_key_id\": \" 10 | 11 | ``` 12 | 13 | 14 | ## Example 15 | ```go 16 | 17 | // example with CS_GCS_JWTKEY env var 18 | conf := &cloudstorage.Config{ 19 | Type: google.StoreType, 20 | AuthMethod: google.AuthJWTKeySource, 21 | Project: "my-google-project", 22 | Bucket: "integration-tests-nl", 23 | TmpDir: "/tmp/localcache/google", 24 | } 25 | 26 | // OR read from machine oauth locations 27 | conf := &cloudstorage.Config{ 28 | Type: google.StoreType, 29 | AuthMethod: google.AuthGCEDefaultOAuthToken, 30 | Project: "my-google-project", 31 | Bucket: "integration-tests-nl", 32 | TmpDir: "/tmp/localcache/google", 33 | } 34 | 35 | // OR metadata api if on google cloud 36 | conf := &cloudstorage.Config{ 37 | Type: google.StoreType, 38 | AuthMethod: google.AuthGCEMetaKeySource, 39 | Project: "my-google-project", 40 | Bucket: "integration-tests-nl", 41 | TmpDir: "/tmp/localcache/google", 42 | } 43 | 44 | // create store 45 | store, err := cloudstorage.NewStore(conf) 46 | if err != nil { 47 | return err 48 | } 49 | 50 | 51 | ``` -------------------------------------------------------------------------------- /google/storeutils/utils_test.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "cloud.google.com/go/storage" 8 | "golang.org/x/net/context" 9 | "google.golang.org/api/option" 10 | 11 | "github.com/lytics/cloudstorage" 12 | "github.com/lytics/cloudstorage/google" 13 | ) 14 | 15 | var testBucket = os.Getenv("TESTBUCKET") 16 | var testProject = os.Getenv("TESTPROJECT") 17 | var testGetFile = os.Getenv("TESTFILE") 18 | 19 | func Setup(t *testing.T) *storage.Client { 20 | if testProject == "" || testBucket == "" { 21 | t.Skip("TESTPROJECT, and TESTBUCKET EnvVars must be set to perform integration test") 22 | } 23 | 24 | conf := &cloudstorage.Config{ 25 | Type: google.StoreType, 26 | AuthMethod: 
google.AuthGCEDefaultOAuthToken, 27 | Project: testProject, 28 | Bucket: testBucket, 29 | } 30 | 31 | // Create http client with Google context auth 32 | googleClient, err := google.NewGoogleClient(conf) 33 | if err != nil { 34 | t.Errorf("Failed to create Google Client: %v\n", err) 35 | } 36 | 37 | gsc, err := storage.NewClient(context.Background(), option.WithHTTPClient(googleClient.Client())) 38 | if err != nil { 39 | t.Errorf("Error creating Google cloud storage client. project:%s gs://%s/ err:%v\n", 40 | conf.Project, conf.Bucket, err) 41 | 42 | } 43 | if gsc == nil { 44 | t.Errorf("storage Client returned is nil!") 45 | } 46 | return gsc 47 | } 48 | -------------------------------------------------------------------------------- /azure/example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/araddon/gou" 10 | "google.golang.org/api/iterator" 11 | 12 | "github.com/lytics/cloudstorage" 13 | "github.com/lytics/cloudstorage/azure" 14 | ) 15 | 16 | /* 17 | 18 | # to use azure tests ensure you have exported 19 | 20 | export AZURE_KEY="aaa" 21 | export AZURE_PROJECT="bbb" 22 | export AZURE_BUCKET="cloudstorageunittests" 23 | 24 | */ 25 | 26 | func main() { 27 | tmpDir, err := os.MkdirTemp("/tmp", "azure_example") 28 | if err != nil { 29 | fmt.Println("Could not create temp dir", err) 30 | os.Exit(1) 31 | } 32 | defer os.RemoveAll(tmpDir) 33 | 34 | conf := &cloudstorage.Config{ 35 | Type: azure.StoreType, 36 | AuthMethod: azure.AuthKey, 37 | Bucket: os.Getenv("AZURE_BUCKET"), 38 | Project: os.Getenv("AZURE_PROJECT"), 39 | TmpDir: filepath.Join(tmpDir, "localcache", "azure"), 40 | Settings: make(gou.JsonHelper), 41 | } 42 | 43 | conf.Settings[azure.ConfKeyAuthKey] = os.Getenv("AZURE_KEY") 44 | 45 | // Should error with empty config 46 | store, err := cloudstorage.NewStore(conf) 47 | if err != nil { 48 | fmt.Println("Could 
not get azure store ", err) 49 | os.Exit(1) 50 | } 51 | 52 | folders, err := store.Folders(context.Background(), cloudstorage.NewQueryForFolders("")) 53 | if err != nil { 54 | fmt.Println("Could not get folders ", err) 55 | os.Exit(1) 56 | } 57 | for _, folder := range folders { 58 | fmt.Println("found folder: ", folder) 59 | } 60 | 61 | // Create a search query for all objects 62 | q := cloudstorage.NewQuery("") 63 | // Create an Iterator 64 | iter, err := store.Objects(context.Background(), q) 65 | if err != nil { 66 | fmt.Println("Could not get iter ", err) 67 | os.Exit(1) 68 | } 69 | 70 | for { 71 | o, err := iter.Next() 72 | if err == iterator.Done { 73 | fmt.Println("done, exiting iterator") 74 | break 75 | } 76 | fmt.Println("found object", o.Name()) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /updatepkgs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | cd $GOPATH/src/cloud.google.com/go/ && git checkout master && git pull 4 | cd $GOPATH/src/github.com/Azure/azure-sdk-for-go && git checkout master && git pull 5 | cd $GOPATH/src/github.com/Azure/go-autorest && git checkout master && git pull 6 | cd $GOPATH/src/github.com/araddon/gou && git checkout master && git pull 7 | cd $GOPATH/src/github.com/aws/aws-sdk-go && git checkout master && git pull 8 | cd $GOPATH/src/github.com/bmizerany/assert && git checkout master && git pull 9 | cd $GOPATH/src/github.com/davecgh/go-spew && git checkout master && git pull 10 | cd $GOPATH/src/github.com/dgrijalva/jwt-go && git checkout master && git pull 11 | cd $GOPATH/src/github.com/go-ini/ini && git checkout master && git pull 12 | cd $GOPATH/src/github.com/golang/protobuf && git checkout master && git pull 13 | cd $GOPATH/src/github.com/googleapis/gax-go && git checkout master && git pull 14 | cd $GOPATH/src/github.com/jmespath/go-jmespath && git checkout master && git pull 15 | cd $GOPATH/src/github.com/kr/fs 
&& git checkout master && git pull 16 | cd $GOPATH/src/github.com/kr/pretty && git checkout master && git pull 17 | cd $GOPATH/src/github.com/kr/text && git checkout master && git pull 18 | cd $GOPATH/src/github.com/marstr/guid && git checkout master && git pull 19 | cd $GOPATH/src/github.com/pborman/uuid && git checkout master && git pull 20 | cd $GOPATH/src/golang.org/pkg/errors && git checkout master && git pull 21 | cd $GOPATH/src/golang.org/pkg/sftp && git checkout master && git pull 22 | cd $GOPATH/src/github.com/pmezard/go-difflib && git checkout master && git pull 23 | cd $GOPATH/src/github.com/satori/go.uuid && git checkout master && git pull 24 | cd $GOPATH/src/github.com/stretchr/testify && git checkout master && git pull 25 | cd $GOPATH/src/github.com/go.opencensus.io && git checkout master && git pull 26 | cd $GOPATH/src/golang.org/x/crypto && git checkout master && git pull 27 | cd $GOPATH/src/golang.org/x/net && git checkout master && git pull 28 | cd $GOPATH/src/golang.org/x/oauth2 && git checkout master && git pull 29 | cd $GOPATH/src/golang.org/x/text && git checkout master && git pull 30 | cd $GOPATH/src/google.golang.org/api && git checkout master && git pull 31 | cd $GOPATH/src/google.golang.org/appengine && git checkout master && git pull 32 | cd $GOPATH/src/google.golang.org/genproto && git checkout master && git pull 33 | cd $GOPATH/src/google.golang.org/grpc && git checkout master && git pull 34 | -------------------------------------------------------------------------------- /azure/store_test.go: -------------------------------------------------------------------------------- 1 | package azure_test 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/araddon/gou" 8 | "github.com/stretchr/testify/require" 9 | 10 | "github.com/lytics/cloudstorage" 11 | "github.com/lytics/cloudstorage/azure" 12 | "github.com/lytics/cloudstorage/testutils" 13 | ) 14 | 15 | /* 16 | # to use azure tests ensure you have exported 17 | 18 | export 
AZURE_KEY="aaa" 19 | export AZURE_PROJECT="bbb" 20 | export AZURE_BUCKET="cloudstorageunittests" 21 | */ 22 | var config = &cloudstorage.Config{ 23 | Type: azure.StoreType, 24 | AuthMethod: azure.AuthKey, 25 | Bucket: os.Getenv("AZURE_BUCKET"), 26 | Settings: make(gou.JsonHelper), 27 | } 28 | 29 | func TestConfig(t *testing.T) { 30 | if config.Bucket == "" { 31 | t.Logf("must provide AZURE_PROJECT, AZURE_KEY, AZURE_PROJECT env vars") 32 | t.Skip() 33 | return 34 | } 35 | 36 | conf := &cloudstorage.Config{ 37 | Type: azure.StoreType, 38 | Project: os.Getenv("AZURE_PROJECT"), 39 | Settings: make(gou.JsonHelper), 40 | } 41 | // Should error with empty config 42 | _, err := cloudstorage.NewStore(conf) 43 | require.Error(t, err) 44 | 45 | conf.AuthMethod = azure.AuthKey 46 | conf.Settings[azure.ConfKeyAuthKey] = "" 47 | _, err = cloudstorage.NewStore(conf) 48 | require.Error(t, err) 49 | 50 | conf.Settings[azure.ConfKeyAuthKey] = "bad" 51 | _, err = cloudstorage.NewStore(conf) 52 | require.Error(t, err) 53 | 54 | conf.Settings[azure.ConfKeyAuthKey] = os.Getenv("AZURE_KEY") 55 | client, sess, err := azure.NewClient(conf) 56 | require.NoError(t, err) 57 | require.NotNil(t, client) 58 | conf.TmpDir = "" 59 | _, err = azure.NewStore(client, sess, conf) 60 | require.Error(t, err) 61 | 62 | // Trying to find dir they don't have access to? 
63 | conf.TmpDir = "/home/fake" 64 | _, err = cloudstorage.NewStore(conf) 65 | require.Error(t, err) 66 | } 67 | 68 | func TestAll(t *testing.T) { 69 | config.Project = os.Getenv("AZURE_PROJECT") 70 | if config.Project == "" { 71 | t.Logf("must provide AZURE_PROJECT") 72 | t.Skip() 73 | return 74 | } 75 | 76 | config.TmpDir = t.TempDir() 77 | 78 | config.Settings[azure.ConfKeyAuthKey] = os.Getenv("AZURE_KEY") 79 | store, err := cloudstorage.NewStore(config) 80 | if err != nil { 81 | t.Logf("No valid auth provided, skipping azure testing %v", err) 82 | t.Skip() 83 | return 84 | } 85 | client := store.Client() 86 | require.NotNil(t, client) 87 | 88 | testutils.RunTests(t, store, config) 89 | } 90 | -------------------------------------------------------------------------------- /query.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "sort" 5 | ) 6 | 7 | // Filter func type definition for filtering objects 8 | type Filter func(objects Objects) Objects 9 | 10 | // Query used to query the cloud source. The primary query is a prefix query like 11 | // `ls /my-csv-files/baseball/*`. This is the Request, and includes the 12 | // PageSize, cursor/next token as well. 13 | type Query struct { 14 | Delimiter string // Delimiter is most likely "/" 15 | Prefix string // prefix (directory) to search for or object name if one file 16 | StartOffset string // (gcs/localfs only) "bar/", Only list objects lexicographically >= "bar/" 17 | EndOffset string // (gcs/localfs only) "foo/", Only list objects lexicographically < "foo/" 18 | Marker string // Next Page Marker if provided is a start next page fetch bookmark. 19 | ShowHidden bool // Show hidden files? 20 | Filters []Filter // Applied to the result sets to filter out Objects (i.e. 
remove objects by extension) 21 | PageSize int // PageSize defaults to global, or you can supply an override 22 | } 23 | 24 | // NewQuery create a query for finding files under given prefix. 25 | func NewQuery(prefix string) Query { 26 | return Query{ 27 | Prefix: prefix, 28 | } 29 | } 30 | 31 | // NewQueryAll query for all objects/files. 32 | func NewQueryAll() Query { 33 | return Query{} 34 | } 35 | 36 | // NewQueryForFolders create a query for finding Folders under given path. 37 | func NewQueryForFolders(folderPath string) Query { 38 | return Query{ 39 | Delimiter: "/", 40 | Prefix: folderPath, 41 | } 42 | } 43 | 44 | // AddFilter adds a post prefix query, that can be used to alter results set 45 | // from the prefix query. 46 | func (q *Query) AddFilter(f Filter) *Query { 47 | if q.Filters == nil { 48 | q.Filters = make([]Filter, 0) 49 | } 50 | q.Filters = append(q.Filters, f) 51 | return q 52 | } 53 | 54 | // Sorted added a sort Filter to the filter chain, if its not the last call 55 | // while building your query, Then sorting is only guaranteed for the next 56 | // filter in the chain. 57 | func (q *Query) Sorted() *Query { 58 | q.AddFilter(ObjectSortFilter) 59 | return q 60 | } 61 | 62 | // ApplyFilters is called as the last step in store.List() to filter out the 63 | // results before they are returned. 64 | func (q *Query) ApplyFilters(objects Objects) Objects { 65 | for _, f := range q.Filters { 66 | objects = f(objects) 67 | } 68 | return objects 69 | } 70 | 71 | var ObjectSortFilter = func(objs Objects) Objects { 72 | sort.Stable(objs) 73 | return objs 74 | } 75 | -------------------------------------------------------------------------------- /azure/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | azure blog store 4 | -------------------------- 5 | Cloudstorage abstraction to Azure Blob storage. 
6 | 7 | 8 | 9 | config 10 | ----------------- 11 | 12 | Login to your https://portal.azure.com account and click "Storage Accounts" in menu. Then Click the storage account you want. 13 | 14 | * *config.Project* is required. use "Account" in azure portal. This is the "Name" of cloudstorageazuretesting https://cloudstorageazuretesting.blob.core.windows.net/ 15 | * *azure_key* from your storage account go to the menu "Access Keys" 16 | * *Bucket* go to *Containers* in the azure storage and get this name. 17 | 18 | 19 | 20 | Example in Go: 21 | ```go 22 | package main 23 | 24 | import ( 25 | "context" 26 | "fmt" 27 | "os" 28 | 29 | "github.com/araddon/gou" 30 | "google.golang.org/api/iterator" 31 | 32 | "github.com/lytics/cloudstorage" 33 | "github.com/lytics/cloudstorage/azure" 34 | ) 35 | 36 | /* 37 | 38 | # to use azure tests ensure you have exported 39 | 40 | export AZURE_KEY="aaa" 41 | export AZURE_PROJECT="bbb" 42 | export AZURE_BUCKET="cloudstorageunittests" 43 | 44 | */ 45 | 46 | func main() { 47 | tmpDir, err := os.MkdirTemp("/tmp", "azure_example") 48 | if err != nil { 49 | fmt.Println("Could not create temp dir", err) 50 | os.Exit(1) 51 | } 52 | defer os.RemoveAll(tmpDir) 53 | 54 | conf := &cloudstorage.Config{ 55 | Type: azure.StoreType, 56 | AuthMethod: azure.AuthKey, 57 | Bucket: os.Getenv("AZURE_BUCKET"), 58 | Project: os.Getenv("AZURE_PROJECT"), 59 | TmpDir: filepath.Join(tempDir, "localcache", "azure"), 60 | Settings: make(gou.JsonHelper), 61 | } 62 | 63 | conf.Settings[azure.ConfKeyAuthKey] = os.Getenv("AZURE_KEY") 64 | 65 | // Should error with empty config 66 | store, err := cloudstorage.NewStore(conf) 67 | if err != nil { 68 | fmt.Println("Could not get azure store ", err) 69 | os.Exit(1) 70 | } 71 | 72 | folders, err := store.Folders(context.Background(), cloudstorage.NewQueryForFolders("")) 73 | if err != nil { 74 | fmt.Println("Could not get folders ", err) 75 | os.Exit(1) 76 | } 77 | for _, folder := range folders { 78 | fmt.Println("found 
folder: ", folder) 79 | } 80 | 81 | // Create a search query for all objects 82 | q := cloudstorage.NewQuery("") 83 | // Create an Iterator 84 | iter, err := store.Objects(context.Background(), q) 85 | if err != nil { 86 | fmt.Println("Could not get iter ", err) 87 | os.Exit(1) 88 | } 89 | 90 | for { 91 | o, err := iter.Next() 92 | if err == iterator.Done { 93 | fmt.Println("done, exiting iterator") 94 | break 95 | } 96 | fmt.Println("found object", o.Name()) 97 | } 98 | } 99 | 100 | ``` 101 | 102 | 103 | -------------------------------------------------------------------------------- /store_test.go: -------------------------------------------------------------------------------- 1 | package cloudstorage_test 2 | 3 | import ( 4 | "encoding/json" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/lytics/cloudstorage" 9 | "github.com/lytics/cloudstorage/localfs" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestStore(t *testing.T) { 14 | tmpDir := t.TempDir() 15 | 16 | invalidConf := &cloudstorage.Config{} 17 | 18 | store, err := cloudstorage.NewStore(invalidConf) 19 | require.Error(t, err) 20 | require.Nil(t, store) 21 | 22 | missingStoreConf := &cloudstorage.Config{ 23 | Type: "non-existent-store", 24 | } 25 | 26 | store, err = cloudstorage.NewStore(missingStoreConf) 27 | require.Error(t, err) 28 | require.Nil(t, store) 29 | 30 | // test missing temp dir, assign local temp 31 | localFsConf := &cloudstorage.Config{ 32 | Type: localfs.StoreType, 33 | AuthMethod: localfs.AuthFileSystem, 34 | LocalFS: filepath.Join(tmpDir, "mockcloud"), 35 | } 36 | 37 | store, err = cloudstorage.NewStore(localFsConf) 38 | require.Nil(t, err) 39 | require.NotNil(t, store) 40 | } 41 | 42 | func TestJwtConf(t *testing.T) { 43 | configInput := ` 44 | { 45 | "JwtConf": { 46 | "type": "service_account", 47 | "project_id": "testing", 48 | "private_key_id": "abcdefg", 49 | "private_key": "aGVsbG8td29ybGQ=", 50 | "client_email": "testing@testing.iam.gserviceaccount.com", 
51 | "client_id": "117058426251532209964", 52 | "scopes": [ 53 | "https://www.googleapis.com/auth/devstorage.read_write" 54 | ] 55 | } 56 | }` 57 | 58 | // v := base64.StdEncoding.EncodeToString([]byte("hello-world")) 59 | // t.Logf("b64 %q", v) 60 | conf := &cloudstorage.Config{} 61 | err := json.Unmarshal([]byte(configInput), conf) 62 | require.Nil(t, err) 63 | conf.JwtConf.PrivateKey = "------helo-------\naGVsbG8td29ybGQ=\n-----------------end--------" 64 | require.NotNil(t, conf.JwtConf) 65 | require.Nil(t, conf.JwtConf.Validate()) 66 | require.Equal(t, "aGVsbG8td29ybGQ=", conf.JwtConf.PrivateKey) 67 | require.Equal(t, "service_account", conf.JwtConf.Type) 68 | 69 | // note on this one the "keytype" & "private_keybase64" 70 | configInput = ` 71 | { 72 | "JwtConf": { 73 | "keytype": "service_account", 74 | "project_id": "testing", 75 | "private_key_id": "abcdefg", 76 | "private_keybase64": "aGVsbG8td29ybGQ=", 77 | "client_email": "testing@testing.iam.gserviceaccount.com", 78 | "client_id": "117058426251532209964", 79 | "scopes": [ 80 | "https://www.googleapis.com/auth/devstorage.read_write" 81 | ] 82 | } 83 | }` 84 | conf = &cloudstorage.Config{} 85 | err = json.Unmarshal([]byte(configInput), conf) 86 | require.Nil(t, err) 87 | require.NotNil(t, conf.JwtConf) 88 | require.Nil(t, conf.JwtConf.Validate()) 89 | require.Equal(t, "aGVsbG8td29ybGQ=", conf.JwtConf.PrivateKey) 90 | require.Equal(t, "service_account", conf.JwtConf.Type) 91 | } 92 | -------------------------------------------------------------------------------- /sftp/store_test.go: -------------------------------------------------------------------------------- 1 | package sftp_test 2 | 3 | import ( 4 | "bytes" 5 | "crypto/rand" 6 | "crypto/rsa" 7 | "crypto/x509" 8 | "encoding/pem" 9 | "os" 10 | "path/filepath" 11 | "testing" 12 | 13 | "github.com/araddon/gou" 14 | "github.com/stretchr/testify/require" 15 | 16 | "github.com/lytics/cloudstorage" 17 | "github.com/lytics/cloudstorage/sftp" 18 | 
"github.com/lytics/cloudstorage/testutils" 19 | ) 20 | 21 | /* 22 | 23 | # to use sftp tests ensure you have exported 24 | 25 | export SFTP_USER="aaa" 26 | export SFTP_PASSWORD="bbb" 27 | export SFTP_FOLDER="bucket" 28 | 29 | */ 30 | 31 | var config = &cloudstorage.Config{ 32 | Type: sftp.StoreType, 33 | AuthMethod: sftp.AuthUserPass, 34 | Bucket: os.Getenv("SFTP_FOLDER"), 35 | Settings: make(gou.JsonHelper), 36 | LogPrefix: "sftp-testing", 37 | } 38 | 39 | func getKey() string { 40 | privateKey, err := rsa.GenerateKey(rand.Reader, 1024) 41 | if err != nil { 42 | return "" 43 | } 44 | // pub, err := ssh.NewPublicKey(&privateKey.PublicKey) 45 | // if err != nil { 46 | // return "" 47 | // } 48 | // return string(ssh.MarshalAuthorizedKey(pub)) 49 | //return string(x509.MarshalPKCS1PrivateKey(privateKey)) 50 | privateKeyPEM := &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)} 51 | buf := bytes.NewBuffer(nil) 52 | if err := pem.Encode(buf, privateKeyPEM); err != nil { 53 | return "" 54 | } 55 | return buf.String() 56 | } 57 | func TestConfig(t *testing.T) { 58 | tmpDir := t.TempDir() 59 | 60 | sshConf, err := sftp.ConfigUserKey("user", getKey()) 61 | require.NoError(t, err) 62 | require.NotNil(t, sshConf) 63 | 64 | conf := &cloudstorage.Config{ 65 | Type: sftp.StoreType, 66 | AuthMethod: sftp.AuthUserKey, 67 | Bucket: os.Getenv("SFTP_FOLDER"), 68 | TmpDir: filepath.Join(tmpDir, "localcache", "sftp"), 69 | Settings: make(gou.JsonHelper), 70 | LogPrefix: "sftp-testing", 71 | } 72 | conf.Settings[sftp.ConfKeyPrivateKey] = getKey() 73 | conf.Settings[sftp.ConfKeyUser] = os.Getenv("SFTP_USER") 74 | conf.Settings[sftp.ConfKeyHost] = os.Getenv("SFTP_HOST") 75 | conf.Settings[sftp.ConfKeyPort] = "22" 76 | _, err = sftp.NewStore(conf) 77 | require.Error(t, err) 78 | } 79 | func TestAll(t *testing.T) { 80 | tmpDir := t.TempDir() 81 | config.TmpDir = filepath.Join(tmpDir, "localcache", "sftp") 82 | 83 | config.Settings[sftp.ConfKeyUser] = 
os.Getenv("SFTP_USER") 84 | config.Settings[sftp.ConfKeyPassword] = os.Getenv("SFTP_PASSWORD") 85 | config.Settings[sftp.ConfKeyHost] = os.Getenv("SFTP_HOST") 86 | config.Settings[sftp.ConfKeyPort] = "22" 87 | //gou.Debugf("config %v", config) 88 | store, err := cloudstorage.NewStore(config) 89 | if err != nil { 90 | t.Logf("No valid auth provided, skipping sftp testing %v", err) 91 | t.Skip() 92 | return 93 | } 94 | if store == nil { 95 | t.Fatalf("No store???") 96 | } 97 | testutils.RunTests(t, store, config) 98 | } 99 | -------------------------------------------------------------------------------- /file_helper.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "fmt" 5 | "mime" 6 | "os" 7 | "path" 8 | "path/filepath" 9 | "strings" 10 | ) 11 | 12 | // CleanETag transforms a string into the full etag spec, removing 13 | // extra quote-marks, whitespace from etag. 14 | // 15 | // per Etag spec https://tools.ietf.org/html/rfc7232#section-2.3 the etag value () may: 16 | // - W/"" 17 | // - "" 18 | // - "" 19 | func CleanETag(etag string) string { 20 | for { 21 | // loop through checking for extra-characters and removing 22 | if strings.HasPrefix(etag, `\"`) { 23 | etag = strings.Trim(etag, `\"`) 24 | } else if strings.HasPrefix(etag, `"`) { 25 | etag = strings.Trim(etag, `"`) 26 | } else if strings.HasPrefix(etag, `W/`) { 27 | etag = strings.Replace(etag, `W/`, "", 1) 28 | } else { 29 | // as soon as no condition matches, we are done 30 | // return 31 | return etag 32 | } 33 | } 34 | } 35 | 36 | // ContentType check content type of file by looking 37 | // at extension (.html, .png) uses package mime for global types. 38 | // Use mime.AddExtensionType to add new global types. 
39 | func ContentType(name string) string { 40 | contenttype := "" 41 | ext := filepath.Ext(name) 42 | if contenttype == "" { 43 | contenttype = mime.TypeByExtension(ext) 44 | if contenttype == "" { 45 | contenttype = "application/octet-stream" 46 | } 47 | } 48 | return contenttype 49 | } 50 | 51 | // EnsureContextType read Type of metadata 52 | func EnsureContextType(o string, md map[string]string) string { 53 | ctype, ok := md[ContentTypeKey] 54 | if !ok { 55 | ext := filepath.Ext(o) 56 | if ctype == "" { 57 | ctype = mime.TypeByExtension(ext) 58 | if ctype == "" { 59 | ctype = "application/octet-stream" 60 | } 61 | } 62 | md[ContentTypeKey] = ctype 63 | } 64 | return ctype 65 | } 66 | 67 | // Exists does this file path exists on the local file-system? 68 | func Exists(filename string) bool { 69 | if _, err := os.Stat(filename); os.IsNotExist(err) { 70 | return false 71 | } 72 | return true 73 | } 74 | 75 | // CachePathObj check the cache path. 76 | func CachePathObj(cachepath, oname, storeid string) string { 77 | obase := path.Base(oname) 78 | opath := path.Dir(oname) 79 | ext := path.Ext(oname) 80 | ext2 := fmt.Sprintf("%s.%s%s", ext, storeid, StoreCacheFileExt) 81 | var obase2 string 82 | if ext == "" { 83 | obase2 = obase + ext2 84 | } else { 85 | obase2 = strings.Replace(obase, ext, ext2, 1) 86 | } 87 | return path.Join(cachepath, opath, obase2) 88 | } 89 | 90 | // EnsureDir ensure directory exists 91 | func EnsureDir(filename string) error { 92 | fdir := path.Dir(filename) 93 | if fdir != "" && fdir != filename { 94 | d, err := os.Stat(fdir) 95 | if err == nil { 96 | if !d.IsDir() { 97 | return fmt.Errorf("filename's dir exists but isn't' a directory: filename:%v dir:%v", filename, fdir) 98 | } 99 | } else if os.IsNotExist(err) { 100 | err := os.MkdirAll(fdir, 0775) 101 | if err != nil { 102 | return fmt.Errorf("unable to create path. 
: filename:%v dir:%v err:%v", filename, fdir, err) 103 | } 104 | } 105 | } 106 | return nil 107 | } 108 | -------------------------------------------------------------------------------- /awss3/store_test.go: -------------------------------------------------------------------------------- 1 | package awss3_test 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/araddon/gou" 9 | "github.com/stretchr/testify/require" 10 | 11 | "github.com/lytics/cloudstorage" 12 | "github.com/lytics/cloudstorage/awss3" 13 | "github.com/lytics/cloudstorage/testutils" 14 | ) 15 | 16 | /* 17 | 18 | # to use aws tests ensure you have exported 19 | 20 | export AWS_ACCESS_KEY="aaa" 21 | export AWS_SECRET_KEY="bbb" 22 | export AWS_BUCKET="bucket" 23 | 24 | */ 25 | 26 | func TestS3(t *testing.T) { 27 | if os.Getenv("AWS_SECRET_KEY") == "" || os.Getenv("AWS_ACCESS_KEY") == "" { 28 | t.Logf("No aws credentials, skipping") 29 | t.Skip() 30 | return 31 | } 32 | 33 | tmpDir := t.TempDir() 34 | 35 | conf := &cloudstorage.Config{ 36 | Type: awss3.StoreType, 37 | Settings: gou.JsonHelper{ 38 | "fake": "notused", 39 | }, 40 | } 41 | // Should error with empty config 42 | _, err := cloudstorage.NewStore(conf) 43 | require.Error(t, err) 44 | 45 | conf.AuthMethod = awss3.AuthAccessKey 46 | conf.Settings[awss3.ConfKeyAccessKey] = "" 47 | conf.Settings[awss3.ConfKeyAccessSecret] = os.Getenv("AWS_SECRET_KEY") 48 | conf.Bucket = os.Getenv("AWS_BUCKET") 49 | conf.TmpDir = filepath.Join(tmpDir, "localcache", "aws") 50 | _, err = cloudstorage.NewStore(conf) 51 | require.Error(t, err) 52 | 53 | conf.Settings[awss3.ConfKeyAccessSecret] = "" 54 | _, err = cloudstorage.NewStore(conf) 55 | require.Error(t, err) 56 | 57 | // conf.Settings[awss3.ConfKeyAccessKey] = "bad" 58 | // conf.Settings[awss3.ConfKeyAccessSecret] = "bad" 59 | // _, err = cloudstorage.NewStore(conf) 60 | // require. 
NotEqual(t, nil, err) 61 | 62 | conf.BaseUrl = "s3.custom.endpoint.com" 63 | conf.Settings[awss3.ConfKeyAccessKey] = os.Getenv("AWS_ACCESS_KEY") 64 | conf.Settings[awss3.ConfKeyAccessSecret] = os.Getenv("AWS_SECRET_KEY") 65 | client, _, err := awss3.NewClient(conf) 66 | require.NoError(t, err) 67 | require.NotNil(t, client) 68 | 69 | conf.Settings[awss3.ConfKeyDisableSSL] = true 70 | client, sess, err := awss3.NewClient(conf) 71 | require.NoError(t, err) 72 | require.NotNil(t, client) 73 | 74 | conf.TmpDir = "" 75 | _, err = awss3.NewStore(client, sess, conf) 76 | require.Error(t, err) 77 | 78 | // Trying to find dir they don't have access to? 79 | conf.TmpDir = "/home/fake" 80 | _, err = cloudstorage.NewStore(conf) 81 | require.Error(t, err) 82 | } 83 | 84 | func TestAll(t *testing.T) { 85 | tmpDir := t.TempDir() 86 | 87 | config := &cloudstorage.Config{ 88 | Type: awss3.StoreType, 89 | AuthMethod: awss3.AuthAccessKey, 90 | Bucket: os.Getenv("AWS_BUCKET"), 91 | TmpDir: filepath.Join(tmpDir, "localcache", "aws"), 92 | Settings: make(gou.JsonHelper), 93 | Region: "us-east-1", 94 | } 95 | config.Settings[awss3.ConfKeyAccessKey] = os.Getenv("AWS_ACCESS_KEY") 96 | config.Settings[awss3.ConfKeyAccessSecret] = os.Getenv("AWS_SECRET_KEY") 97 | //gou.Debugf("config %v", config) 98 | if config.Bucket == "" || os.Getenv("AWS_SECRET_KEY") == "" || os.Getenv("AWS_ACCESS_KEY") == "" { 99 | t.Logf("No aws credentials, skipping") 100 | t.Skip() 101 | return 102 | } 103 | store, err := cloudstorage.NewStore(config) 104 | if err != nil { 105 | t.Logf("No valid auth provided, skipping awss3 testing %v", err) 106 | t.Skip() 107 | return 108 | } 109 | require.NotNil(t, store, "no store?") 110 | testutils.RunTests(t, store, config) 111 | } 112 | -------------------------------------------------------------------------------- /localfs/emptydir_test.go: -------------------------------------------------------------------------------- 1 | package localfs 2 | 3 | import ( 4 | "os" 5 | 
"path" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestDirectoryCleanup(t *testing.T) { 12 | testDir := t.TempDir() 13 | 14 | makeDummyFile := func(t *testing.T, filePath string) string { 15 | fullPath := path.Join(testDir, filePath) 16 | dir := path.Dir(fullPath) 17 | require.NotEmpty(t, dir) 18 | err := os.MkdirAll(dir, 0755) 19 | require.NoError(t, err) 20 | err = os.WriteFile(fullPath, []byte("don't delete this folder"), 0755) 21 | require.NoError(t, err) 22 | return fullPath 23 | } 24 | 25 | fileExists := func(t *testing.T, filePath string) bool { 26 | _, err := os.Stat(filePath) 27 | if err == nil { 28 | return true 29 | } 30 | if os.IsNotExist(err) { 31 | return false 32 | } 33 | require.FailNow(t, "failed to get status of file %s", filePath) 34 | return false 35 | } 36 | 37 | require.False(t, fileExists(t, "/heythisdoesntexist/overhere")) 38 | 39 | // /testDir 40 | // a/ 41 | // dummyfile3 42 | // b/ 43 | // c/ 44 | // dummyfile1 45 | // dummyfile2 46 | // d/ 47 | // dummyfile4 48 | 49 | d1 := makeDummyFile(t, "a/b/c/dummyfile1") 50 | d2 := makeDummyFile(t, "a/b/c/dummyfile2") 51 | d3 := makeDummyFile(t, "a/dummyfile3") 52 | d4 := makeDummyFile(t, "a/d/dummyfile4") 53 | 54 | l := &LocalStore{storepath: testDir} 55 | 56 | t.Run("delete-nonempty-dir", func(t *testing.T) { 57 | err := l.deleteParentDirs(path.Join(testDir, "a/d")) 58 | require.NoError(t, err) 59 | require.True(t, fileExists(t, d1)) 60 | require.True(t, fileExists(t, d2)) 61 | require.True(t, fileExists(t, d3)) 62 | require.True(t, fileExists(t, d4)) 63 | }) 64 | 65 | t.Run("delete-nonempty-nested-child-dir", func(t *testing.T) { 66 | err := l.deleteParentDirs(path.Join(testDir, "a/b/c")) 67 | require.NoError(t, err) 68 | require.True(t, fileExists(t, d1)) 69 | require.True(t, fileExists(t, d2)) 70 | require.True(t, fileExists(t, d3)) 71 | require.True(t, fileExists(t, d4)) 72 | }) 73 | 74 | t.Run("delete-nonempty-nested-parent-dir", func(t *testing.T) { 75 | 
err := l.deleteParentDirs(path.Join(testDir, "a/b")) 76 | require.NoError(t, err) 77 | require.True(t, fileExists(t, d1)) 78 | require.True(t, fileExists(t, d2)) 79 | require.True(t, fileExists(t, d3)) 80 | require.True(t, fileExists(t, d4)) 81 | }) 82 | 83 | require.NoError(t, os.Remove(d4)) 84 | 85 | t.Run("delete-empty-dir", func(t *testing.T) { 86 | err := l.deleteParentDirs(d4) 87 | require.NoError(t, err) 88 | require.True(t, fileExists(t, d1)) 89 | require.True(t, fileExists(t, d2)) 90 | require.True(t, fileExists(t, d3)) 91 | require.False(t, fileExists(t, d4)) 92 | require.False(t, fileExists(t, path.Join(testDir, "a/d"))) 93 | }) 94 | 95 | require.NoError(t, os.Remove(d1)) 96 | require.NoError(t, os.Remove(d2)) 97 | 98 | t.Run("delete-empty-nested-dir", func(t *testing.T) { 99 | err := l.deleteParentDirs(d2) 100 | require.NoError(t, err) 101 | require.False(t, fileExists(t, d1)) 102 | require.False(t, fileExists(t, d2)) 103 | require.False(t, fileExists(t, path.Join(testDir, "a/b/c"))) 104 | require.False(t, fileExists(t, path.Join(testDir, "a/b"))) 105 | require.True(t, fileExists(t, d3)) 106 | require.False(t, fileExists(t, d4)) 107 | require.False(t, fileExists(t, path.Join(testDir, "a/d"))) 108 | }) 109 | 110 | t.Run("delete-missing-dir", func(t *testing.T) { 111 | err := l.deleteParentDirs(path.Join(testDir, "doesntexist/what")) 112 | require.NoError(t, err) 113 | }) 114 | 115 | require.True(t, fileExists(t, testDir)) 116 | } 117 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "time" 7 | 8 | "golang.org/x/net/context" 9 | "google.golang.org/api/iterator" 10 | ) 11 | 12 | // ObjectsAll get all objects for an iterator. 
13 | func ObjectsAll(iter ObjectIterator) (Objects, error) { 14 | objs := make(Objects, 0) 15 | for { 16 | o, err := iter.Next() 17 | if err == iterator.Done { 18 | break 19 | } else if err != nil { 20 | return nil, err 21 | } 22 | objs = append(objs, o) 23 | } 24 | return objs, nil 25 | } 26 | 27 | // ObjectResponseFromIter get all objects for an iterator. 28 | func ObjectResponseFromIter(iter ObjectIterator) (*ObjectsResponse, error) { 29 | objs, err := ObjectsAll(iter) 30 | if err != nil { 31 | return nil, err 32 | } 33 | return &ObjectsResponse{Objects: objs}, nil 34 | } 35 | 36 | // ObjectPageIterator iterator to facilitate easy paging through store.List() method 37 | // to read all Objects that matched query. 38 | type ObjectPageIterator struct { 39 | s Store 40 | ctx context.Context 41 | cancel context.CancelFunc 42 | q Query 43 | cursor int 44 | page Objects 45 | } 46 | 47 | // NewObjectPageIterator create an iterator that wraps the store List interface. 48 | func NewObjectPageIterator(ctx context.Context, s Store, q Query) ObjectIterator { 49 | 50 | cancelCtx, cancel := context.WithCancel(ctx) 51 | return &ObjectPageIterator{ 52 | s: s, 53 | ctx: cancelCtx, 54 | cancel: cancel, 55 | q: q, 56 | } 57 | } 58 | func (it *ObjectPageIterator) returnPageNext() (Object, error) { 59 | it.cursor++ 60 | return it.page[it.cursor-1], nil 61 | } 62 | 63 | // Close the object iterator. 64 | func (it *ObjectPageIterator) Close() { 65 | defer func() { recover() }() 66 | select { 67 | case <-it.ctx.Done(): 68 | // done 69 | default: 70 | it.cancel() 71 | } 72 | } 73 | 74 | // Next iterator to go to next object or else returns error for done. 
75 | func (it *ObjectPageIterator) Next() (Object, error) { 76 | retryCt := 0 77 | 78 | select { 79 | case <-it.ctx.Done(): 80 | // If iterator has been closed 81 | return nil, it.ctx.Err() 82 | default: 83 | if it.cursor < len(it.page) { 84 | return it.returnPageNext() 85 | } else if it.cursor > 0 && it.q.Marker == "" { 86 | // no new page, lets return 87 | return nil, iterator.Done 88 | } 89 | for { 90 | resp, err := it.s.List(it.ctx, it.q) 91 | if err == nil { 92 | it.page = resp.Objects 93 | it.cursor = 0 94 | it.q.Marker = resp.NextMarker 95 | if len(it.page) == 0 { 96 | return nil, iterator.Done 97 | } 98 | return it.returnPageNext() 99 | } else if err == iterator.Done { 100 | return nil, err 101 | } else if err == context.Canceled || err == context.DeadlineExceeded { 102 | // Return to user 103 | return nil, err 104 | } 105 | if retryCt < 5 { 106 | Backoff(retryCt) 107 | } else { 108 | return nil, err 109 | } 110 | retryCt++ 111 | } 112 | } 113 | } 114 | 115 | // Backoff sleeps a random amount so we can. 116 | // retry failed requests using a randomized exponential backoff: 117 | // wait a random period between [0..1] seconds and retry; if that fails, 118 | // wait a random period between [0..2] seconds and retry; if that fails, 119 | // wait a random period between [0..4] seconds and retry, and so on, 120 | // with an upper bounds to the wait period being 16 seconds. 
121 | // http://play.golang.org/p/l9aUHgiR8J 122 | func Backoff(try int) { 123 | nf := math.Pow(2, float64(try)) 124 | nf = math.Max(1, nf) 125 | nf = math.Min(nf, 16) 126 | r := rand.Int31n(int32(nf)) 127 | d := time.Duration(r) * time.Second 128 | time.Sleep(d) 129 | } 130 | -------------------------------------------------------------------------------- /google/google_test.go: -------------------------------------------------------------------------------- 1 | package google_test 2 | 3 | import ( 4 | "path/filepath" 5 | "strings" 6 | "testing" 7 | 8 | "cloud.google.com/go/storage" 9 | "github.com/lytics/cloudstorage" 10 | "github.com/lytics/cloudstorage/google" 11 | "github.com/lytics/cloudstorage/testutils" 12 | ) 13 | 14 | /* 15 | 16 | # to use Google Cloud Storage ensure you have application default authentication working 17 | 18 | gcloud auth application-default login 19 | 20 | */ 21 | 22 | func TestAll(t *testing.T) { 23 | config := &cloudstorage.Config{ 24 | Type: google.StoreType, 25 | AuthMethod: google.AuthGCEDefaultOAuthToken, 26 | Project: "lio-testing", 27 | Bucket: "liotesting-int-tests-nl", 28 | TmpDir: t.TempDir(), 29 | } 30 | 31 | store, err := cloudstorage.NewStore(config) 32 | if err != nil { 33 | if strings.Contains(err.Error(), "could not find default credentials") { 34 | t.Skip("could not find default credentials, skipping Google Storage tests") 35 | } 36 | t.Fatalf("Could not create store: config=%+v err=%v", config, err) 37 | } 38 | testutils.RunTests(t, store, config) 39 | 40 | config.EnableCompression = true 41 | store, err = cloudstorage.NewStore(config) 42 | if err != nil { 43 | if strings.Contains(err.Error(), "could not find default credentials") { 44 | t.Skip("could not find default credentials, skipping Google Storage tests") 45 | } 46 | t.Fatalf("Could not create store: config=%+v err=%v", config, err) 47 | } 48 | testutils.RunTests(t, store, config) 49 | } 50 | 51 | func TestConfigValidation(t *testing.T) { 52 | 53 | tmpDir := 
t.TempDir() 54 | 55 | // VALIDATE errors for AuthJWTKeySource 56 | config := &cloudstorage.Config{} 57 | _, err := cloudstorage.NewStore(config) 58 | if err == nil { 59 | t.Fatalf("expected an error for an empty config: config=%+v", config) 60 | } 61 | 62 | jc := &cloudstorage.JwtConf{} 63 | config.JwtConf = jc 64 | 65 | _, err = cloudstorage.NewStore(config) 66 | if err == nil { 67 | t.Fatalf("expected an error for an empty config: config=%+v", config) 68 | } 69 | 70 | config = &cloudstorage.Config{ 71 | Type: google.StoreType, 72 | AuthMethod: google.AuthJWTKeySource, 73 | Project: "tbd", 74 | Bucket: "liotesting-int-tests-nl", 75 | TmpDir: filepath.Join(tmpDir, "localcache", "google"), 76 | } 77 | 78 | _, err = cloudstorage.NewStore(config) 79 | if err == nil { 80 | t.Fatalf("expected an error for a config without a JwtConfig: config=%+v", config) 81 | } 82 | 83 | config.Type = "" 84 | _, err = cloudstorage.NewStore(config) 85 | if err == nil { 86 | t.Fatalf("expected an error for a config without a Type: config=%+v", config) 87 | } 88 | if !strings.Contains(err.Error(), "Type is required on Config") { 89 | t.Fatalf("expected error `Type is required on Config`: err=%v", err) 90 | } 91 | 92 | config.Type = google.StoreType 93 | config.AuthMethod = "" 94 | _, err = cloudstorage.NewStore(config) 95 | if err == nil { 96 | t.Fatalf("expected an error for a config without a AuthMethod: config=%+v", config) 97 | } 98 | if !strings.Contains(err.Error(), "bad AuthMethod") { 99 | t.Fatalf("expected error `bad AuthMethod`: err=%v", err) 100 | } 101 | 102 | // VALIDATE errors for AuthGoogleJWTKeySource (used to load a config from a JWT file) 103 | config = &cloudstorage.Config{ 104 | Type: google.StoreType, 105 | AuthMethod: google.AuthGoogleJWTKeySource, 106 | Project: "tbd", 107 | Bucket: "tbd", 108 | TmpDir: filepath.Join(tmpDir, "localcache", "google"), 109 | JwtFile: "./jwt.json", 110 | } 111 | _, err = cloudstorage.NewStore(config) 112 | if err == nil { 113 | 
t.Fatalf("expected an error for a config without a scopes: config=%+v", config) 114 | } 115 | 116 | config.Scope = storage.ScopeReadWrite 117 | _, err = cloudstorage.NewStore(config) 118 | if err == nil { 119 | t.Fatalf("expected an error for a config that points to a non-existent file: config=%+v", config) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lytics/cloudstorage 2 | 3 | go 1.24.1 4 | 5 | require ( 6 | cloud.google.com/go/storage v1.55.0 7 | github.com/Azure/azure-sdk-for-go v67.1.0+incompatible 8 | github.com/araddon/gou v0.0.0-20211019181548-e7d08105776c 9 | github.com/aws/aws-sdk-go v1.44.146 10 | github.com/pborman/uuid v1.2.1 11 | github.com/pkg/sftp v1.13.5 12 | github.com/stretchr/testify v1.10.0 13 | golang.org/x/crypto v0.38.0 14 | golang.org/x/net v0.40.0 15 | golang.org/x/oauth2 v0.30.0 16 | golang.org/x/sync v0.14.0 17 | google.golang.org/api v0.235.0 18 | ) 19 | 20 | require github.com/acomagu/bufpipe v1.0.4 21 | 22 | require ( 23 | cel.dev/expr v0.20.0 // indirect 24 | cloud.google.com/go v0.121.1 // indirect 25 | cloud.google.com/go/auth v0.16.1 // indirect 26 | cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect 27 | cloud.google.com/go/compute/metadata v0.7.0 // indirect 28 | cloud.google.com/go/iam v1.5.2 // indirect 29 | cloud.google.com/go/monitoring v1.24.2 // indirect 30 | github.com/Azure/go-autorest v14.2.0+incompatible // indirect 31 | github.com/Azure/go-autorest/autorest v0.11.28 // indirect 32 | github.com/Azure/go-autorest/autorest/adal v0.9.18 // indirect 33 | github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect 34 | github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect 35 | github.com/Azure/go-autorest/logger v0.2.1 // indirect 36 | github.com/Azure/go-autorest/tracing v0.6.0 // indirect 37 | 
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect 38 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect 39 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect 40 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 41 | github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect 42 | github.com/davecgh/go-spew v1.1.1 // indirect 43 | github.com/dnaeon/go-vcr v1.2.0 // indirect 44 | github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect 45 | github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect 46 | github.com/felixge/httpsnoop v1.0.4 // indirect 47 | github.com/go-jose/go-jose/v4 v4.0.4 // indirect 48 | github.com/go-logr/logr v1.4.2 // indirect 49 | github.com/go-logr/stdr v1.2.2 // indirect 50 | github.com/gofrs/uuid v4.3.1+incompatible // indirect 51 | github.com/golang-jwt/jwt/v4 v4.2.0 // indirect 52 | github.com/google/s2a-go v0.1.9 // indirect 53 | github.com/google/uuid v1.6.0 // indirect 54 | github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect 55 | github.com/googleapis/gax-go/v2 v2.14.2 // indirect 56 | github.com/jmespath/go-jmespath v0.4.0 // indirect 57 | github.com/kr/fs v0.1.0 // indirect 58 | github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect 59 | github.com/pmezard/go-difflib v1.0.0 // indirect 60 | github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect 61 | github.com/zeebo/errs v1.4.0 // indirect 62 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 63 | go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect 64 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect 65 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect 66 | go.opentelemetry.io/otel v1.36.0 // indirect 67 | go.opentelemetry.io/otel/metric v1.36.0 // indirect 68 | go.opentelemetry.io/otel/sdk 
v1.36.0 // indirect 69 | go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect 70 | go.opentelemetry.io/otel/trace v1.36.0 // indirect 71 | golang.org/x/sys v0.33.0 // indirect 72 | golang.org/x/text v0.25.0 // indirect 73 | golang.org/x/time v0.11.0 // indirect 74 | google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect 75 | google.golang.org/genproto/googleapis/api v0.0.0-20250512202823-5a2f75b736a9 // indirect 76 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 // indirect 77 | google.golang.org/grpc v1.72.1 // indirect 78 | google.golang.org/protobuf v1.36.6 // indirect 79 | gopkg.in/yaml.v3 v3.0.1 // indirect 80 | ) 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Cloudstorage is an library for working with Cloud Storage (Google, AWS, Azure) and SFTP, Local Files. 3 | It provides a unified api for local files, sftp and Cloud files that aids testing and operating on multiple cloud storage. 4 | 5 | [![GoDoc](https://godoc.org/github.com/lytics/cloudstorage?status.svg)](http://godoc.org/github.com/lytics/cloudstorage) 6 | [![Go ReportCard](https://goreportcard.com/badge/lytics/cloudstorage)](https://goreportcard.com/report/lytics/cloudstorage) 7 | 8 | **Features** 9 | * Provide single unified api for multiple cloud (google, azure, aws) & local files. 10 | * Cloud Upload/Download is unified in api so you don't have to download file to local, work with it, then upload. 11 | * Buffer/Cache files from cloud local so speed of usage is very high. 12 | 13 | 14 | ### Similar/Related works 15 | * https://github.com/google/go-cloud Came out after this, similar allowing blob storage through unified API. Also does other things besides blob. 16 | * https://github.com/graymeta/stow similar to this pkg, library for interacting with cloud services. 
Less of the buffer/local cache. Different clouds. 17 | * https://github.com/ncw/rclone great cli sync tool, many connections (30+), well tested. Designed as cli tool, config is less suited for use as library. 18 | * https://gocloud.dev/howto/blob/ 19 | * https://github.com/Xuanwo/storage 20 | * https://github.com/spf13/afero 21 | 22 | 23 | # Example usage: 24 | Note: For these examples all errors are ignored, using the `_` for them. 25 | 26 | ##### Creating a Store object: 27 | ```go 28 | // This is an example of a local storage object: 29 | // See(https://github.com/lytics/cloudstorage/blob/master/google/google_test.go) for a GCS example: 30 | config := &cloudstorage.Config{ 31 | Type: localfs.StoreType, 32 | AuthMethod: localfs.AuthFileSystem, 33 | LocalFS: "/tmp/mockcloud", 34 | TmpDir: "/tmp/localcache", 35 | } 36 | store, _ := cloudstorage.NewStore(config) 37 | ``` 38 | 39 | ##### Listing Objects: 40 | 41 | See go Iterator pattern doc for api-design: 42 | https://github.com/GoogleCloudPlatform/google-cloud-go/wiki/Iterator-Guidelines 43 | ```go 44 | // From a store that has been created 45 | 46 | // Create a query 47 | q := cloudstorage.NewQuery("list-test/") 48 | // Create an Iterator 49 | iter, err := store.Objects(context.Background(), q) 50 | if err != nil { 51 | // handle 52 | } 53 | 54 | for { 55 | o, err := iter.Next() 56 | if err == iterator.Done { 57 | break 58 | } 59 | log.Println("found object ", o.Name()) 60 | } 61 | ``` 62 | 63 | ##### Writing an object : 64 | ```go 65 | obj, _ := store.NewObject("prefix/test.csv") 66 | // open for read and writing. f is a filehandle to the local filesystem. 67 | f, _ := obj.Open(cloudstorage.ReadWrite) 68 | w := bufio.NewWriter(f) 69 | _, _ := w.WriteString("Year,Make,Model\n") 70 | _, _ := w.WriteString("1997,Ford,E350\n") 71 | w.Flush() 72 | 73 | // Close sync's the local file to the remote store and removes the local tmp file. 
74 | obj.Close() 75 | ``` 76 | 77 | 78 | ##### Reading an existing object: 79 | ```go 80 | // Calling Get on an existing object will return a cloudstorage object or the cloudstorage.ErrObjectNotFound error. 81 | obj2, _ := store.Get(context.Background(), "prefix/test.csv") 82 | // Note, the file is not yet open 83 | f2, _ := obj2.Open(cloudstorage.ReadOnly) 84 | bytes, _ := ioutil.ReadAll(f2) 85 | fmt.Println(string(bytes)) // should print the CSV file from the block above... 86 | ``` 87 | 88 | ##### Transferring an existing object: 89 | ```go 90 | var config = &storeutils.TransferConfig{ 91 | Type: google.StoreType, 92 | AuthMethod: google.AuthGCEDefaultOAuthToken, 93 | ProjectID: "my-project", 94 | DestBucket: "my-destination-bucket", 95 | Src: storeutils.NewGcsSource("my-source-bucket"), 96 | IncludePrefxies: []string{"these", "prefixes"}, 97 | } 98 | 99 | transferer, _ := storeutils.NewTransferer(client) 100 | resp, _ := transferer.NewTransfer(config) 101 | 102 | ``` 103 | 104 | See [testsuite.go](https://github.com/lytics/cloudstorage/blob/master/testutils/testutils.go) for more examples 105 | 106 | ## Testing 107 | 108 | Due to the way integration tests act against a cloud bucket and objects; run tests without parallelization. 109 | 110 | ``` 111 | cd $GOPATH/src/github.com/lytics/cloudstorage 112 | go test -p 1 ./... 113 | ``` 114 | 115 | -------------------------------------------------------------------------------- /google/apistore.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "github.com/lytics/cloudstorage" 5 | "google.golang.org/api/storage/v1" 6 | ) 7 | 8 | // APIStore a google api store 9 | type APIStore struct { 10 | service *storage.Service 11 | project string 12 | } 13 | 14 | // NewAPIStore create api store. 
15 | func NewAPIStore(conf *cloudstorage.Config) (*APIStore, error) { 16 | googleClient, err := NewGoogleClient(conf) 17 | if err != nil { 18 | return nil, err 19 | } 20 | service, err := storage.New(googleClient.Client()) 21 | if err != nil { 22 | return nil, err 23 | } 24 | return &APIStore{service: service, project: conf.Project}, nil 25 | } 26 | 27 | // BucketExists checks for the bucket name 28 | func (c *APIStore) BucketExists(name string) bool { 29 | b, err := c.service.Buckets.Get(name).Do() 30 | if err != nil { 31 | return false 32 | } 33 | 34 | return b.Id != "" 35 | } 36 | 37 | // CreateBucket creates a new bucket in GCS 38 | func (c *APIStore) CreateBucket(name string) error { 39 | return c.CreateBucketWithLocation(name, "") 40 | } 41 | 42 | // CreateBucketWithLocation creates a new bucket in GCS with the specified location 43 | func (c *APIStore) CreateBucketWithLocation(name, location string) error { 44 | bucket := &storage.Bucket{Name: name, Location: location} 45 | _, err := c.service.Buckets.Insert(c.project, bucket).Do() 46 | return err 47 | } 48 | 49 | // AddOwner adds entity as a owner of the object 50 | func (c *APIStore) AddOwner(bucket, object, entity string) error { 51 | ac := &storage.ObjectAccessControl{Entity: entity, Role: "OWNER"} 52 | _, err := c.service.ObjectAccessControls.Insert(bucket, object, ac).Do() 53 | return err 54 | } 55 | 56 | // AddReader adds enitty as a reader of the object 57 | func (c *APIStore) AddReader(bucket, object, entity string) error { 58 | ac := &storage.ObjectAccessControl{Entity: entity, Role: "READER"} 59 | _, err := c.service.ObjectAccessControls.Insert(bucket, object, ac).Do() 60 | return err 61 | } 62 | 63 | // AddBucketReader updates the bucket ACL to add entity as a reader on the bucket 64 | // The bucket must be in fine-grained access control mode, or this will produce an error 65 | func (c *APIStore) AddBucketReader(bucket, entity string) error { 66 | ac := &storage.BucketAccessControl{Entity: 
entity, Role: "READER"} 67 | _, err := c.service.BucketAccessControls.Insert(bucket, ac).Do() 68 | return err 69 | } 70 | 71 | // AddBucketWriter updates the bucket ACL to add entity as a writer on the bucket 72 | // The bucket must be in fine-grained access control mode, or this will produce an error 73 | func (c *APIStore) AddBucketWriter(bucket, entity string) error { 74 | ac := &storage.BucketAccessControl{Entity: entity, Role: "WRITER"} 75 | _, err := c.service.BucketAccessControls.Insert(bucket, ac).Do() 76 | return err 77 | } 78 | 79 | // SetBucketAgeLifecycle updates a bucket-level lifecycle policy for object age in days 80 | func (c *APIStore) SetBucketAgeLifecycle(name string, days int64) error { 81 | bucket := &storage.Bucket{Name: name} 82 | bucket.Lifecycle = new(storage.BucketLifecycle) 83 | action := &storage.BucketLifecycleRuleAction{Type: "Delete"} 84 | condition := &storage.BucketLifecycleRuleCondition{Age: &days} 85 | bucket.Lifecycle.Rule = make([]*storage.BucketLifecycleRule, 1) 86 | bucket.Lifecycle.Rule[0] = &storage.BucketLifecycleRule{Action: action, Condition: condition} 87 | _, err := c.service.Buckets.Patch(name, bucket).Do() 88 | return err 89 | } 90 | 91 | // GrantObjectViewer updates the IAM policy on the bucket to grant member the roles/storage.objectViewer role 92 | // The existing policy attributes on the bucket are preserved 93 | func (c *APIStore) GrantObjectViewer(bucket, member string) error { 94 | return c.grantRole(bucket, member, "roles/storage.objectViewer") 95 | } 96 | 97 | // GrantObjectCreator updates the IAM policy on the bucket to grant member the roles/storage.objectCreator role 98 | // The existing policy attributes on the bucket are preserved 99 | func (c *APIStore) GrantObjectCreator(bucket, member string) error { 100 | return c.grantRole(bucket, member, "roles/storage.objectCreator") 101 | } 102 | 103 | // GrantObjectAdmin updates the IAM policy on the bucket to grant member the roles/storage.objectAdmin role 104 
| // The existing policy attributes on the bucket are preserved 105 | func (c *APIStore) GrantObjectAdmin(bucket, member string) error { 106 | return c.grantRole(bucket, member, "roles/storage.objectAdmin") 107 | } 108 | 109 | // grantRole updates the IAM policy for @bucket in order to rant @role to @member 110 | // we have to retrieve the existing policy in order to modify it, per https://cloud.google.com/storage/docs/json_api/v1/buckets/setIamPolicy 111 | func (c *APIStore) grantRole(bucket, member, role string) error { 112 | existingPolicy, err := c.service.Buckets.GetIamPolicy(bucket).Do() 113 | if err != nil { 114 | return err 115 | } 116 | 117 | var added bool 118 | for _, b := range existingPolicy.Bindings { 119 | if b.Role == role { 120 | for _, m := range b.Members { 121 | if m == member { 122 | // already granted 123 | return nil 124 | } 125 | } 126 | b.Members = append(b.Members, member) 127 | added = true 128 | break 129 | } 130 | } 131 | 132 | if !added { 133 | b := new(storage.PolicyBindings) 134 | b.Role = role 135 | b.Members = []string{member} 136 | existingPolicy.Bindings = append(existingPolicy.Bindings, b) 137 | } 138 | _, err = c.service.Buckets.SetIamPolicy(bucket, existingPolicy).Do() 139 | return err 140 | } 141 | -------------------------------------------------------------------------------- /localfs/store_test.go: -------------------------------------------------------------------------------- 1 | package localfs_test 2 | 3 | import ( 4 | "context" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/lytics/cloudstorage" 9 | "github.com/lytics/cloudstorage/localfs" 10 | "github.com/lytics/cloudstorage/testutils" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestAll(t *testing.T) { 15 | t.Parallel() 16 | tmpDir := t.TempDir() 17 | 18 | localFsConf := &cloudstorage.Config{ 19 | Type: localfs.StoreType, 20 | AuthMethod: localfs.AuthFileSystem, 21 | LocalFS: filepath.Join(tmpDir, "mockcloud"), 22 | TmpDir: 
filepath.Join(tmpDir, "localcache"), 23 | Bucket: "all", 24 | } 25 | 26 | store, err := cloudstorage.NewStore(localFsConf) 27 | if err != nil { 28 | t.Fatalf("Could not create store: config=%+v err=%v", localFsConf, err) 29 | return 30 | } 31 | testutils.RunTests(t, store, localFsConf) 32 | 33 | localFsConf.EnableCompression = true 34 | store, err = cloudstorage.NewStore(localFsConf) 35 | if err != nil { 36 | t.Fatalf("Could not create store: config=%+v err=%v", localFsConf, err) 37 | return 38 | } 39 | testutils.RunTests(t, store, localFsConf) 40 | } 41 | 42 | func TestBusted(t *testing.T) { 43 | t.Parallel() 44 | tmpDir := t.TempDir() 45 | 46 | // invalid config: empty/missing LocalFS 47 | localFsConf := &cloudstorage.Config{ 48 | Type: localfs.StoreType, 49 | AuthMethod: localfs.AuthFileSystem, 50 | LocalFS: "", 51 | } 52 | store, err := cloudstorage.NewStore(localFsConf) 53 | require.Error(t, err) 54 | require.Nil(t, store) 55 | 56 | // invalid config: LocalFS = TempDir 57 | localFsConf = &cloudstorage.Config{ 58 | Type: localfs.StoreType, 59 | AuthMethod: localfs.AuthFileSystem, 60 | LocalFS: filepath.Join(tmpDir, "invalid"), 61 | TmpDir: filepath.Join(tmpDir, "invalid"), 62 | } 63 | store, err = cloudstorage.NewStore(localFsConf) 64 | require.Error(t, err) 65 | require.Nil(t, store) 66 | } 67 | 68 | func TestNewReaderDir(t *testing.T) { 69 | t.Parallel() 70 | tmpDir := t.TempDir() 71 | 72 | // When a dir is requested, serve the index.html file instead 73 | localFsConf := &cloudstorage.Config{ 74 | Type: localfs.StoreType, 75 | AuthMethod: localfs.AuthFileSystem, 76 | LocalFS: filepath.Join(tmpDir, "mockcloud"), 77 | TmpDir: filepath.Join(tmpDir, "localcache"), 78 | Bucket: "newreaderdir", 79 | } 80 | store, err := cloudstorage.NewStore(localFsConf) 81 | require.NoError(t, err) 82 | err = testutils.MockFile(store, "test/index.html", "test") 83 | require.NoError(t, err) 84 | _, err = store.NewReader("test") 85 | require.Equal(t, err, cloudstorage.ErrObjectNotFound)
86 | err = store.Delete(context.Background(), "test/index.html") 87 | require.NoError(t, err) 88 | } 89 | 90 | func TestGetDir(t *testing.T) { 91 | t.Parallel() 92 | tmpDir := t.TempDir() 93 | 94 | // When a dir is requested, serve the index.html file instead 95 | localFsConf := &cloudstorage.Config{ 96 | Type: localfs.StoreType, 97 | AuthMethod: localfs.AuthFileSystem, 98 | LocalFS: filepath.Join(tmpDir, "mockcloud"), 99 | TmpDir: filepath.Join(tmpDir, "localcache"), 100 | Bucket: "getdir", 101 | } 102 | store, err := cloudstorage.NewStore(localFsConf) 103 | require.NoError(t, err) 104 | err = testutils.MockFile(store, "test/index.html", "test") 105 | require.NoError(t, err) 106 | _, err = store.Get(context.Background(), "test") 107 | require.Equal(t, err, cloudstorage.ErrObjectNotFound) 108 | err = store.Delete(context.Background(), "test/index.html") 109 | require.NoError(t, err) 110 | } 111 | 112 | func TestList(t *testing.T) { 113 | t.Parallel() 114 | 115 | for name, tt := range map[string]struct { 116 | objs map[string]string 117 | q cloudstorage.Query 118 | startOffset string 119 | want []string 120 | }{ 121 | "empty": { 122 | objs: nil, 123 | want: nil, 124 | }, 125 | "one": { 126 | objs: map[string]string{ 127 | "nimi": "ijo", 128 | }, 129 | want: []string{"nimi"}, 130 | }, 131 | "many": { 132 | objs: map[string]string{ 133 | "wan": "loje", 134 | "tu": "jelo", 135 | "tu wan": "laso", 136 | }, 137 | want: []string{"wan", "tu", "tu wan"}, 138 | }, 139 | "start-offset-inclusive": { 140 | objs: map[string]string{ 141 | "a": "ijo", 142 | "b": "ijo", 143 | "c": "ijo", 144 | }, 145 | q: cloudstorage.Query{ 146 | StartOffset: "b", 147 | }, 148 | want: []string{"b", "c"}, 149 | }, 150 | "end-offset-exclusive": { 151 | objs: map[string]string{ 152 | "a": "ijo", 153 | "b": "ijo", 154 | "c": "ijo", 155 | }, 156 | q: cloudstorage.Query{ 157 | EndOffset: "b", 158 | }, 159 | want: []string{"a"}, 160 | }, 161 | "start-and-end-offsets-together": { 162 | objs: 
map[string]string{ 163 | "a": "ijo", 164 | "b": "ijo", 165 | "c": "ijo", 166 | }, 167 | q: cloudstorage.Query{ 168 | StartOffset: "b", 169 | EndOffset: "c", 170 | }, 171 | want: []string{"b"}, 172 | }, 173 | } { 174 | t.Run(name, func(t *testing.T) { 175 | ctx := context.Background() 176 | tmpDir := t.TempDir() 177 | 178 | store, err := localfs.NewLocalStore( 179 | "list", 180 | filepath.Join(tmpDir, "mockcloud"), 181 | filepath.Join(tmpDir, "localcache"), 182 | ) 183 | require.NoError(t, err) 184 | 185 | for k, v := range tt.objs { 186 | w, err := store.NewWriterWithContext(ctx, k, nil) 187 | require.NoError(t, err) 188 | _, err = w.Write([]byte(v)) 189 | require.NoError(t, err) 190 | err = w.Close() 191 | require.NoError(t, err) 192 | } 193 | 194 | got, err := store.List(ctx, tt.q) 195 | require.NoError(t, err) 196 | var names []string 197 | for _, o := range got.Objects { 198 | names = append(names, o.Name()) 199 | } 200 | require.ElementsMatch(t, tt.want, names) 201 | }) 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /google/client.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "os" 7 | 8 | "cloud.google.com/go/storage" 9 | "golang.org/x/net/context" 10 | "golang.org/x/oauth2" 11 | googleOauth2 "golang.org/x/oauth2/google" 12 | "golang.org/x/oauth2/jwt" 13 | "google.golang.org/api/option" 14 | 15 | "github.com/lytics/cloudstorage" 16 | ) 17 | 18 | const ( 19 | // Authentication Source's 20 | 21 | // AuthJWTKeySource is for a complete string representing json of JWT 22 | AuthJWTKeySource cloudstorage.AuthMethod = "LyticsJWTkey" 23 | // AuthGoogleJWTKeySource is a string representing path to a file of JWT 24 | AuthGoogleJWTKeySource cloudstorage.AuthMethod = "GoogleJWTFile" 25 | // AuthGCEMetaKeySource is flag saying to use gcemetadata 26 | AuthGCEMetaKeySource cloudstorage.AuthMethod = "gcemetadata" 27 | 
// AuthGCEDefaultOAuthToken means use local auth where it (google client) 28 | // checks variety of locations for local auth tokens. 29 | AuthGCEDefaultOAuthToken cloudstorage.AuthMethod = "gcedefaulttoken" 30 | ) 31 | 32 | // GoogleOAuthClient An interface so we can return any of the 33 | // 3 Google transporter wrapper as a single interface. 34 | type GoogleOAuthClient interface { 35 | Client() *http.Client 36 | } 37 | type gOAuthClient struct { 38 | httpclient *http.Client 39 | } 40 | 41 | func (g *gOAuthClient) Client() *http.Client { 42 | return g.httpclient 43 | } 44 | 45 | func gcsCommonClient(client *http.Client, conf *cloudstorage.Config) (cloudstorage.Store, error) { 46 | gcs, err := storage.NewClient(context.Background(), option.WithHTTPClient(client)) 47 | if err != nil { 48 | return nil, err 49 | } 50 | store, err := NewGCSStore(gcs, conf.Bucket, conf.TmpDir, conf.EnableCompression, cloudstorage.MaxResults) 51 | if err != nil { 52 | return nil, err 53 | } 54 | return store, nil 55 | } 56 | 57 | // BuildGoogleJWTTransporter create a GoogleOAuthClient from jwt config. 58 | func BuildGoogleJWTTransporter(jwtConf *cloudstorage.JwtConf) (GoogleOAuthClient, error) { 59 | key, err := jwtConf.KeyBytes() 60 | if err != nil { 61 | return nil, err 62 | } 63 | 64 | conf := &jwt.Config{ 65 | Email: jwtConf.ClientEmail, 66 | PrivateKey: key, 67 | Scopes: jwtConf.Scopes, 68 | TokenURL: googleOauth2.JWTTokenURL, 69 | } 70 | 71 | client := conf.Client(oauth2.NoContext) 72 | 73 | return &gOAuthClient{ 74 | httpclient: client, 75 | }, nil 76 | } 77 | 78 | // BuildGoogleFileJWTTransporter creates a Google Storage Client using a JWT file for the jwt config. 
79 | func BuildGoogleFileJWTTransporter(keyPath string, scope string) (GoogleOAuthClient, error) { 80 | jsonKey, err := os.ReadFile(os.ExpandEnv(keyPath)) 81 | if err != nil { 82 | return nil, err 83 | } 84 | 85 | conf, err := googleOauth2.JWTConfigFromJSON(jsonKey, scope) 86 | if err != nil { 87 | return nil, err 88 | } 89 | 90 | client := conf.Client(oauth2.NoContext) 91 | 92 | return &gOAuthClient{ 93 | httpclient: client, 94 | }, nil 95 | } 96 | 97 | /* 98 | The account may be empty or the string "default" to use the instance's main account. 99 | */ 100 | func BuildGCEMetadatTransporter(serviceAccount string) (GoogleOAuthClient, error) { 101 | client := &http.Client{ 102 | Transport: &oauth2.Transport{ 103 | 104 | Source: googleOauth2.ComputeTokenSource(""), 105 | }, 106 | } 107 | 108 | return &gOAuthClient{ 109 | httpclient: client, 110 | }, nil 111 | } 112 | 113 | // BuildDefaultGoogleTransporter builds a transpoter that wraps the google DefaultClient: 114 | // 115 | // Ref https://github.com/golang/oauth2/blob/master/google/default.go#L33 116 | // 117 | // DefaultClient returns an HTTP Client that uses the 118 | // DefaultTokenSource to obtain authentication credentials 119 | // 120 | // Ref : https://github.com/golang/oauth2/blob/master/google/default.go#L41 121 | // 122 | // DefaultTokenSource is a token source that uses 123 | // "Application Default Credentials". 124 | // 125 | // It looks for credentials in the following places, 126 | // preferring the first location found: 127 | // 128 | // 1. A JSON file whose path is specified by the 129 | // GOOGLE_APPLICATION_CREDENTIALS environment variable. 130 | // 2. A JSON file in a location known to the gcloud command-line tool. 131 | // On other systems, $HOME/.config/gcloud/credentials. 132 | // 3. On Google App Engine it uses the appengine.AccessToken function. 133 | // 4. On Google Compute Engine, it fetches credentials from the metadata server. 
134 | // (In this final case any provided scopes are ignored.) 135 | // 136 | // For more details, see: 137 | // https://developers.google.com/accounts/docs/application-default-credentials 138 | // 139 | // Samples of possible scopes: 140 | // Google Cloud Storage : https://github.com/GoogleCloudPlatform/gcloud-golang/blob/69098363d921fa3cf80f930468a41a33edd9ccb9/storage/storage.go#L51 141 | // BigQuery : https://github.com/GoogleCloudPlatform/gcloud-golang/blob/522a8ceb4bb83c2def27baccf31d646bce11a4b2/bigquery/bigquery.go#L52 142 | func BuildDefaultGoogleTransporter(scope ...string) (GoogleOAuthClient, error) { 143 | 144 | client, err := googleOauth2.DefaultClient(context.Background(), scope...) 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | return &gOAuthClient{ 150 | httpclient: client, 151 | }, nil 152 | } 153 | 154 | // NewGoogleClient create new Google Storage Client. 155 | func NewGoogleClient(conf *cloudstorage.Config) (client GoogleOAuthClient, err error) { 156 | 157 | switch conf.AuthMethod { 158 | case AuthGCEDefaultOAuthToken: 159 | // This token method uses the default OAuth token with GCS created by tools like gsutils, gcloud, etc... 
160 | // See github.com/lytics/lio/src/ext_svcs/google/google_transporter.go : BuildDefaultGoogleTransporter 161 | client, err = BuildDefaultGoogleTransporter("") 162 | if err != nil { 163 | return nil, err 164 | } 165 | case AuthGCEMetaKeySource: 166 | client, err = BuildGCEMetadatTransporter("") 167 | if err != nil { 168 | return nil, err 169 | } 170 | case AuthJWTKeySource: 171 | if conf.JwtConf == nil { 172 | return nil, fmt.Errorf("invalid config: missing jwt config struct") 173 | } 174 | // used if you are providing string of json 175 | client, err = BuildGoogleJWTTransporter(conf.JwtConf) 176 | if err != nil { 177 | return nil, err 178 | } 179 | case AuthGoogleJWTKeySource: 180 | switch conf.Scope { 181 | case "": 182 | // See the list here: https://github.com/GoogleCloudPlatform/google-cloud-go/blob/master/storage/storage.go#L58-L68 183 | return nil, fmt.Errorf("invalid config: missing devstorage scope") 184 | } 185 | client, err = BuildGoogleFileJWTTransporter(conf.JwtFile, conf.Scope) 186 | if err != nil { 187 | return nil, err 188 | } 189 | default: 190 | return nil, fmt.Errorf("bad AuthMethod: %v", conf.AuthMethod) 191 | } 192 | 193 | return client, err 194 | } 195 | -------------------------------------------------------------------------------- /google/storeutils/transfer.go: -------------------------------------------------------------------------------- 1 | package storeutils 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "time" 9 | 10 | "google.golang.org/api/storagetransfer/v1" 11 | ) 12 | 13 | type Status string 14 | 15 | const ( 16 | Enabled Status = "ENABLED" 17 | Disabled Status = "DISABLED" 18 | Unspecified Status = "STATUS_UNSPECIFIED" 19 | Deleted Status = "DELETED" 20 | ) 21 | 22 | var ( 23 | // MaxPrefix is the maximum number of prefix filters allowed when transferring files in GCS buckets 24 | MaxPrefix = 20 25 | 26 | ErrBadFilter = errors.New("too many inclusion/exclusion prefixes") 27 | ErrBadConfig = 
errors.New("transferconfig not valid") 28 | ) 29 | 30 | // Transferer manages the transfer of data sources to GCS 31 | type Transferer struct { 32 | svc *storagetransfer.TransferJobsService 33 | } 34 | 35 | // NewTransferClient creates a new Transferer using an authed http client 36 | func NewTransferClient(client *http.Client) (*Transferer, error) { 37 | st, err := storagetransfer.New(client) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | return &Transferer{storagetransfer.NewTransferJobsService(st)}, nil 43 | } 44 | 45 | // List returns all of the transferJobs under a specific project. If the variadic argument "statuses" 46 | // is provided, only jobs with the listed statuses are returned 47 | func (t *Transferer) List(project string, statuses ...Status) ([]*storagetransfer.TransferJob, error) { 48 | var jobs []*storagetransfer.TransferJob 49 | var token string 50 | 51 | body, err := json.Marshal(struct { 52 | ProjectID string `json:"project_id"` 53 | JobStatuses []Status `json:"job_statuses,omitempty"` 54 | }{ 55 | ProjectID: project, 56 | JobStatuses: statuses, 57 | }) 58 | 59 | if err != nil { 60 | return nil, err 61 | } 62 | 63 | for { 64 | call := t.svc.List(string(body)) 65 | if token != "" { 66 | call = call.PageToken(token) 67 | } 68 | 69 | resp, err := call.Do() 70 | if err != nil { 71 | return nil, err 72 | } 73 | 74 | for _, job := range resp.TransferJobs { 75 | jobs = append(jobs, job) 76 | } 77 | 78 | token = resp.NextPageToken 79 | if token == "" { 80 | break 81 | } 82 | } 83 | return jobs, nil 84 | } 85 | 86 | // GetJob returns the transferJob with the specified project and job ID 87 | func (t *Transferer) GetJob(project, job string) (*storagetransfer.TransferJob, error) { 88 | resp, err := t.svc.Get(job, project).Do() 89 | if err != nil { 90 | return nil, err 91 | } 92 | return resp, nil 93 | } 94 | 95 | // NewTransfer creates a new transferJob with the specified project, destination GCS bucket and source. 
96 | // The include/exclude arguments define the file prefixes in the source bucket to include/exclude 97 | func (t *Transferer) NewTransfer(conf *TransferConfig) (*storagetransfer.TransferJob, error) { 98 | job, err := conf.Job() 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | return t.svc.Create(job).Do() 104 | } 105 | 106 | func newTransferJob(project, description string, spec *storagetransfer.TransferSpec, sched *storagetransfer.Schedule) *storagetransfer.TransferJob { 107 | return &storagetransfer.TransferJob{ 108 | ProjectId: project, 109 | Status: string(Enabled), 110 | TransferSpec: spec, 111 | Schedule: sched, 112 | Description: description, 113 | } 114 | } 115 | 116 | // oneTimeJobSchedule returns a storagetransfer job schedule that will only be executed one 117 | func oneTimeJobSchedule(ts time.Time) *storagetransfer.Schedule { 118 | date := toDate(ts) 119 | return &storagetransfer.Schedule{ 120 | ScheduleEndDate: date, 121 | ScheduleStartDate: date, 122 | } 123 | } 124 | 125 | // toDate converts a time into a storagetransfer friendly Date 126 | func toDate(ts time.Time) *storagetransfer.Date { 127 | return &storagetransfer.Date{ 128 | Day: int64(ts.Day()), 129 | Month: int64(ts.Month()), 130 | Year: int64(ts.Year()), 131 | } 132 | } 133 | 134 | // Source defines the data source when transferring data to a GCS bucket. 
While the sink is restricted to a GCS bucket 135 | // the source can either be another GCS bucket, and AWS S3 source, or a HTTP source 136 | // Each source produces a storagetransfer TransferSpec 137 | type Source interface { 138 | TransferSpec(destBucket string) *storagetransfer.TransferSpec 139 | String() string 140 | } 141 | 142 | // GcsSource is a Source defined by a Gcs bucket 143 | type GcsSource struct { 144 | source string 145 | } 146 | 147 | func NewGcsSource(bucket string) Source { 148 | return &GcsSource{bucket} 149 | } 150 | 151 | func (g *GcsSource) TransferSpec(bucket string) *storagetransfer.TransferSpec { 152 | ts := newTransferSpec(bucket) 153 | ts.GcsDataSource = &storagetransfer.GcsData{BucketName: g.source} 154 | return ts 155 | } 156 | 157 | func (g *GcsSource) String() string { 158 | return g.source 159 | } 160 | 161 | // HttpSource is a Source defined by a HTTP URL data source 162 | type HttpSource struct { 163 | url string 164 | } 165 | 166 | func NewHttpSource(url string) Source { 167 | return &HttpSource{url} 168 | } 169 | 170 | func (h *HttpSource) TransferSpec(bucket string) *storagetransfer.TransferSpec { 171 | ts := newTransferSpec(bucket) 172 | ts.HttpDataSource = &storagetransfer.HttpData{ListUrl: h.url} 173 | return ts 174 | } 175 | 176 | func (h *HttpSource) String() string { 177 | return h.url 178 | } 179 | 180 | // AwsSource is an AWS S3 data source 181 | type AwsSource struct { 182 | bucket string 183 | accessKeyId string 184 | secretAccessKey string 185 | } 186 | 187 | func NewAwsSource(bucket, accesskey, secret string) Source { 188 | return &AwsSource{bucket, accesskey, secret} 189 | } 190 | 191 | func (a *AwsSource) TransferSpec(bucket string) *storagetransfer.TransferSpec { 192 | ts := newTransferSpec(bucket) 193 | ts.AwsS3DataSource = &storagetransfer.AwsS3Data{ 194 | AwsAccessKey: &storagetransfer.AwsAccessKey{ 195 | AccessKeyId: a.accessKeyId, 196 | SecretAccessKey: a.secretAccessKey, 197 | }, 198 | BucketName: a.bucket, 
199 | } 200 | return ts 201 | } 202 | 203 | func (a *AwsSource) String() string { 204 | return a.bucket 205 | } 206 | 207 | func newTransferSpec(sink string) *storagetransfer.TransferSpec { 208 | return &storagetransfer.TransferSpec{ 209 | GcsDataSink: &storagetransfer.GcsData{BucketName: sink}, 210 | } 211 | } 212 | 213 | // TransferConfig wraps all of the relevant variables for transfer jobs 214 | // into a unified struct 215 | type TransferConfig struct { 216 | ProjectID string // projectID of destination bucket 217 | DestBucket string 218 | Src Source 219 | IncludePrefixes []string 220 | ExcludePrefixes []string 221 | Schedule *storagetransfer.Schedule 222 | } 223 | 224 | // Job instantiates a Transfer job from the TransferConfig struct 225 | func (t *TransferConfig) Job() (*storagetransfer.TransferJob, error) { 226 | if t.DestBucket == "" || t.Src == nil { 227 | return nil, ErrBadConfig 228 | } 229 | 230 | // Google returns an error if more than 20 inclusionary/exclusionary fields are included 231 | if len(t.IncludePrefixes) > MaxPrefix || len(t.ExcludePrefixes) > MaxPrefix { 232 | return nil, ErrBadFilter 233 | } 234 | 235 | spec := t.Src.TransferSpec(t.DestBucket) 236 | 237 | // Set the file-filters if the conditions are met 238 | if len(t.IncludePrefixes) > 0 || len(t.ExcludePrefixes) > 0 { 239 | spec.ObjectConditions = &storagetransfer.ObjectConditions{ 240 | ExcludePrefixes: t.ExcludePrefixes, 241 | IncludePrefixes: t.IncludePrefixes, 242 | } 243 | } 244 | 245 | // use the provided schedule, or create a 1-time transfer schedule if absent 246 | schedule := t.Schedule 247 | if schedule == nil { 248 | schedule = oneTimeJobSchedule(time.Now()) 249 | } 250 | 251 | description := fmt.Sprintf("%s_%s_transfer", t.DestBucket, t.Src) 252 | return newTransferJob(t.ProjectID, description, spec, schedule), nil 253 | } 254 | -------------------------------------------------------------------------------- /store.go:
-------------------------------------------------------------------------------- 1 | package cloudstorage 2 | 3 | import ( 4 | "encoding/base64" 5 | "fmt" 6 | "io" 7 | "os" 8 | "strings" 9 | "time" 10 | 11 | "github.com/araddon/gou" 12 | "golang.org/x/net/context" 13 | ) 14 | 15 | const ( 16 | // StoreCacheFileExt = ".cache" 17 | StoreCacheFileExt = ".cache" 18 | // ContentTypeKey 19 | ContentTypeKey = "content_type" 20 | // MaxResults default number of objects to retrieve during a list-objects request, 21 | // if more objects exist, then they will need to be paged 22 | MaxResults = 3000 23 | ) 24 | 25 | // AccessLevel is the level of permissions on files 26 | type AccessLevel int 27 | 28 | const ( 29 | // ReadOnly File Permissions Levels 30 | ReadOnly AccessLevel = 0 31 | ReadWrite AccessLevel = 1 32 | ) 33 | 34 | var ( 35 | // ErrObjectNotFound Error of not finding a file(object) 36 | ErrObjectNotFound = fmt.Errorf("object not found") 37 | // ErrObjectExists error trying to create an already existing file. 38 | ErrObjectExists = fmt.Errorf("object already exists in backing store (use store.Get)") 39 | // ErrNotImplemented this feature is not implemented for this store 40 | ErrNotImplemented = fmt.Errorf("Not implemented") 41 | ) 42 | 43 | type ( 44 | Opts struct { 45 | IfNotExists bool 46 | DisableCompression bool 47 | } 48 | 49 | // StoreReader interface to define the Storage Interface abstracting 50 | // the GCS, S3, LocalFile, etc interfaces 51 | StoreReader interface { 52 | // Type is he Store Type [google, s3, azure, localfs, etc] 53 | Type() string 54 | // Client gets access to the underlying native Client for Google, S3, etc 55 | Client() interface{} 56 | // Get returns an object (file) from the cloud store. The object 57 | // isn't opened already, see Object.Open() 58 | // ObjectNotFound will be returned if the object is not found. 
59 | Get(ctx context.Context, o string) (Object, error) 60 | // Objects returns an object Iterator to allow paging through object 61 | // which keeps track of page cursors. Query defines the specific set 62 | // of filters to apply to request. 63 | Objects(ctx context.Context, q Query) (ObjectIterator, error) 64 | // List file/objects filter by given query. This just wraps the object-iterator 65 | // returning full list of objects. 66 | List(ctx context.Context, q Query) (*ObjectsResponse, error) 67 | // Folders creates list of folders 68 | Folders(ctx context.Context, q Query) ([]string, error) 69 | // NewReader creates a new Reader to read the contents of the object. 70 | // ErrObjectNotFound will be returned if the object is not found. 71 | NewReader(o string) (io.ReadCloser, error) 72 | // NewReader with context (for cancelation, etc) 73 | NewReaderWithContext(ctx context.Context, o string) (io.ReadCloser, error) 74 | // String default descriptor. 75 | String() string 76 | } 77 | 78 | // StoreCopy Optional interface to fast path copy. Many of the cloud providers 79 | // don't actually copy bytes. Rather they allow a "pointer" that is a fast copy. 80 | StoreCopy interface { 81 | // Copy from object, to object 82 | Copy(ctx context.Context, src, dst Object) error 83 | } 84 | 85 | // StoreMove Optional interface to fast path move. Many of the cloud providers 86 | // don't actually copy bytes. 87 | StoreMove interface { 88 | // Move from object location, to object location. 89 | Move(ctx context.Context, src, dst Object) error 90 | } 91 | 92 | // Store interface to define the Storage Interface abstracting 93 | // the GCS, S3, LocalFile interfaces 94 | Store interface { 95 | StoreReader 96 | 97 | // NewWriter returns a io.Writer that writes to a Cloud object 98 | // associated with this backing Store object. 99 | // 100 | // A new object will be created if an object with this name already exists. 
101 | // Otherwise any previous object with the same name will be replaced. 102 | // The object will not be available (and any previous object will remain) 103 | // until Close has been called 104 | NewWriter(o string, metadata map[string]string) (io.WriteCloser, error) 105 | // NewWriter but with context. 106 | NewWriterWithContext(ctx context.Context, o string, metadata map[string]string, opts ...Opts) (io.WriteCloser, error) 107 | 108 | // NewObject creates a new empty object backed by the cloud store 109 | // This new object isn't' synced/created in the backing store 110 | // until the object is Closed/Sync'ed. 111 | NewObject(o string) (Object, error) 112 | 113 | // Delete removes the object from the cloud store. 114 | Delete(ctx context.Context, o string) error 115 | } 116 | 117 | // Object is a handle to a cloud stored file/object. Calling Open will pull the remote file onto 118 | // your local filesystem for reading/writing. Calling Sync/Close will push the local copy 119 | // backup to the cloud store. 120 | Object interface { 121 | // Name of object/file. 122 | Name() string 123 | // String is default descriptor. 124 | String() string 125 | // Updated timestamp. 126 | Updated() time.Time 127 | // MetaData is map of arbitrary name/value pairs about object. 128 | MetaData() map[string]string 129 | // SetMetaData allows you to set key/value pairs. 130 | SetMetaData(meta map[string]string) 131 | // StorageSource is the type of store. 132 | StorageSource() string 133 | // Disable transparent compression on syncs and writes for object 134 | DisableCompression() 135 | // Open copies the remote file to a local cache and opens the cached version 136 | // for read/writing. Calling Close/Sync will push the copy back to the 137 | // backing store. 138 | Open(readonly AccessLevel) (*os.File, error) 139 | // Release will remove the locally cached copy of the file. You most call Close 140 | // before releasing. 
Release will call os.Remove(local_copy_file) so opened 141 | // filehandles need to be closed. 142 | Release() error 143 | // Implement io.ReadWriteCloser Open most be called before using these 144 | // functions. 145 | Read(p []byte) (n int, err error) 146 | Write(p []byte) (n int, err error) 147 | Sync() error 148 | Close() error 149 | // File returns the cached/local copy of the file 150 | File() *os.File 151 | // Delete removes the object from the cloud store and local cache. 152 | Delete() error 153 | } 154 | 155 | // ObjectIterator interface to page through objects 156 | // See go doc for examples https://github.com/GoogleCloudPlatform/google-cloud-go/wiki/Iterator-Guidelines 157 | ObjectIterator interface { 158 | // Next gets next object, returns google.golang.org/api/iterator iterator.Done error. 159 | Next() (Object, error) 160 | // Close this down (and or context.Close) 161 | Close() 162 | } 163 | 164 | // ObjectsResponse for paged object apis. 165 | ObjectsResponse struct { 166 | Objects Objects 167 | NextMarker string 168 | } 169 | // Objects are just a collection of Object(s). 170 | // Used as the results for store.List commands. 171 | Objects []Object 172 | 173 | // AuthMethod Is the source/location/type of auth token 174 | AuthMethod string 175 | 176 | // Config the cloud store config settings. 177 | Config struct { 178 | // Type is StoreType [gcs,localfs,s3,azure] 179 | Type string 180 | // AuthMethod the methods of authenticating store. Ie, where/how to 181 | // find auth tokens. 182 | AuthMethod AuthMethod 183 | // Cloud Bucket Project 184 | Project string 185 | // Region is the cloud region 186 | Region string 187 | // Endpoint is the api endpoint 188 | Endpoint string 189 | // Bucket is the "path" or named bucket in cloud 190 | Bucket string 191 | // the page size to use with api requests (default 1000) 192 | PageSize int 193 | // used by JWTKeySource 194 | JwtConf *JwtConf 195 | // JwtFile is the file-path to local auth-token file. 
196 | JwtFile string `json:"jwtfile,omitempty"` 197 | // BaseUrl is the base-url path for customizing regions etc. IE 198 | // AWS has different url paths per region on some situations. 199 | BaseUrl string `json:"baseurl,omitempty"` 200 | // Permissions scope 201 | Scope string `json:"scope,omitempty"` 202 | // LocalFS is filesystem path to use for the local files 203 | // for Type=localfs 204 | LocalFS string `json:"localfs,omitempty"` 205 | // The filesystem path to save locally cached files as they are 206 | // being read/written from cloud and need a staging area. 207 | TmpDir string `json:"tmpdir,omitempty"` 208 | // Settings are catch-all-bag to allow per-implementation over-rides 209 | Settings gou.JsonHelper `json:"settings,omitempty"` 210 | // LogPrefix Logging Prefix/Context message 211 | LogPrefix string 212 | // EnableCompression turns on transparent compression of objects 213 | // Reading pre-existing non-compressed objects continues to work 214 | EnableCompression bool `json:"enablecompression,omitempty"` 215 | } 216 | 217 | // JwtConf For use with google/google_jwttransporter.go 218 | // Which can be used by the google go sdk's. This struct is based on the Google 219 | // Jwt files json for service accounts. 220 | JwtConf struct { 221 | // Unfortuneately we departed from the standard jwt service account field-naming 222 | // for reasons we forgot. So, during load, we convert from bad->correct format. 
223 | PrivateKeyDeprecated string `json:"private_keybase64,omitempty"` 224 | KeyTypeDeprecated string `json:"keytype,omitempty"` 225 | 226 | // Jwt Service Account Fields 227 | ProjectID string `json:"project_id,omitempty"` 228 | PrivateKeyID string `json:"private_key_id,omitempty"` 229 | PrivateKey string `json:"private_key,omitempty"` 230 | ClientEmail string `json:"client_email,omitempty"` 231 | ClientID string `json:"client_id,omitempty"` 232 | Type string `json:"type,omitempty"` 233 | // Scopes is list of what scope to use when the token is created. 234 | // for example https://github.com/google/google-api-go-client/blob/0d3983fb069cb6651353fc44c5cb604e263f2a93/storage/v1/storage-gen.go#L54 235 | Scopes []string `json:"scopes,omitempty"` 236 | } 237 | ) 238 | 239 | // NewStore create new Store from Storage Config/Context. 240 | func NewStore(conf *Config) (Store, error) { 241 | 242 | if conf.Type == "" { 243 | return nil, fmt.Errorf("Type is required on Config") 244 | } 245 | registryMu.RLock() 246 | st, ok := storeProviders[conf.Type] 247 | registryMu.RUnlock() 248 | if !ok { 249 | return nil, fmt.Errorf("config.Type=%q was not found", conf.Type) 250 | } 251 | 252 | if conf.PageSize == 0 { 253 | conf.PageSize = MaxResults 254 | } 255 | 256 | if conf.TmpDir == "" { 257 | conf.TmpDir = os.TempDir() 258 | } 259 | return st(conf) 260 | } 261 | 262 | // Copy source to destination. 263 | func Copy(ctx context.Context, s Store, src, des Object) error { 264 | // for Providers that offer fast path, and use the backend copier 265 | if src.StorageSource() == des.StorageSource() { 266 | if cp, ok := s.(StoreCopy); ok { 267 | return cp.Copy(ctx, src, des) 268 | } 269 | } 270 | 271 | // Slow path, open an io.Reader from the source and copy it to an 272 | // io.Writer to the destination. This is considered a "slow path" because we 273 | // have to act as a broker to relay bytes between the two objects. Some 274 | // stores support moving data using an API call. 
275 | fout, err := s.NewWriterWithContext(ctx, des.Name(), src.MetaData()) 276 | if err != nil { 277 | gou.Warnf("Move could not open destination %v", src.Name()) 278 | return err 279 | } 280 | fin, err := s.NewReaderWithContext(ctx, src.Name()) 281 | if err != nil { 282 | gou.Warnf("Move could not open source %v err=%v", src.Name(), err) 283 | return err 284 | } 285 | if _, err = io.Copy(fout, fin); err != nil { 286 | return err 287 | } 288 | if err := fin.Close(); err != nil { 289 | return err 290 | } 291 | if err := fout.Close(); err != nil { //this will flush and sync the file. 292 | return err 293 | } 294 | return nil 295 | } 296 | 297 | // Move source object to destination. 298 | func Move(ctx context.Context, s Store, src, des Object) error { 299 | // take the fast path, and use the store provided mover if available 300 | if src.StorageSource() == des.StorageSource() { 301 | if sm, ok := s.(StoreMove); ok { 302 | return sm.Move(ctx, src, des) 303 | } 304 | } 305 | 306 | if err := Copy(ctx, s, src, des); err != nil { // use Copy() to copy the files 307 | return err 308 | } 309 | 310 | if err := src.Delete(); err != nil { //delete the src, after des has been flushed/synced 311 | return err 312 | } 313 | 314 | return nil 315 | } 316 | 317 | func NewObjectsResponse() *ObjectsResponse { 318 | return &ObjectsResponse{ 319 | Objects: make(Objects, 0), 320 | } 321 | } 322 | func (o Objects) Len() int { return len(o) } 323 | func (o Objects) Less(i, j int) bool { return o[i].Name() < o[j].Name() } 324 | func (o Objects) Swap(i, j int) { o[i], o[j] = o[j], o[i] } 325 | 326 | // Validate that this is a valid jwt conf set of tokens 327 | func (j *JwtConf) Validate() error { 328 | if j.PrivateKeyDeprecated != "" { 329 | j.PrivateKey = j.PrivateKeyDeprecated 330 | j.PrivateKeyDeprecated = "" 331 | } 332 | j.fixKey() 333 | if j.KeyTypeDeprecated != "" { 334 | j.Type = j.KeyTypeDeprecated 335 | j.KeyTypeDeprecated = "" 336 | } 337 | _, err := j.KeyBytes() 338 | if err != 
nil { 339 | return fmt.Errorf("Invalid JwtConf.PrivateKeyBase64 (error trying to decode base64 err: %v", err) 340 | } 341 | return nil 342 | } 343 | func (j *JwtConf) fixKey() { 344 | parts := strings.Split(j.PrivateKey, "\n") 345 | if len(parts) > 1 { 346 | for _, part := range parts { 347 | if strings.HasPrefix(part, "---") { 348 | continue 349 | } 350 | j.PrivateKey = part 351 | break 352 | } 353 | } 354 | } 355 | func (j *JwtConf) KeyBytes() ([]byte, error) { 356 | if j.PrivateKey == "" { 357 | return nil, fmt.Errorf("invalid config, private key empty") 358 | } 359 | return base64.StdEncoding.DecodeString(j.PrivateKey) 360 | } 361 | -------------------------------------------------------------------------------- /localfs/store.go: -------------------------------------------------------------------------------- 1 | package localfs 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/fs" 9 | "os" 10 | "path" 11 | "path/filepath" 12 | "strings" 13 | "syscall" 14 | "time" 15 | 16 | "github.com/araddon/gou" 17 | "github.com/lytics/cloudstorage" 18 | "github.com/lytics/cloudstorage/csbufio" 19 | "github.com/pborman/uuid" 20 | "golang.org/x/net/context" 21 | "google.golang.org/api/iterator" 22 | ) 23 | 24 | func init() { 25 | cloudstorage.Register(StoreType, localProvider) 26 | } 27 | func localProvider(conf *cloudstorage.Config) (cloudstorage.Store, error) { 28 | store, err := NewLocalStore(conf.Bucket, conf.LocalFS, conf.TmpDir) 29 | if err != nil { 30 | return nil, err 31 | } 32 | return store, nil 33 | } 34 | 35 | var ( 36 | // Ensure Our LocalStore implement CloudStorage interfaces 37 | _ cloudstorage.StoreReader = (*LocalStore)(nil) 38 | ) 39 | 40 | const ( 41 | // AuthFileSystem Authentication Method 42 | AuthFileSystem cloudstorage.AuthMethod = "localfiles" 43 | 44 | // StoreType name of our Local Storage provider = "localfs" 45 | StoreType = "localfs" 46 | ) 47 | 48 | // LocalStore is client to local-filesystem store. 
49 | type LocalStore struct { 50 | storepath string // possibly is relative ./tables 51 | cachepath string 52 | Id string 53 | } 54 | 55 | // NewLocalStore create local store from storage path on local filesystem, and cachepath. 56 | func NewLocalStore(bucket, storepath, cachepath string) (*LocalStore, error) { 57 | 58 | if storepath == "" { 59 | return nil, fmt.Errorf("storepath=%q cannot be empty", storepath) 60 | } 61 | 62 | if storepath == cachepath { 63 | return nil, fmt.Errorf("storepath=%q cannot be the same as cachepath=%q", storepath, cachepath) 64 | } 65 | 66 | storepath = filepath.Join(storepath, bucket) 67 | 68 | err := os.MkdirAll(storepath, 0775) 69 | if err != nil { 70 | return nil, fmt.Errorf("unable to create path. path=%s err=%v", storepath, err) 71 | } 72 | 73 | err = os.MkdirAll(cachepath, 0775) 74 | if err != nil { 75 | return nil, fmt.Errorf("unable to create path. path=%s err=%v", cachepath, err) 76 | } 77 | 78 | uid := uuid.NewUUID().String() 79 | uid = strings.Replace(uid, "-", "", -1) 80 | 81 | return &LocalStore{ 82 | storepath: storepath, 83 | cachepath: cachepath, 84 | Id: uid, 85 | }, nil 86 | } 87 | 88 | // Type is store type = "localfs" 89 | func (l *LocalStore) Type() string { 90 | return StoreType 91 | } 92 | func (l *LocalStore) Client() interface{} { 93 | return l 94 | } 95 | 96 | func (o *object) DisableCompression() {} 97 | 98 | // NewObject create new object of given name. 
99 | func (l *LocalStore) NewObject(objectname string) (cloudstorage.Object, error) { 100 | obj, err := l.Get(context.Background(), objectname) 101 | if err != nil && err != cloudstorage.ErrObjectNotFound { 102 | return nil, err 103 | } else if obj != nil { 104 | return nil, cloudstorage.ErrObjectExists 105 | } 106 | 107 | of := path.Join(l.storepath, objectname) 108 | err = cloudstorage.EnsureDir(of) 109 | if err != nil { 110 | return nil, err 111 | } 112 | 113 | cf := cloudstorage.CachePathObj(l.cachepath, objectname, l.Id) 114 | 115 | metadata, err := readmeta(of + ".metadata") 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | return &object{ 121 | name: objectname, 122 | storepath: of, 123 | cachepath: cf, 124 | metadata: metadata, 125 | }, nil 126 | } 127 | 128 | // List objects at Query location. 129 | func (l *LocalStore) List(ctx context.Context, query cloudstorage.Query) (*cloudstorage.ObjectsResponse, error) { 130 | resp := cloudstorage.NewObjectsResponse() 131 | objects := make(map[string]*object) 132 | metadatas := make(map[string]map[string]string) 133 | 134 | spath := l.storepath 135 | filePre := query.Prefix 136 | li := strings.LastIndex(query.Prefix, "/") 137 | if li > 0 { 138 | spath = path.Join(spath, query.Prefix[:li]) 139 | } 140 | if !cloudstorage.Exists(spath) { 141 | return resp, nil 142 | } 143 | 144 | err := filepath.Walk(spath, func(fo string, f os.FileInfo, err error) error { 145 | if err != nil { 146 | return err 147 | } 148 | 149 | obj := strings.Replace(fo, l.storepath, "", 1) 150 | 151 | if f.IsDir() { 152 | return nil 153 | } else if filepath.Ext(f.Name()) == ".metadata" { 154 | metadata, err := readmeta(f.Name()) 155 | if err != nil { 156 | return err 157 | } 158 | mdkey := strings.Replace(obj, ".metadata", "", 1) 159 | metadatas[mdkey] = metadata 160 | } else { 161 | oname := strings.TrimPrefix(obj, "/") 162 | if filePre != "" && !strings.HasPrefix(oname, filePre) { 163 | return nil 164 | } 165 | 166 | if 
(query.StartOffset != "" && oname < query.StartOffset) || 167 | (query.EndOffset != "" && oname >= query.EndOffset) { 168 | return nil 169 | } 170 | 171 | objects[obj] = &object{ 172 | name: oname, 173 | updated: f.ModTime(), 174 | storepath: fo, 175 | cachepath: cloudstorage.CachePathObj(l.cachepath, oname, l.Id), 176 | } 177 | } 178 | return err 179 | }) 180 | 181 | if err != nil { 182 | return nil, fmt.Errorf("localfile: error occurred listing files. searchpath=%v err=%v", spath, err) 183 | } 184 | 185 | for objname, obj := range objects { 186 | if md, ok := metadatas[objname]; ok { 187 | obj.metadata = md 188 | } 189 | resp.Objects = append(resp.Objects, obj) 190 | } 191 | 192 | resp.Objects = query.ApplyFilters(resp.Objects) 193 | 194 | return resp, nil 195 | } 196 | 197 | // Objects returns an iterator over the objects in the local folder that match the Query q. 198 | // If q is nil, no filtering is done. 199 | func (l *LocalStore) Objects(ctx context.Context, csq cloudstorage.Query) (cloudstorage.ObjectIterator, error) { 200 | resp, err := l.List(ctx, csq) 201 | if err != nil { 202 | return nil, err 203 | } 204 | return &objectIterator{objects: resp.Objects}, nil 205 | } 206 | 207 | // Folders list of folders for given path query. 208 | func (l *LocalStore) Folders(ctx context.Context, csq cloudstorage.Query) ([]string, error) { 209 | spath := path.Join(l.storepath, csq.Prefix) 210 | if !cloudstorage.Exists(spath) { 211 | return []string{}, nil 212 | } 213 | select { 214 | case <-ctx.Done(): 215 | return nil, ctx.Err() 216 | default: 217 | } 218 | 219 | folders := make([]string, 0) 220 | files, _ := os.ReadDir(spath) 221 | for _, f := range files { 222 | if f.IsDir() { 223 | folders = append(folders, fmt.Sprintf("%s/", path.Join(csq.Prefix, f.Name()))) 224 | } 225 | } 226 | return folders, nil 227 | } 228 | 229 | // NewReader create local file-system store reader. 
func (l *LocalStore) NewReader(o string) (io.ReadCloser, error) {
	return l.NewReaderWithContext(context.Background(), o)
}

// pathForObject resolves object name o to its on-disk path, returning
// ErrObjectNotFound when the path is missing or is a directory.
func (l *LocalStore) pathForObject(o string) (string, error) {
	fo := path.Join(l.storepath, o)
	if !cloudstorage.Exists(fo) {
		return "", cloudstorage.ErrObjectNotFound
	}
	stat, err := os.Stat(fo)
	if err != nil {
		return "", err
	}
	if stat.IsDir() {
		return "", cloudstorage.ErrObjectNotFound
	}
	return fo, nil
}

// NewReaderWithContext opens a buffered reader over the object's file.
func (l *LocalStore) NewReaderWithContext(ctx context.Context, o string) (io.ReadCloser, error) {
	fo, err := l.pathForObject(o)
	if err != nil {
		return nil, err
	}
	return csbufio.OpenReader(ctx, fo)
}

// NewWriter creates a writer for object o (see NewWriterWithContext).
func (l *LocalStore) NewWriter(o string, metadata map[string]string) (io.WriteCloser, error) {
	return l.NewWriterWithContext(context.Background(), o, metadata)
}

// NewWriterWithContext truncates/creates the object file and writes the
// sidecar .metadata file up front; opts[0].IfNotExists adds O_EXCL so the
// open fails if the object already exists.
func (l *LocalStore) NewWriterWithContext(ctx context.Context, o string, metadata map[string]string, opts ...cloudstorage.Opts) (io.WriteCloser, error) {
	fo := path.Join(l.storepath, o)

	err := cloudstorage.EnsureDir(fo)
	if err != nil {
		return nil, err
	}

	if len(metadata) == 0 {
		metadata = make(map[string]string)
	}

	// Persist metadata before the data file is written.
	fmd := fo + ".metadata"
	if err := writemeta(fmd, metadata); err != nil {
		return nil, err
	}

	flag := os.O_RDWR | os.O_CREATE | os.O_TRUNC
	if len(opts) > 0 && opts[0].IfNotExists {
		flag = flag | os.O_EXCL
	}
	f, err := os.OpenFile(fo, flag, 0665)
	if err != nil {
		return nil, err
	}

	return csbufio.NewWriter(ctx, f), nil
}

// Get returns an Object handle for o, loading its mod-time and sidecar metadata.
func (l *LocalStore) Get(ctx context.Context, o string) (cloudstorage.Object, error) {
	fo, err := l.pathForObject(o)
	if err != nil {
		return nil, err
	}

	// Stat errors are ignored here; updated stays zero-valued on failure.
	var updated time.Time
	if stat, err := os.Stat(fo); err == nil {
		updated = stat.ModTime()
	}

	metadata, err := readmeta(fo + ".metadata")
	if err != nil {
		return nil, err
	}

	return &object{
		name:      o,
		updated:   updated,
		storepath: fo,
		metadata:  metadata,
		cachepath: cloudstorage.CachePathObj(l.cachepath, o, l.Id),
	}, nil
}

// Delete the object from underlying store.
func (l *LocalStore) Delete(ctx context.Context, obj string) error {
	fo := path.Join(l.storepath, obj)
	if err := os.Remove(fo); err != nil {
		return fmt.Errorf("removing file=%s: %w", fo, err)
	}
	mf := fo + ".metadata"
	if cloudstorage.Exists(mf) {
		if err := os.Remove(mf); err != nil {
			return fmt.Errorf("removing file=%s: %w", mf, err)
		}
	}

	// When the last item in a folder is deleted, the folder
	// should also be deleted. This matches the behavior in GCS.
	return l.deleteParentDirs(fo)
}

// deleteParentDirs deletes all the parent dirs of some filepath
// if those dirs are empty, stopping at the store root.
func (l *LocalStore) deleteParentDirs(filePath string) error {

	for dirName := path.Dir(filePath); len(dirName) > 0; dirName = path.Dir(dirName) {
		if dirName == l.storepath {
			// top level, stop deleting
			return nil
		}
		err := os.Remove(dirName)
		if errors.Is(err, os.ErrNotExist) {
			// it's already deleted; nothing to do.
			return nil
		}
		// There is no equivalent os.ErrNotEmpty in this version of go,
		// so probe the underlying errno for ENOTEMPTY.
		var perr *fs.PathError
		if ok := errors.As(err, &perr); ok {
			if sysErr, ok := perr.Err.(syscall.Errno); ok && sysErr == syscall.ENOTEMPTY {
				// not empty; quit.
				return nil
			}
		}
		// unknown error, return it.
		if err != nil {
			return fmt.Errorf("failed to remove store dir=%s err=%w", dirName, err)
		}
		// we deleted an empty folder, so continue
	}
	return nil
}

// String describes the store: its id and file:// root.
func (l *LocalStore) String() string {
	return fmt.Sprintf("[id:%s file://%s/]", l.Id, l.storepath)
}

// objectIterator pages through a pre-materialized list of objects.
type objectIterator struct {
	objects cloudstorage.Objects
	err     error
	cursor  int
}

// Next returns the next object, or iterator.Done when exhausted.
func (l *objectIterator) Next() (cloudstorage.Object, error) {
	if l.err != nil {
		return nil, l.err
	}
	if l.cursor >= len(l.objects) {
		return nil, iterator.Done
	}
	o := l.objects[l.cursor]
	l.cursor++
	return o, nil
}
func (l *objectIterator) Close() {}

// object is the localfs implementation of cloudstorage.Object.
type object struct {
	name     string
	updated  time.Time
	metadata map[string]string

	storepath string // path of the object file under the store root
	cachepath string // path of the local working copy while open

	cachedcopy *os.File
	readonly   bool
	opened     bool
}

func (o *object) StorageSource() string {
	return StoreType
}
func (o *object) Name() string {
	return o.name
}
func (o *object) String() string {
	return o.name
}
func (o *object) Updated() time.Time {
	return o.updated
}
func (o *object) MetaData() map[string]string {
	return o.metadata
}
func (o *object) SetMetaData(meta map[string]string) {
	o.metadata = meta
}

// Delete releases any cached copy then removes the store file and its
// .metadata sidecar.
func (o *object) Delete() error {
	if err := o.Release(); err != nil {
		gou.Errorf("could not release %v", err)
	}
	if err := os.Remove(o.storepath); err != nil {
		return err
	}
	mf := o.storepath + ".metadata"
	if cloudstorage.Exists(mf) {
		if err := os.Remove(mf); err != nil {
			return err
		}
	}
	return nil
}

// Open copies the store file into the cache path and returns the cached
// file handle; read-only opens reopen the cache with O_RDONLY.
func (o *object) Open(accesslevel cloudstorage.AccessLevel) (*os.File, error) {
	if o.opened {
		return nil, fmt.Errorf("the store object is already opened. %s", o.storepath)
	}

	var readonly = accesslevel == cloudstorage.ReadOnly

	// O_CREATE: opening a not-yet-synced object creates an empty store file.
	storecopy, err := os.OpenFile(o.storepath, os.O_RDWR|os.O_CREATE, 0665)
	if err != nil {
		return nil, fmt.Errorf("localfs: local=%q could not create storecopy err=%v", o.storepath, err)
	}
	defer storecopy.Close()

	err = cloudstorage.EnsureDir(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("localfs: cachepath=%s could not create cachedcopy dir err=%v", o.cachepath, err)
	}

	cachedcopy, err := os.Create(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("localfs: cachepath=%s could not create cachedcopy err=%v", o.cachepath, err)
	}

	_, err = io.Copy(cachedcopy, storecopy)
	if err != nil {
		return nil, fmt.Errorf("localfs: storepath=%s cachedcopy=%v could not copy from store to cache err=%v", o.storepath, cachedcopy.Name(), err)
	}

	if readonly {
		// Reopen the cache read-only so the caller cannot write through it.
		cachedcopy.Close()
		cachedcopy, err = os.Open(o.cachepath)
		if err != nil {
			// NOTE(review): if os.Open fails, cachedcopy is nil here and
			// cachedcopy.Name() would panic — consider using o.cachepath instead.
			return nil, fmt.Errorf("localfs: storepath=%s cachedcopy=%v could not opencache err=%v", o.storepath, cachedcopy.Name(), err)
		}
	} else {
		// NOTE(review): os.SEEK_SET is deprecated; io.SeekStart is the modern equivalent.
		if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
			return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors
		}
	}

	o.cachedcopy = cachedcopy
	o.readonly = readonly
	o.opened = true
	return o.cachedcopy, nil
}

// File returns the cached/local copy of the file (nil until Open is called).
func (o *object) File() *os.File {
	return o.cachedcopy
}
func (o *object) Read(p []byte) (n int, err error) {
	return o.cachedcopy.Read(p)
}

// Write the given bytes to object. Won't be written until Close() or Sync() called.
func (o *object) Write(p []byte) (n int, err error) {
	// Lazily open the object read-write on first Write.
	if o.cachedcopy == nil {
		_, err := o.Open(cloudstorage.ReadWrite)
		if err != nil {
			return 0, err
		}
	}
	return o.cachedcopy.Write(p)
}

// Sync copies the cached local file back over the store file and rewrites
// the sidecar .metadata file. The object must be open and writable.
func (o *object) Sync() error {
	if !o.opened {
		return fmt.Errorf("object isn't opened %s", o.name)
	}
	if o.readonly {
		return fmt.Errorf("trying to Sync a readonly object %s", o.name)
	}

	// Read from a fresh handle so we copy from offset 0 regardless of the
	// caller's current position in o.cachedcopy.
	cachedcopy, err := os.OpenFile(o.cachepath, os.O_RDONLY, 0664)
	if err != nil {
		return err
	}
	defer cachedcopy.Close()

	storecopy, err := os.OpenFile(o.storepath, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0664)
	if err != nil {
		return err
	}
	defer storecopy.Close()

	if len(o.metadata) == 0 {
		o.metadata = make(map[string]string)
	}

	_, err = io.Copy(storecopy, cachedcopy)
	if err != nil {
		return err
	}

	fmd := o.storepath + ".metadata"
	return writemeta(fmd, o.metadata)
}

// readmeta loads the metadata map from the given sidecar file.
// A missing file is not an error; it yields an empty map.
func readmeta(filename string) (map[string]string, error) {
	metadata := make(map[string]string)
	b, err := os.ReadFile(filename)
	if err == nil {
		err = json.Unmarshal(b, &metadata)
		if err != nil {
			return nil, err
		}
	} else {
		if !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
	}
	return metadata, nil
}

// writemeta persists the metadata map as indented JSON to the sidecar file.
func writemeta(filename string, meta map[string]string) error {
	bm, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		return err
	}

	err = os.WriteFile(filename, bm, 0664)
	if err != nil {
		return err
	}
	return nil
}

// Close flushes the cached copy back to the store (when writable), then
// removes the local cache file.
func (o *object) Close() error {
	if !o.opened {
		return nil
	}

	// Always remove the cache file and reset state, even on error.
	defer func() {
		if o.cachedcopy != nil {
			n := o.cachedcopy.Name()
			os.Remove(n)
		}

o.cachedcopy = nil 571 | o.opened = false 572 | }() 573 | 574 | if !o.readonly { 575 | err := o.cachedcopy.Sync() 576 | if err != nil { 577 | return err 578 | } 579 | } 580 | 581 | err := o.cachedcopy.Close() 582 | if err != nil { 583 | if !strings.Contains(err.Error(), os.ErrClosed.Error()) { 584 | return err 585 | } 586 | } 587 | 588 | if o.opened && !o.readonly { 589 | err := o.Sync() 590 | if err != nil { 591 | return err 592 | } 593 | } 594 | 595 | return nil 596 | } 597 | 598 | func (o *object) Release() error { 599 | if o.cachedcopy != nil { 600 | o.cachedcopy.Close() 601 | o.cachedcopy = nil 602 | o.opened = false 603 | err := os.Remove(o.cachepath) 604 | if err != nil { 605 | return err 606 | } 607 | } 608 | // most likely this doesn't exist so don't return error 609 | os.Remove(o.cachepath) 610 | return nil 611 | } 612 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cel.dev/expr v0.20.0 h1:OunBvVCfvpWlt4dN7zg3FM6TDkzOePe1+foGJ9AXeeI= 2 | cel.dev/expr v0.20.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= 3 | cloud.google.com/go v0.121.1 h1:S3kTQSydxmu1JfLRLpKtxRPA7rSrYPRPEUmL/PavVUw= 4 | cloud.google.com/go v0.121.1/go.mod h1:nRFlrHq39MNVWu+zESP2PosMWA0ryJw8KUBZ2iZpxbw= 5 | cloud.google.com/go/auth v0.16.1 h1:XrXauHMd30LhQYVRHLGvJiYeczweKQXZxsTbV9TiguU= 6 | cloud.google.com/go/auth v0.16.1/go.mod h1:1howDHJ5IETh/LwYs3ZxvlkXF48aSqqJUM+5o02dNOI= 7 | cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= 8 | cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= 9 | cloud.google.com/go/compute/metadata v0.7.0 h1:PBWF+iiAerVNe8UCHxdOt6eHLVc3ydFeOCw78U8ytSU= 10 | cloud.google.com/go/compute/metadata v0.7.0/go.mod h1:j5MvL9PprKL39t166CoB1uVHfQMs4tFQZZcKwksXUjo= 11 | cloud.google.com/go/iam v1.5.2 h1:qgFRAGEmd8z6dJ/qyEchAuL9jpswyODjA2lS+w234g8= 12 
| cloud.google.com/go/iam v1.5.2/go.mod h1:SE1vg0N81zQqLzQEwxL2WI6yhetBdbNQuTvIKCSkUHE= 13 | cloud.google.com/go/logging v1.13.0 h1:7j0HgAp0B94o1YRDqiqm26w4q1rDMH7XNRU34lJXHYc= 14 | cloud.google.com/go/logging v1.13.0/go.mod h1:36CoKh6KA/M0PbhPKMq6/qety2DCAErbhXT62TuXALA= 15 | cloud.google.com/go/longrunning v0.6.7 h1:IGtfDWHhQCgCjwQjV9iiLnUta9LBCo8R9QmAFsS/PrE= 16 | cloud.google.com/go/longrunning v0.6.7/go.mod h1:EAFV3IZAKmM56TyiE6VAP3VoTzhZzySwI/YI1s/nRsY= 17 | cloud.google.com/go/monitoring v1.24.2 h1:5OTsoJ1dXYIiMiuL+sYscLc9BumrL3CarVLL7dd7lHM= 18 | cloud.google.com/go/monitoring v1.24.2/go.mod h1:x7yzPWcgDRnPEv3sI+jJGBkwl5qINf+6qY4eq0I9B4U= 19 | cloud.google.com/go/storage v1.55.0 h1:NESjdAToN9u1tmhVqhXCaCwYBuvEhZLLv0gBr+2znf0= 20 | cloud.google.com/go/storage v1.55.0/go.mod h1:ztSmTTwzsdXe5syLVS0YsbFxXuvEmEyZj7v7zChEmuY= 21 | cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4= 22 | cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI= 23 | github.com/Azure/azure-sdk-for-go v67.1.0+incompatible h1:oziYcaopbnIKfM69DL05wXdypiqfrUKdxUKrKpynJTw= 24 | github.com/Azure/azure-sdk-for-go v67.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= 25 | github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= 26 | github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= 27 | github.com/Azure/go-autorest/autorest v0.11.28 h1:ndAExarwr5Y+GaHE6VCaY1kyS/HwwGGyuimVhWsHOEM= 28 | github.com/Azure/go-autorest/autorest v0.11.28/go.mod h1:MrkzG3Y3AH668QyF9KRk5neJnGgmhQ6krbhR8Q5eMvA= 29 | github.com/Azure/go-autorest/autorest/adal v0.9.18 h1:kLnPsRjzZZUF3K5REu/Kc+qMQrvuza2bwSnNdhmzLfQ= 30 | github.com/Azure/go-autorest/autorest/adal v0.9.18/go.mod h1:XVVeme+LZwABT8K5Lc3hA4nAe8LDBVle26gTrguhhPQ= 31 | github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= 32 | 
github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= 33 | github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= 34 | github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw= 35 | github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU= 36 | github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk= 37 | github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE= 38 | github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg= 39 | github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= 40 | github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= 41 | github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= 42 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 h1:ErKg/3iS1AKcTkf3yixlZ54f9U1rljCkQyEXWUnIUxc= 43 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0/go.mod h1:yAZHSGnqScoU556rBOVkwLze6WP5N+U11RHuWaGVxwY= 44 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 h1:fYE9p3esPxA/C0rQ0AHhP0drtPXDRhaWiwg1DPqO7IU= 45 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0/go.mod h1:BnBReJLvVYx2CS/UHOgVz2BXKXD9wsQPxZug20nZhd0= 46 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0 h1:OqVGm6Ei3x5+yZmSJG1Mh2NwHvpVmZ08CB5qJhT9Nuk= 47 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0/go.mod h1:SZiPHWGOOk3bl8tkevxkoiwPgsIl6CwrWcbwjfHZpdM= 48 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 
h1:6/0iUd0xrnX7qt+mLNRwg5c0PGv8wpE8K90ryANQwMI= 49 | github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0= 50 | github.com/acomagu/bufpipe v1.0.4 h1:e3H4WUzM3npvo5uv95QuJM3cQspFNtFBzvJ2oNjKIDQ= 51 | github.com/acomagu/bufpipe v1.0.4/go.mod h1:mxdxdup/WdsKVreO5GpW4+M/1CE2sMG4jeGJ2sYmHc4= 52 | github.com/araddon/gou v0.0.0-20211019181548-e7d08105776c h1:XUqw//RExYoxW4Eie8MuKp8sEDAZI1gMHX/daUFgZww= 53 | github.com/araddon/gou v0.0.0-20211019181548-e7d08105776c/go.mod h1:ikc1XA58M+Rx7SEbf0bLJCfBkwayZ8T5jBo5FXK8Uz8= 54 | github.com/aws/aws-sdk-go v1.44.146 h1:7YdGgPxDPRJu/yYffzZp/H7yHzQ6AqmuNFZPYraaN8I= 55 | github.com/aws/aws-sdk-go v1.44.146/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= 56 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 57 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 58 | github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk= 59 | github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= 60 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 61 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 62 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 63 | github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= 64 | github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= 65 | github.com/envoyproxy/go-control-plane v0.13.4 h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M= 66 | github.com/envoyproxy/go-control-plane v0.13.4/go.mod h1:kDfuBlDVsSj2MjrLEtRWtHlsWIFcGyB2RMO44Dc5GZA= 67 | github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= 68 | 
github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= 69 | github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= 70 | github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= 71 | github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= 72 | github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= 73 | github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 74 | github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 75 | github.com/go-jose/go-jose/v4 v4.0.4 h1:VsjPI33J0SB9vQM6PLmNjoHqMQNGPiZ0rHL7Ni7Q6/E= 76 | github.com/go-jose/go-jose/v4 v4.0.4/go.mod h1:NKb5HO1EZccyMpiZNbdUw/14tiXNyUJh188dfnMCAfc= 77 | github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 78 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 79 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 80 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 81 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 82 | github.com/gofrs/uuid v4.3.1+incompatible h1:0/KbAdpx3UXAx1kEOWHJeOkpbgRFGHVgv+CFIY7dBJI= 83 | github.com/gofrs/uuid v4.3.1+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= 84 | github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= 85 | github.com/golang-jwt/jwt/v4 v4.2.0 h1:besgBTC8w8HjP6NzQdxwKH9Z5oQMZ24ThTrHp3cZ8eU= 86 | github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= 87 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 88 | github.com/golang/protobuf v1.5.4/go.mod 
h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 89 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 90 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 91 | github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= 92 | github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= 93 | github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= 94 | github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= 95 | github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 96 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 97 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 98 | github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4= 99 | github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= 100 | github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3GqO0k0= 101 | github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w= 102 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 103 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 104 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 105 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 106 | github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= 107 | github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= 108 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 109 | github.com/kr/pretty v0.3.1/go.mod 
h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 110 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 111 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 112 | github.com/matryer/is v1.2.0 h1:92UTHpy8CDwaJ08GqLDzhhuixiBUUD1p3AU6PHddz4A= 113 | github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= 114 | github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= 115 | github.com/pborman/uuid v1.2.1 h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw= 116 | github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= 117 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 118 | github.com/pkg/sftp v1.13.5 h1:a3RLUqkyjYRtBTZJZ1VRrKbN3zhuPLlUc3sphVz81go= 119 | github.com/pkg/sftp v1.13.5/go.mod h1:wHDZ0IZX6JcBYRK1TH9bcVq8G7TLpVHYIGJRFnmPfxg= 120 | github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= 121 | github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= 122 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 123 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 124 | github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= 125 | github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= 126 | github.com/spiffe/go-spiffe/v2 v2.5.0 h1:N2I01KCUkv1FAjZXJMwh95KK1ZIQLYbPfhaxw8WS0hE= 127 | github.com/spiffe/go-spiffe/v2 v2.5.0/go.mod h1:P+NxobPc6wXhVtINNtFjNWGBTreew1GBUCwT2wPmb7g= 128 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 129 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 130 | 
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 131 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 132 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 133 | github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM= 134 | github.com/zeebo/errs v1.4.0/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4= 135 | go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= 136 | go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 137 | go.opentelemetry.io/contrib/detectors/gcp v1.36.0 h1:F7q2tNlCaHY9nMKHR6XH9/qkp8FktLnIcy6jJNyOCQw= 138 | go.opentelemetry.io/contrib/detectors/gcp v1.36.0/go.mod h1:IbBN8uAIIx734PTonTPxAxnjc2pQTxWNkwfstZ+6H2k= 139 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw= 140 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM= 141 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= 142 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= 143 | go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= 144 | go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= 145 | go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 h1:rixTyDGXFxRy1xzhKrotaHy3/KXdPhlWARrCgK+eqUY= 146 | go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0/go.mod h1:dowW6UsM9MKbJq5JTz2AMVp3/5iW5I/TStsk8S+CfHw= 147 | go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= 148 | go.opentelemetry.io/otel/metric v1.36.0/go.mod 
h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= 149 | go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= 150 | go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= 151 | go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis= 152 | go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4= 153 | go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= 154 | go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= 155 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 156 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 157 | golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= 158 | golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= 159 | golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= 160 | golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= 161 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 162 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 163 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 164 | golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 165 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 166 | golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= 167 | golang.org/x/net v0.40.0 
h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= 168 | golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= 169 | golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= 170 | golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= 171 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 172 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 173 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 174 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 175 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 176 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 177 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 178 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 179 | golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 180 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 181 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 182 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 183 | golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= 184 | golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 185 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 186 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 187 | golang.org/x/term 
v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 188 | golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= 189 | golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= 190 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 191 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 192 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 193 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 194 | golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 195 | golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= 196 | golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= 197 | golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= 198 | golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= 199 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 200 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 201 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 202 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 203 | google.golang.org/api v0.235.0 h1:C3MkpQSRxS1Jy6AkzTGKKrpSCOd2WOGrezZ+icKSkKo= 204 | google.golang.org/api v0.235.0/go.mod h1:QpeJkemzkFKe5VCE/PMv7GsUfn9ZF+u+q1Q7w6ckxTg= 205 | google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 h1:1tXaIXCracvtsRxSBsYDiSBN0cuJvM7QYW+MrpIRY78= 206 | google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2/go.mod h1:49MsLSx0oWMOZqcpB3uL8ZOkAh1+TndpJ8ONoCBWiZk= 207 | google.golang.org/genproto/googleapis/api v0.0.0-20250512202823-5a2f75b736a9 h1:WvBuA5rjZx9SNIzgcU53OohgZy6lKSus++uY4xLaWKc= 208 | 
google.golang.org/genproto/googleapis/api v0.0.0-20250512202823-5a2f75b736a9/go.mod h1:W3S/3np0/dPWsWLi1h/UymYctGXaGBM2StwzD0y140U= 209 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 h1:IkAfh6J/yllPtpYFU0zZN1hUPYdT0ogkBT/9hMxHjvg= 210 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= 211 | google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= 212 | google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= 213 | google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 214 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 215 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 216 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 217 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 218 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 219 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 220 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 221 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 222 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 223 | -------------------------------------------------------------------------------- /awss3/store.go: -------------------------------------------------------------------------------- 1 | package awss3 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | "os" 8 | "path" 9 | "strings" 10 | "sync" 11 | "time" 12 | 13 | "github.com/araddon/gou" 14 | "github.com/pborman/uuid" 15 | "golang.org/x/net/context" 16 | 17 | "github.com/aws/aws-sdk-go/aws" 18 | 
"github.com/aws/aws-sdk-go/aws/credentials" 19 | "github.com/aws/aws-sdk-go/aws/session" 20 | "github.com/aws/aws-sdk-go/service/s3" 21 | "github.com/aws/aws-sdk-go/service/s3/s3manager" 22 | 23 | "github.com/lytics/cloudstorage" 24 | "github.com/lytics/cloudstorage/csbufio" 25 | ) 26 | 27 | const ( 28 | // StoreType = "s3" this is used to define the storage type to create 29 | // from cloudstorage.NewStore(config) 30 | StoreType = "s3" 31 | 32 | // Configuration Keys. These are the names of keys 33 | // to look for in the json map[string]string to extract for config. 34 | 35 | // ConfKeyAccessKey config key name of the aws access_key(id) for auth 36 | ConfKeyAccessKey = "access_key" 37 | // ConfKeyAccessSecret config key name of the aws acccess secret 38 | ConfKeyAccessSecret = "access_secret" 39 | // ConfKeyARN config key name of the aws ARN name of user 40 | ConfKeyARN = "arn" 41 | // ConfKeyDisableSSL config key name of disabling ssl flag 42 | ConfKeyDisableSSL = "disable_ssl" 43 | // ConfKeyDebugLog config key to enable LogDebug log level 44 | ConfKeyDebugLog = "debug_log" 45 | // Authentication Source's 46 | 47 | // AuthAccessKey is for using aws access key/secret pairs 48 | AuthAccessKey cloudstorage.AuthMethod = "aws_access_key" 49 | ) 50 | 51 | var ( 52 | // Retries number of times to retry upon failures. 53 | Retries = 3 54 | // PageSize is default page size 55 | PageSize = 2000 56 | 57 | // ErrNoS3Session no valid session 58 | ErrNoS3Session = fmt.Errorf("no valid aws session was created") 59 | // ErrNoAccessKey error for no access_key 60 | ErrNoAccessKey = fmt.Errorf("no settings.access_key") 61 | // ErrNoAccessSecret error for no settings.access_secret 62 | ErrNoAccessSecret = fmt.Errorf("no settings.access_secret") 63 | // ErrNoAuth error for no findable auth 64 | ErrNoAuth = fmt.Errorf("No auth provided") 65 | ) 66 | 67 | func init() { 68 | // Register this Driver (s3) in cloudstorage driver registry. 
69 | cloudstorage.Register(StoreType, func(conf *cloudstorage.Config) (cloudstorage.Store, error) { 70 | client, sess, err := NewClient(conf) 71 | if err != nil { 72 | return nil, err 73 | } 74 | return NewStore(client, sess, conf) 75 | }) 76 | } 77 | 78 | type ( 79 | // FS Simple wrapper for accessing s3 files, it doesn't currently implement a 80 | // Reader/Writer interface so not useful for stream reading of large files yet. 81 | FS struct { 82 | PageSize int 83 | ID string 84 | client *s3.S3 85 | sess *session.Session 86 | endpoint string 87 | bucket string 88 | cachepath string 89 | } 90 | 91 | object struct { 92 | fs *FS 93 | o *s3.GetObjectOutput 94 | cachedcopy *os.File 95 | 96 | name string // aka "key" in s3 97 | updated time.Time // LastModifyied in s3 98 | metadata map[string]string 99 | bucket string 100 | readonly bool 101 | opened bool 102 | cachepath string 103 | 104 | infoOnce sync.Once 105 | infoErr error 106 | } 107 | ) 108 | 109 | // NewClient create new AWS s3 Client. Uses cloudstorage.Config to read 110 | // necessary config settings such as bucket, region, auth. 111 | func NewClient(conf *cloudstorage.Config) (*s3.S3, *session.Session, error) { 112 | 113 | awsConf := aws.NewConfig(). 114 | WithHTTPClient(http.DefaultClient). 115 | WithMaxRetries(aws.UseServiceDefaultRetries). 116 | WithLogger(aws.NewDefaultLogger()). 117 | WithLogLevel(aws.LogOff). 
118 | WithSleepDelay(time.Sleep) 119 | 120 | if conf.Region != "" { 121 | awsConf.WithRegion(conf.Region) 122 | } else { 123 | awsConf.WithRegion("us-east-1") 124 | } 125 | 126 | if conf.Endpoint != "" { 127 | awsConf.WithEndpoint(conf.Endpoint) 128 | } 129 | 130 | switch conf.AuthMethod { 131 | case AuthAccessKey: 132 | accessKey := conf.Settings.String(ConfKeyAccessKey) 133 | if accessKey == "" { 134 | return nil, nil, ErrNoAccessKey 135 | } 136 | secretKey := conf.Settings.String(ConfKeyAccessSecret) 137 | if secretKey == "" { 138 | return nil, nil, ErrNoAccessSecret 139 | } 140 | awsConf.WithCredentials(credentials.NewStaticCredentials(accessKey, secretKey, "")) 141 | default: 142 | return nil, nil, ErrNoAuth 143 | } 144 | 145 | if conf.BaseUrl != "" { 146 | awsConf.WithEndpoint(conf.BaseUrl).WithS3ForcePathStyle(true) 147 | } 148 | 149 | if conf.Settings.Bool(ConfKeyDebugLog) { 150 | awsConf.WithLogLevel(aws.LogDebug) 151 | } 152 | 153 | disableSSL := conf.Settings.Bool(ConfKeyDisableSSL) 154 | if disableSSL { 155 | awsConf.WithDisableSSL(true) 156 | } 157 | 158 | sess := session.New(awsConf) 159 | if sess == nil { 160 | return nil, nil, ErrNoS3Session 161 | } 162 | 163 | s3Client := s3.New(sess) 164 | 165 | return s3Client, sess, nil 166 | } 167 | 168 | // NewStore Create AWS S3 storage client of type cloudstorage.Store 169 | func NewStore(c *s3.S3, sess *session.Session, conf *cloudstorage.Config) (*FS, error) { 170 | 171 | if conf.TmpDir == "" { 172 | return nil, fmt.Errorf("unable to create cachepath. config.tmpdir=%q", conf.TmpDir) 173 | } 174 | err := os.MkdirAll(conf.TmpDir, 0775) 175 | if err != nil { 176 | return nil, fmt.Errorf("unable to create cachepath. 
config.tmpdir=%q err=%v", conf.TmpDir, err) 177 | } 178 | 179 | uid := uuid.NewUUID().String() 180 | uid = strings.Replace(uid, "-", "", -1) 181 | 182 | return &FS{ 183 | client: c, 184 | sess: sess, 185 | bucket: conf.Bucket, 186 | cachepath: conf.TmpDir, 187 | ID: uid, 188 | PageSize: cloudstorage.MaxResults, 189 | }, nil 190 | } 191 | 192 | // Type of store = "s3" 193 | func (f *FS) Type() string { 194 | return StoreType 195 | } 196 | 197 | // Client gets access to the underlying s3 cloud storage client. 198 | func (f *FS) Client() interface{} { 199 | return f.client 200 | } 201 | 202 | // String function to provide s3://..../file path 203 | func (f *FS) String() string { 204 | return fmt.Sprintf("s3://%s/", f.bucket) 205 | } 206 | 207 | // NewObject of Type s3. 208 | func (f *FS) NewObject(objectname string) (cloudstorage.Object, error) { 209 | obj, err := f.Get(context.Background(), objectname) 210 | if err != nil && err != cloudstorage.ErrObjectNotFound { 211 | return nil, err 212 | } else if obj != nil { 213 | return nil, cloudstorage.ErrObjectExists 214 | } 215 | 216 | cf := cloudstorage.CachePathObj(f.cachepath, objectname, f.ID) 217 | 218 | return &object{ 219 | fs: f, 220 | name: objectname, 221 | metadata: map[string]string{cloudstorage.ContentTypeKey: cloudstorage.ContentType(objectname)}, 222 | bucket: f.bucket, 223 | cachedcopy: nil, 224 | cachepath: cf, 225 | }, nil 226 | } 227 | 228 | // Get a single File Object 229 | func (f *FS) Get(ctx context.Context, objectpath string) (cloudstorage.Object, error) { 230 | 231 | obj, err := f.getObjectMeta(ctx, objectpath) 232 | if err != nil { 233 | return nil, err 234 | } else if obj == nil { 235 | return nil, cloudstorage.ErrObjectNotFound 236 | } 237 | 238 | return obj, nil 239 | } 240 | 241 | // get single object 242 | func (f *FS) getObjectMeta(ctx context.Context, objectname string) (*object, error) { 243 | 244 | req := &s3.HeadObjectInput{ 245 | Key: aws.String(objectname), 246 | Bucket: 
// convertMetaData flattens an s3 pointer-valued metadata map into a plain
// map[string]string, lower-casing every key. Nil values become the empty
// string. The error result is always nil; it is retained so the signature
// stays compatible with existing callers.
func convertMetaData(m map[string]*string) (map[string]string, error) {
	out := make(map[string]string, len(m))
	for k, v := range m {
		var val string
		if v != nil {
			val = *v
		}
		out[strings.ToLower(k)] = val
	}
	return out, nil
}
291 | func (f *FS) List(ctx context.Context, q cloudstorage.Query) (*cloudstorage.ObjectsResponse, error) { 292 | 293 | itemLimit := int64(f.PageSize) 294 | if q.PageSize > 0 { 295 | itemLimit = int64(q.PageSize) 296 | } 297 | 298 | params := &s3.ListObjectsInput{ 299 | Bucket: aws.String(f.bucket), 300 | Marker: &q.Marker, 301 | MaxKeys: &itemLimit, 302 | Prefix: &q.Prefix, 303 | } 304 | 305 | resp, err := f.client.ListObjects(params) 306 | if err != nil { 307 | gou.Warnf("err = %v", err) 308 | return nil, err 309 | } 310 | 311 | objResp := &cloudstorage.ObjectsResponse{ 312 | Objects: make(cloudstorage.Objects, len(resp.Contents)), 313 | } 314 | 315 | for i, o := range resp.Contents { 316 | objResp.Objects[i] = newObject(f, o) 317 | } 318 | 319 | if resp.IsTruncated != nil && *resp.IsTruncated { 320 | lastObj := *resp.Contents[len(resp.Contents)-1].Key 321 | objResp.NextMarker = lastObj 322 | } 323 | 324 | return objResp, nil 325 | } 326 | 327 | func (o *object) DisableCompression() {} 328 | 329 | // Objects returns an iterator over the objects in the s3 bucket that match the Query q. 330 | // If q is nil, no filtering is done. 331 | func (f *FS) Objects(ctx context.Context, q cloudstorage.Query) (cloudstorage.ObjectIterator, error) { 332 | return cloudstorage.NewObjectPageIterator(ctx, f, q), nil 333 | } 334 | 335 | // Folders get folders list. 336 | func (f *FS) Folders(ctx context.Context, q cloudstorage.Query) ([]string, error) { 337 | 338 | q.Delimiter = "/" 339 | 340 | // Think we should just put 1 here right? 
341 | itemLimit := int64(f.PageSize) 342 | if q.PageSize > 0 { 343 | itemLimit = int64(q.PageSize) 344 | } 345 | 346 | params := &s3.ListObjectsInput{ 347 | Bucket: aws.String(f.bucket), 348 | MaxKeys: &itemLimit, 349 | Prefix: &q.Prefix, 350 | Delimiter: &q.Delimiter, 351 | } 352 | 353 | folders := make([]string, 0) 354 | 355 | for { 356 | select { 357 | case <-ctx.Done(): 358 | // If has been closed 359 | return folders, ctx.Err() 360 | default: 361 | if q.Marker != "" { 362 | params.Marker = &q.Marker 363 | } 364 | resp, err := f.client.ListObjectsWithContext(ctx, params) 365 | if err != nil { 366 | return nil, err 367 | } 368 | for _, cp := range resp.CommonPrefixes { 369 | folders = append(folders, strings.TrimPrefix(*cp.Prefix, `/`)) 370 | } 371 | return folders, nil 372 | } 373 | } 374 | } 375 | 376 | /* 377 | // Copy from src to destination 378 | func (f *FS) Copy(ctx context.Context, src, des cloudstorage.Object) error { 379 | 380 | so, ok := src.(*object) 381 | if !ok { 382 | return fmt.Errorf("Copy source file expected s3 but got %T", src) 383 | } 384 | do, ok := des.(*object) 385 | if !ok { 386 | return fmt.Errorf("Copy destination expected s3 but got %T", des) 387 | } 388 | 389 | oh := so.b.Object(so.name) 390 | dh := do.b.Object(do.name) 391 | 392 | _, err := dh.CopierFrom(oh).Run(ctx) 393 | return err 394 | } 395 | 396 | // Move which is a Copy & Delete 397 | func (f *FS) Move(ctx context.Context, src, des cloudstorage.Object) error { 398 | 399 | so, ok := src.(*object) 400 | if !ok { 401 | return fmt.Errorf("Move source file expected s3 but got %T", src) 402 | } 403 | do, ok := des.(*object) 404 | if !ok { 405 | return fmt.Errorf("Move destination expected s3 but got %T", des) 406 | } 407 | 408 | oh := so.b.Object(so.name) 409 | dh := do.b.Object(des.name) 410 | 411 | if _, err := dh.CopierFrom(oh).Run(ctx); err != nil { 412 | return err 413 | } 414 | 415 | return oh.Delete(ctx) 416 | } 417 | */ 418 | 419 | // NewReader create file reader. 
420 | func (f *FS) NewReader(o string) (io.ReadCloser, error) { 421 | return f.NewReaderWithContext(context.Background(), o) 422 | } 423 | 424 | // NewReaderWithContext create new File reader with context. 425 | func (f *FS) NewReaderWithContext(ctx context.Context, objectname string) (io.ReadCloser, error) { 426 | res, err := f.client.GetObjectWithContext(ctx, &s3.GetObjectInput{ 427 | Key: aws.String(objectname), 428 | Bucket: aws.String(f.bucket), 429 | }) 430 | if err != nil { 431 | // translate the string error to typed error 432 | if strings.Contains(err.Error(), "NoSuchKey") { 433 | return nil, cloudstorage.ErrObjectNotFound 434 | } 435 | return nil, err 436 | } 437 | return res.Body, nil 438 | } 439 | 440 | // NewWriter create Object Writer. 441 | func (f *FS) NewWriter(objectName string, metadata map[string]string) (io.WriteCloser, error) { 442 | return f.NewWriterWithContext(context.Background(), objectName, metadata) 443 | } 444 | 445 | // NewWriterWithContext create writer with provided context and metadata. 446 | func (f *FS) NewWriterWithContext(ctx context.Context, objectName string, metadata map[string]string, opts ...cloudstorage.Opts) (io.WriteCloser, error) { 447 | if len(opts) > 0 && opts[0].IfNotExists { 448 | return nil, fmt.Errorf("options IfNotExists not supported for store type") 449 | } 450 | 451 | // Create an uploader with the session and default options 452 | uploader := s3manager.NewUploader(f.sess) 453 | 454 | pr, pw := io.Pipe() 455 | bw := csbufio.NewWriter(ctx, pw) 456 | 457 | go func() { 458 | // TODO: this needs to be managed, ie shutdown signals, close, handler err etc. 459 | 460 | // Upload the file to S3. 
461 | _, err := uploader.UploadWithContext(ctx, &s3manager.UploadInput{ 462 | Bucket: aws.String(f.bucket), 463 | Key: aws.String(objectName), 464 | Body: pr, 465 | }) 466 | if err != nil { 467 | gou.Warnf("could not upload %v", err) 468 | } 469 | }() 470 | 471 | return bw, nil 472 | } 473 | 474 | // Delete requested object path string. 475 | func (f *FS) Delete(ctx context.Context, obj string) error { 476 | params := &s3.DeleteObjectInput{ 477 | Bucket: aws.String(f.bucket), 478 | Key: aws.String(obj), 479 | } 480 | 481 | _, err := f.client.DeleteObjectWithContext(ctx, params) 482 | if err != nil { 483 | return err 484 | } 485 | return nil 486 | } 487 | 488 | func newObject(f *FS, o *s3.Object) *object { 489 | obj := &object{ 490 | fs: f, 491 | name: *o.Key, 492 | bucket: f.bucket, 493 | cachepath: cloudstorage.CachePathObj(f.cachepath, *o.Key, f.ID), 494 | } 495 | if o.LastModified != nil { 496 | obj.updated = *o.LastModified 497 | } 498 | return obj 499 | } 500 | func newObjectFromHead(f *FS, name string, o *s3.HeadObjectOutput) *object { 501 | obj := &object{ 502 | fs: f, 503 | name: name, 504 | bucket: f.bucket, 505 | cachepath: cloudstorage.CachePathObj(f.cachepath, name, f.ID), 506 | } 507 | if o.LastModified != nil { 508 | obj.updated = *o.LastModified 509 | } 510 | // metadata? 
// Open downloads the object (if it exists in s3) into a local cached-copy
// file under o.cachepath and returns the open *os.File handle. With
// cloudstorage.ReadOnly the file is reopened read-only after download;
// otherwise the writable handle is returned seeked to the start. Fetching
// from s3 is retried up to Retries times with cloudstorage.Backoff between
// attempts; local filesystem errors are never retried. An object may only
// be opened once (o.opened guards re-entry).
func (o *object) Open(accesslevel cloudstorage.AccessLevel) (*os.File, error) {
	if o.opened {
		return nil, fmt.Errorf("the store object is already opened. %s", o.name)
	}

	var errs []error = make([]error, 0) // accumulated retryable fetch errors, reported on final failure
	var cachedcopy *os.File = nil
	var err error
	var readonly = accesslevel == cloudstorage.ReadOnly

	// Ensure the cache directory hierarchy exists before creating the file.
	err = os.MkdirAll(path.Dir(o.cachepath), 0775)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating cachedcopy dir. cachepath=%s object=%s err=%v", o.cachepath, o.name, err)
	}

	err = cloudstorage.EnsureDir(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating cachedcopy's dir. cachepath=%s err=%v", o.cachepath, err)
	}

	// Create (truncate) the local cache file that will hold the download.
	cachedcopy, err = os.Create(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating file. local=%s err=%v", o.cachepath, err)
	}

	for try := 0; try < Retries; try++ {
		// Fetch the s3 object body once; o.o caches it across retries.
		if o.o == nil {
			obj, err := o.fs.getS3OpenObject(context.Background(), o.name)
			if err != nil {
				if err == cloudstorage.ErrObjectNotFound {
					// New, this is fine
				} else {
					// lets re-try
					errs = append(errs, fmt.Errorf("error getting object err=%v", err))
					cloudstorage.Backoff(try)
					continue
				}
			}

			if obj != nil {
				o.o = obj
			}
		}

		if o.o != nil {
			// we have a preexisting object, so lets download it..
			// NOTE(review): defer inside the retry loop — the body is only
			// closed when Open returns, and a retry after a failed io.Copy
			// reuses the same (partially consumed) body. Confirm intended.
			defer o.o.Body.Close()

			if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
				return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors
			}

			_, err = io.Copy(cachedcopy, o.o.Body)
			if err != nil {
				errs = append(errs, fmt.Errorf("error coping bytes. err=%v", err))
				//recreate the cachedcopy file incase it has incomplete data
				if err := os.Remove(o.cachepath); err != nil {
					return nil, fmt.Errorf("error resetting the cachedcopy err=%v", err) //don't retry on local fs errors
				}
				if cachedcopy, err = os.Create(o.cachepath); err != nil {
					return nil, fmt.Errorf("error creating a new cachedcopy file. local=%s err=%v", o.cachepath, err)
				}

				cloudstorage.Backoff(try)
				continue
			}
		}

		if readonly {
			// Reopen read-only so the returned handle cannot be written.
			cachedcopy.Close()
			cachedcopy, err = os.Open(o.cachepath)
			if err != nil {
				name := "unknown"
				if cachedcopy != nil {
					name = cachedcopy.Name()
				}
				return nil, fmt.Errorf("error opening file. local=%s object=%s tfile=%v err=%v", o.cachepath, o.name, name, err)
			}
		} else {
			// Writable handle: rewind so the caller reads from the start.
			// NOTE(review): os.SEEK_SET is deprecated in favor of io.SeekStart.
			if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
				return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors
			}
		}

		o.cachedcopy = cachedcopy
		o.readonly = readonly
		o.opened = true
		return o.cachedcopy, nil
	}

	// All retries exhausted; report every accumulated fetch error.
	return nil, fmt.Errorf("fetch error retry cnt reached: obj=%s tfile=%v errs:[%v]", o.name, o.cachepath, errs)
}
local=%s err=%v", o.cachepath, err) 665 | } 666 | defer cachedcopy.Close() 667 | 668 | // Create an uploader with the session and default options 669 | uploader := s3manager.NewUploader(o.fs.sess) 670 | 671 | if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil { 672 | return fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local filesystem errors 673 | } 674 | 675 | // Upload the file to S3. 676 | _, err = uploader.Upload(&s3manager.UploadInput{ 677 | Bucket: aws.String(o.fs.bucket), 678 | Key: aws.String(o.name), 679 | Body: cachedcopy, 680 | }) 681 | if err != nil { 682 | gou.Warnf("could not upload %v", err) 683 | return fmt.Errorf("failed to upload file, %v", err) 684 | } 685 | return nil 686 | } 687 | 688 | // Close this object 689 | func (o *object) Close() error { 690 | if !o.opened { 691 | return nil 692 | } 693 | defer func() { 694 | os.Remove(o.cachepath) 695 | o.cachedcopy = nil 696 | o.opened = false 697 | }() 698 | 699 | if !o.readonly { 700 | err := o.cachedcopy.Sync() 701 | if err != nil { 702 | return err 703 | } 704 | } 705 | 706 | err := o.cachedcopy.Close() 707 | if err != nil { 708 | if !strings.Contains(err.Error(), os.ErrClosed.Error()) { 709 | return err 710 | } 711 | } 712 | 713 | if o.opened && !o.readonly { 714 | err := o.Sync() 715 | if err != nil { 716 | gou.Errorf("error on sync %v err=%v", o.cachepath, err) 717 | return err 718 | } 719 | } 720 | return nil 721 | } 722 | 723 | // Release this object, cleanup cached copy. 
724 | func (o *object) Release() error { 725 | if o.cachedcopy != nil { 726 | gou.Infof("release %q vs %q", o.cachedcopy.Name(), o.cachepath) 727 | o.cachedcopy.Close() 728 | return os.Remove(o.cachepath) 729 | } 730 | os.Remove(o.cachepath) 731 | return nil 732 | } 733 | -------------------------------------------------------------------------------- /google/store.go: -------------------------------------------------------------------------------- 1 | package google 2 | 3 | import ( 4 | "bufio" 5 | "compress/gzip" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "os" 10 | "path" 11 | "strconv" 12 | "strings" 13 | "time" 14 | 15 | "cloud.google.com/go/storage" 16 | "github.com/araddon/gou" 17 | "github.com/pborman/uuid" 18 | "golang.org/x/net/context" 19 | "google.golang.org/api/iterator" 20 | 21 | "github.com/lytics/cloudstorage" 22 | ) 23 | 24 | func init() { 25 | cloudstorage.Register(StoreType, provider) 26 | } 27 | func provider(conf *cloudstorage.Config) (cloudstorage.Store, error) { 28 | googleclient, err := NewGoogleClient(conf) 29 | if err != nil { 30 | return nil, err 31 | } 32 | return gcsCommonClient(googleclient.Client(), conf) 33 | } 34 | 35 | // StoreType = "gcs" 36 | const StoreType = "gcs" 37 | 38 | var ( 39 | // GCSRetries number of times to retry for GCS. 40 | GCSRetries int = 55 41 | 42 | // Ensure we implement ObjectIterator 43 | _ cloudstorage.ObjectIterator = (*objectIterator)(nil) 44 | compressionMime = "gzip" 45 | ) 46 | 47 | // GcsFS Simple wrapper for accessing smaller GCS files, it doesn't currently implement a 48 | // Reader/Writer interface so not useful for stream reading of large files yet. 49 | type GcsFS struct { 50 | gcs *storage.Client 51 | bucket string 52 | cachepath string 53 | PageSize int 54 | Id string 55 | enableCompression bool 56 | } 57 | 58 | // NewGCSStore Create Google Cloud Storage Store. 
59 | func NewGCSStore(gcs *storage.Client, bucket, cachepath string, enableCompression bool, pagesize int) (*GcsFS, error) { 60 | err := os.MkdirAll(path.Dir(cachepath), 0775) 61 | if err != nil { 62 | return nil, fmt.Errorf("unable to create path. path=%s err=%v", cachepath, err) 63 | } 64 | 65 | uid := uuid.NewUUID().String() 66 | uid = strings.Replace(uid, "-", "", -1) 67 | 68 | return &GcsFS{ 69 | gcs: gcs, 70 | bucket: bucket, 71 | cachepath: cachepath, 72 | Id: uid, 73 | PageSize: pagesize, 74 | enableCompression: enableCompression, 75 | }, nil 76 | } 77 | 78 | // Type of store = "gcs" 79 | func (g *GcsFS) Type() string { 80 | return StoreType 81 | } 82 | 83 | // Client gets access to the underlying google cloud storage client. 84 | func (g *GcsFS) Client() interface{} { 85 | return g.gcs 86 | } 87 | 88 | // String function to provide gs://..../file path 89 | func (g *GcsFS) String() string { 90 | return fmt.Sprintf("gs://%s/", g.bucket) 91 | } 92 | 93 | func (g *GcsFS) gcsb() *storage.BucketHandle { 94 | return g.gcs.Bucket(g.bucket) 95 | } 96 | 97 | func (o *object) DisableCompression() { 98 | o.enableCompression = false 99 | } 100 | 101 | // NewObject of Type GCS. 
102 | func (g *GcsFS) NewObject(objectname string) (cloudstorage.Object, error) { 103 | obj, err := g.Get(context.Background(), objectname) 104 | if err != nil && err != cloudstorage.ErrObjectNotFound { 105 | return nil, err 106 | } else if obj != nil { 107 | return nil, cloudstorage.ErrObjectExists 108 | } 109 | 110 | cf := cloudstorage.CachePathObj(g.cachepath, objectname, g.Id) 111 | 112 | return &object{ 113 | name: objectname, 114 | metadata: map[string]string{cloudstorage.ContentTypeKey: cloudstorage.ContentType(objectname)}, 115 | gcsb: g.gcsb(), 116 | bucket: g.bucket, 117 | cachedcopy: nil, 118 | cachepath: cf, 119 | enableCompression: g.enableCompression, 120 | }, nil 121 | } 122 | 123 | // Get Gets a single File Object 124 | func (g *GcsFS) Get(ctx context.Context, objectpath string) (cloudstorage.Object, error) { 125 | 126 | gobj, err := g.gcsb().Object(objectpath).Attrs(context.Background()) // .Objects(context.Background(), q) 127 | if err != nil { 128 | if strings.Contains(err.Error(), "doesn't exist") { 129 | return nil, cloudstorage.ErrObjectNotFound 130 | } 131 | return nil, err 132 | } 133 | 134 | if gobj == nil { 135 | return nil, cloudstorage.ErrObjectNotFound 136 | } 137 | 138 | return newObject(g, gobj), nil 139 | } 140 | 141 | // Objects returns an iterator over the objects in the google bucket that match the Query q. 142 | // If q is nil, no filtering is done. 143 | func (g *GcsFS) Objects(ctx context.Context, csq cloudstorage.Query) (cloudstorage.ObjectIterator, error) { 144 | var q = &storage.Query{Prefix: csq.Prefix} 145 | if csq.StartOffset != "" { 146 | q.StartOffset = csq.StartOffset 147 | } 148 | if csq.EndOffset != "" { 149 | q.EndOffset = csq.EndOffset 150 | } 151 | iter := g.gcsb().Objects(ctx, q) 152 | return &objectIterator{g, ctx, iter}, nil 153 | } 154 | 155 | // List returns an iterator over the objects in the google bucket that match the Query q. 156 | // If q is nil, no filtering is done. 
157 | func (g *GcsFS) List(ctx context.Context, csq cloudstorage.Query) (*cloudstorage.ObjectsResponse, error) { 158 | iter, err := g.Objects(ctx, csq) 159 | if err != nil { 160 | return nil, err 161 | } 162 | return cloudstorage.ObjectResponseFromIter(iter) 163 | } 164 | 165 | // Folders get folders list. 166 | func (g *GcsFS) Folders(ctx context.Context, csq cloudstorage.Query) ([]string, error) { 167 | var q = &storage.Query{Delimiter: csq.Delimiter, Prefix: csq.Prefix} 168 | iter := g.gcsb().Objects(ctx, q) 169 | folders := make([]string, 0) 170 | for { 171 | select { 172 | case <-ctx.Done(): 173 | // If has been closed 174 | return folders, ctx.Err() 175 | default: 176 | o, err := iter.Next() 177 | if err == nil { 178 | if o.Prefix != "" { 179 | folders = append(folders, o.Prefix) 180 | } 181 | } else if err == iterator.Done { 182 | return folders, nil 183 | } else if err == context.Canceled || err == context.DeadlineExceeded { 184 | // Return to user 185 | return nil, err 186 | } 187 | } 188 | } 189 | } 190 | 191 | // Copy from src to destination 192 | func (g *GcsFS) Copy(ctx context.Context, src, des cloudstorage.Object) error { 193 | 194 | srcgcs, ok := src.(*object) 195 | if !ok { 196 | return fmt.Errorf("Copy source file expected GCS but got %T", src) 197 | } 198 | desgcs, ok := des.(*object) 199 | if !ok { 200 | return fmt.Errorf("Copy destination expected GCS but got %T", des) 201 | } 202 | 203 | oh := srcgcs.gcsb.Object(srcgcs.name) 204 | dh := desgcs.gcsb.Object(desgcs.name) 205 | 206 | _, err := dh.CopierFrom(oh).Run(ctx) 207 | return err 208 | } 209 | 210 | // Move which is a Copy & Delete 211 | func (g *GcsFS) Move(ctx context.Context, src, des cloudstorage.Object) error { 212 | 213 | srcgcs, ok := src.(*object) 214 | if !ok { 215 | return fmt.Errorf("Move source file expected GCS but got %T", src) 216 | } 217 | desgcs, ok := des.(*object) 218 | if !ok { 219 | return fmt.Errorf("Move destination expected GCS but got %T", des) 220 | } 221 | 222 | 
oh := srcgcs.gcsb.Object(srcgcs.name) 223 | dh := desgcs.gcsb.Object(desgcs.name) 224 | 225 | if _, err := dh.CopierFrom(oh).Run(ctx); err != nil { 226 | return err 227 | } 228 | 229 | return oh.Delete(ctx) 230 | } 231 | 232 | // NewReader create GCS file reader. 233 | func (g *GcsFS) NewReader(o string) (io.ReadCloser, error) { 234 | return g.NewReaderWithContext(context.Background(), o) 235 | } 236 | 237 | // NewReaderWithContext create new GCS File reader with context. 238 | func (g *GcsFS) NewReaderWithContext(ctx context.Context, o string) (io.ReadCloser, error) { 239 | obj := g.gcsb().Object(o).ReadCompressed(true) 240 | attrs, err := obj.Attrs(ctx) 241 | if errors.Is(err, storage.ErrObjectNotExist) { 242 | return nil, cloudstorage.ErrObjectNotFound 243 | } else if err != nil { 244 | return nil, err 245 | } 246 | // we check ContentType here because files uploaded compressed without an 247 | // explicit ContentType set get autodetected as "application/x-gzip" instead 248 | // of "application/octet-stream", but files with the gzip ContentType get 249 | // auto-decompressed regardless of your Accept-Encoding header 250 | if attrs.ContentEncoding == compressionMime && attrs.ContentType != "application/x-gzip" { 251 | rc, err := obj.NewReader(ctx) 252 | if errors.Is(err, storage.ErrObjectNotExist) { 253 | return nil, cloudstorage.ErrObjectNotFound 254 | } else if err != nil { 255 | return nil, err 256 | } 257 | gr, err := gzip.NewReader(rc) 258 | if err != nil { 259 | return nil, err 260 | } 261 | return gr, err 262 | } 263 | 264 | rc, err := obj.NewReader(ctx) 265 | if errors.Is(err, storage.ErrObjectNotExist) { 266 | return rc, cloudstorage.ErrObjectNotFound 267 | } 268 | return rc, err 269 | } 270 | 271 | // NewWriter create GCS Object Writer. 
272 | func (g *GcsFS) NewWriter(o string, metadata map[string]string) (io.WriteCloser, error) { 273 | return g.NewWriterWithContext(context.Background(), o, metadata) 274 | } 275 | 276 | type gzipWriteCloser struct { 277 | ctx context.Context 278 | w io.WriteCloser 279 | c io.Closer 280 | } 281 | 282 | // newGZIPWriteCloser is a io.WriteCloser that closes both the gzip writer and also the passed in writer 283 | func newGZIPWriteCloser(ctx context.Context, rc io.WriteCloser) io.WriteCloser { 284 | return &gzipWriteCloser{ctx, gzip.NewWriter(rc), rc} 285 | } 286 | 287 | func (b *gzipWriteCloser) Write(p []byte) (int, error) { 288 | if err := b.ctx.Err(); err != nil { 289 | return 0, err 290 | } 291 | return b.w.Write(p) 292 | } 293 | 294 | func (b *gzipWriteCloser) Close() error { 295 | if err := b.ctx.Err(); err != nil { 296 | return err 297 | } 298 | if err := b.w.Close(); err != nil { 299 | return err 300 | } 301 | return b.c.Close() 302 | } 303 | 304 | // NewWriterWithContext create writer with provided context and metadata. 305 | func (g *GcsFS) NewWriterWithContext(ctx context.Context, o string, metadata map[string]string, opts ...cloudstorage.Opts) (io.WriteCloser, error) { 306 | obj := g.gcsb().Object(o) 307 | disableCompression := false 308 | if len(opts) > 0 { 309 | if opts[0].DisableCompression { 310 | disableCompression = true 311 | } 312 | if opts[0].IfNotExists { 313 | obj = obj.If(storage.Conditions{DoesNotExist: true}) 314 | } 315 | } 316 | wc := obj.NewWriter(ctx) 317 | if metadata != nil { 318 | wc.Metadata = metadata 319 | //contenttype is only used for viewing the file in a browser. (i.e. the GCS Object browser). 320 | ctype := cloudstorage.EnsureContextType(o, metadata) 321 | wc.ContentType = ctype 322 | } 323 | if g.enableCompression && !disableCompression { 324 | wc.ContentEncoding = compressionMime 325 | return newGZIPWriteCloser(ctx, wc), nil 326 | } 327 | return wc, nil 328 | } 329 | 330 | // Delete requested object path string. 
331 | func (g *GcsFS) Delete(ctx context.Context, obj string) error { 332 | err := g.gcsb().Object(obj).Delete(ctx) 333 | if err != nil { 334 | return err 335 | } 336 | return nil 337 | } 338 | 339 | // objectIterator iterator to match store interface for iterating 340 | // through all GcsObjects that matched query. 341 | type objectIterator struct { 342 | g *GcsFS 343 | ctx context.Context 344 | iter *storage.ObjectIterator 345 | } 346 | 347 | func (*objectIterator) Close() {} 348 | 349 | // Next iterator to go to next object or else returns error for done. 350 | func (it *objectIterator) Next() (cloudstorage.Object, error) { 351 | retryCt := 0 352 | for { 353 | select { 354 | case <-it.ctx.Done(): 355 | // If has been closed 356 | return nil, it.ctx.Err() 357 | default: 358 | o, err := it.iter.Next() 359 | if err == nil { 360 | return newObject(it.g, o), nil 361 | } else if err == iterator.Done { 362 | return nil, err 363 | } else if err == context.Canceled || err == context.DeadlineExceeded { 364 | // Return to user 365 | return nil, err 366 | } 367 | if retryCt < 5 { 368 | cloudstorage.Backoff(retryCt) 369 | } else { 370 | return nil, err 371 | } 372 | retryCt++ 373 | } 374 | } 375 | } 376 | 377 | type object struct { 378 | name string 379 | updated time.Time 380 | metadata map[string]string 381 | googleObject *storage.ObjectAttrs 382 | gcsb *storage.BucketHandle 383 | bucket string 384 | cachedcopy *os.File 385 | readonly bool 386 | opened bool 387 | cachepath string 388 | enableCompression bool 389 | } 390 | 391 | func newObject(g *GcsFS, o *storage.ObjectAttrs) *object { 392 | metadata := o.Metadata 393 | if metadata == nil { 394 | metadata = make(map[string]string) 395 | } 396 | metadata["content_length"] = strconv.FormatInt(o.Size, 10) 397 | metadata["attrs_content_type"] = o.ContentType 398 | metadata["attrs_cache_control"] = o.CacheControl 399 | metadata["content_encoding"] = o.ContentEncoding 400 | 401 | return &object{ 402 | name: o.Name, 403 | 
updated: o.Updated, 404 | metadata: metadata, 405 | gcsb: g.gcsb(), 406 | bucket: g.bucket, 407 | cachepath: cloudstorage.CachePathObj(g.cachepath, o.Name, g.Id), 408 | enableCompression: g.enableCompression, 409 | } 410 | } 411 | func (o *object) StorageSource() string { 412 | return StoreType 413 | } 414 | func (o *object) Name() string { 415 | return o.name 416 | } 417 | func (o *object) String() string { 418 | return o.name 419 | } 420 | func (o *object) Updated() time.Time { 421 | return o.updated 422 | } 423 | func (o *object) MetaData() map[string]string { 424 | return o.metadata 425 | } 426 | func (o *object) SetMetaData(meta map[string]string) { 427 | o.metadata = meta 428 | } 429 | 430 | func (o *object) Delete() error { 431 | o.Release() 432 | return o.gcsb.Object(o.name).Delete(context.Background()) 433 | } 434 | 435 | func (o *object) Open(accesslevel cloudstorage.AccessLevel) (*os.File, error) { 436 | if o.opened { 437 | return nil, fmt.Errorf("the store object is already opened. %s", o.name) 438 | } 439 | 440 | var errs []error = make([]error, 0) 441 | var cachedcopy *os.File = nil 442 | var err error 443 | var readonly = accesslevel == cloudstorage.ReadOnly 444 | 445 | err = os.MkdirAll(path.Dir(o.cachepath), 0775) 446 | if err != nil { 447 | return nil, fmt.Errorf("error occurred creating cachedcopy dir. cachepath=%s object=%s err=%v", 448 | o.cachepath, o.name, err) 449 | } 450 | 451 | err = cloudstorage.EnsureDir(o.cachepath) 452 | if err != nil { 453 | return nil, fmt.Errorf("error occurred creating cachedcopy's dir. cachepath=%s err=%v", 454 | o.cachepath, err) 455 | } 456 | 457 | cachedcopy, err = os.Create(o.cachepath) 458 | if err != nil { 459 | return nil, fmt.Errorf("error occurred creating file. 
local=%s err=%v", 460 | o.cachepath, err) 461 | } 462 | 463 | for try := 0; try < GCSRetries; try++ { 464 | if o.googleObject == nil { 465 | gobj, err := o.gcsb.Object(o.name).Attrs(context.Background()) 466 | if err != nil { 467 | if strings.Contains(err.Error(), "doesn't exist") { 468 | // New, this is fine 469 | } else { 470 | errs = append(errs, fmt.Errorf("error storage.NewReader err=%v", err)) 471 | cloudstorage.Backoff(try) 472 | continue 473 | } 474 | } 475 | 476 | if gobj != nil { 477 | o.googleObject = gobj 478 | } 479 | } 480 | 481 | if o.googleObject != nil { 482 | //we have a preexisting object, so lets download it.. 483 | rc, err := o.gcsb.Object(o.name).ReadCompressed(true).NewReader(context.Background()) 484 | if err != nil { 485 | errs = append(errs, fmt.Errorf("error storage.NewReader err=%v", err)) 486 | cloudstorage.Backoff(try) 487 | continue 488 | } 489 | defer rc.Close() 490 | 491 | if _, err := cachedcopy.Seek(0, io.SeekStart); err != nil { 492 | return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) // don't retry on local fs errors 493 | } 494 | 495 | var writtenBytes int64 496 | // we check ContentType here because files uploaded compressed without an 497 | // explicit ContentType set get autodetected as "application/x-gzip" instead 498 | // of "application/octet-stream", but files with the gzip ContentType get 499 | // auto-decompressed regardless of your Accept-Encoding header 500 | if o.googleObject.ContentEncoding == compressionMime && o.googleObject.ContentType != "application/x-gzip" { 501 | cr, err := gzip.NewReader(rc) 502 | if err != nil { 503 | return nil, fmt.Errorf("error decompressing data err=%v", err) // don't retry on decompression errors 504 | } 505 | writtenBytes, err = io.Copy(cachedcopy, cr) 506 | if err != nil && (strings.HasPrefix(err.Error(), "gzip: ")) { 507 | return nil, fmt.Errorf("error copying/decompressing data err=%v", err) // don't retry on decompression errors 508 | } 509 | } else { 510 
| writtenBytes, err = io.Copy(cachedcopy, rc) 511 | } 512 | if err != nil { 513 | errs = append(errs, fmt.Errorf("error coping bytes. err=%v", err)) 514 | //recreate the cachedcopy file incase it has incomplete data 515 | if err := os.Remove(o.cachepath); err != nil { 516 | return nil, fmt.Errorf("error resetting the cachedcopy err=%v", err) //don't retry on local fs errors 517 | } 518 | if cachedcopy, err = os.Create(o.cachepath); err != nil { 519 | return nil, fmt.Errorf("error creating a new cachedcopy file. local=%s err=%v", o.cachepath, err) 520 | } 521 | 522 | cloudstorage.Backoff(try) 523 | continue 524 | } 525 | 526 | if o.googleObject.ContentEncoding != compressionMime { // compression checks crc 527 | // make sure the whole object was downloaded from google 528 | if contentLength, ok := o.metadata["content_length"]; ok { 529 | if contentLengthInt, err := strconv.ParseInt(contentLength, 10, 64); err == nil { 530 | if contentLengthInt != writtenBytes { 531 | return nil, fmt.Errorf("partial file download error. tfile=%v", o.name) 532 | } 533 | } else { 534 | return nil, fmt.Errorf("content_length is not a number. tfile=%v", o.name) 535 | } 536 | } 537 | } 538 | } 539 | 540 | if readonly { 541 | cachedcopy.Close() 542 | cachedcopy, err = os.Open(o.cachepath) 543 | if err != nil { 544 | name := "unknown" 545 | if cachedcopy != nil { 546 | name = cachedcopy.Name() 547 | } 548 | return nil, fmt.Errorf("error opening file. 
local=%s object=%s tfile=%v err=%v", o.cachepath, o.name, name, err) 549 | } 550 | } else { 551 | if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil { 552 | return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors 553 | } 554 | } 555 | 556 | o.cachedcopy = cachedcopy 557 | o.readonly = readonly 558 | o.opened = true 559 | return o.cachedcopy, nil 560 | } 561 | 562 | return nil, fmt.Errorf("fetch error retry cnt reached: obj=%s tfile=%v errs:[%v]", o.name, o.cachepath, errs) 563 | } 564 | 565 | func (o *object) File() *os.File { 566 | return o.cachedcopy 567 | } 568 | func (o *object) Read(p []byte) (n int, err error) { 569 | return o.cachedcopy.Read(p) 570 | } 571 | func (o *object) Write(p []byte) (n int, err error) { 572 | if o.cachedcopy == nil { 573 | _, err := o.Open(cloudstorage.ReadWrite) 574 | if err != nil { 575 | return 0, err 576 | } 577 | } 578 | return o.cachedcopy.Write(p) 579 | } 580 | 581 | func (o *object) Sync() error { 582 | 583 | if !o.opened { 584 | return fmt.Errorf("object isn't opened object:%s", o.name) 585 | } 586 | if o.readonly { 587 | return fmt.Errorf("trying to Sync a readonly object:%s", o.name) 588 | } 589 | 590 | var errs = make([]string, 0) 591 | 592 | cachedcopy, err := os.OpenFile(o.cachepath, os.O_RDWR, 0664) 593 | if err != nil { 594 | return fmt.Errorf("couldn't open localfile for sync'ing. local=%s err=%v", 595 | o.cachepath, err) 596 | } 597 | defer cachedcopy.Close() 598 | 599 | for try := 0; try < GCSRetries; try++ { 600 | if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil { 601 | return fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local filesystem errors 602 | } 603 | rd := bufio.NewReader(cachedcopy) 604 | 605 | wc := o.gcsb.Object(o.name).NewWriter(context.Background()) 606 | 607 | if o.metadata != nil { 608 | wc.Metadata = o.metadata 609 | //contenttype is only used for viewing the file in a browser. (i.e. 
the GCS Object browser). 610 | ctype := cloudstorage.EnsureContextType(o.name, o.metadata) 611 | wc.ContentType = ctype 612 | } 613 | 614 | if o.enableCompression { 615 | wc.ContentEncoding = compressionMime 616 | cw := gzip.NewWriter(wc) 617 | if _, err = io.Copy(cw, rd); err != nil { 618 | errs = append(errs, fmt.Sprintf("copy to remote object error:%v", err)) 619 | cloudstorage.Backoff(try) 620 | continue 621 | } 622 | 623 | if err = cw.Close(); err != nil { 624 | errs = append(errs, fmt.Sprintf("close compression writer error:%v", err)) 625 | cloudstorage.Backoff(try) 626 | continue 627 | } 628 | 629 | if err = wc.Close(); err != nil { 630 | errs = append(errs, fmt.Sprintf("Close writer error:%v", err)) 631 | cloudstorage.Backoff(try) 632 | continue 633 | } 634 | } else { 635 | if _, err = io.Copy(wc, rd); err != nil { 636 | errs = append(errs, fmt.Sprintf("copy to remote object error:%v", err)) 637 | err2 := wc.CloseWithError(err) 638 | if err2 != nil { 639 | errs = append(errs, fmt.Sprintf("CloseWithError error:%v", err2)) 640 | } 641 | cloudstorage.Backoff(try) 642 | continue 643 | } 644 | 645 | if err = wc.Close(); err != nil { 646 | errs = append(errs, fmt.Sprintf("close gcs writer error:%v", err)) 647 | cloudstorage.Backoff(try) 648 | continue 649 | } 650 | } 651 | 652 | return nil 653 | } 654 | 655 | errmsg := strings.Join(errs, ",") 656 | return fmt.Errorf("GCS sync error after retry: (oname=%s cpath:%v) errors[%v]", o.name, o.cachepath, errmsg) 657 | } 658 | 659 | func (o *object) Close() error { 660 | if !o.opened { 661 | return nil 662 | } 663 | defer func() { 664 | os.Remove(o.cachepath) 665 | o.cachedcopy = nil 666 | o.opened = false 667 | }() 668 | 669 | if !o.readonly { 670 | err := o.cachedcopy.Sync() 671 | if err != nil { 672 | return err 673 | } 674 | } 675 | 676 | err := o.cachedcopy.Close() 677 | if err != nil { 678 | if !strings.Contains(err.Error(), "already closed") { 679 | gou.Warnf("error closing cached copy %v", err) 680 | return 
fmt.Errorf("error on sync and closing localfile. %q err=%v", o.cachepath, err) 681 | } 682 | } 683 | 684 | if o.opened && !o.readonly { 685 | err := o.Sync() 686 | if err != nil { 687 | return err 688 | } 689 | } 690 | 691 | return nil 692 | } 693 | 694 | func (o *object) Release() error { 695 | if o.cachedcopy != nil { 696 | gou.Debugf("release %q vs %q", o.cachedcopy.Name(), o.cachepath) 697 | o.cachedcopy.Close() 698 | o.cachedcopy = nil 699 | o.opened = false 700 | return os.Remove(o.cachepath) 701 | } 702 | // most likely this doesn't exist so don't return error 703 | os.Remove(o.cachepath) 704 | return nil 705 | } 706 | -------------------------------------------------------------------------------- /azure/store.go: -------------------------------------------------------------------------------- 1 | package azure 2 | 3 | import ( 4 | "bufio" 5 | "encoding/base64" 6 | "encoding/binary" 7 | "fmt" 8 | "io" 9 | "os" 10 | "path" 11 | "strings" 12 | "time" 13 | 14 | az "github.com/Azure/azure-sdk-for-go/storage" 15 | "github.com/araddon/gou" 16 | "github.com/lytics/cloudstorage" 17 | "github.com/pborman/uuid" 18 | "golang.org/x/net/context" 19 | "golang.org/x/sync/errgroup" 20 | ) 21 | 22 | const ( 23 | // StoreType = "azure" this is used to define the storage type to create 24 | // from cloudstorage.NewStore(config) 25 | StoreType = "azure" 26 | 27 | // Configuration Keys. These are the names of keys 28 | // to look for in the json map[string]string to extract for config. 29 | 30 | // ConfKeyAuthKey config key name of the azure api key for auth 31 | ConfKeyAuthKey = "azure_key" 32 | 33 | // Authentication Source's 34 | 35 | // AuthKey is for using azure api key 36 | AuthKey cloudstorage.AuthMethod = "azure_key" 37 | ) 38 | 39 | var ( 40 | // Retries number of times to retry upon failures. 
41 | Retries = 3 42 | // PageSize is default page size 43 | PageSize = 2000 44 | 45 | // ErrNoAzureSession no valid session 46 | ErrNoAzureSession = fmt.Errorf("no valid azure session was created") 47 | // ErrNoAccessKey error for no azure_key 48 | ErrNoAccessKey = fmt.Errorf("no settings.azure_key") 49 | // ErrNoAuth error for no findable auth 50 | ErrNoAuth = fmt.Errorf("No auth provided") 51 | ) 52 | 53 | func init() { 54 | // Register this Driver (azure) in cloudstorage driver registry. 55 | cloudstorage.Register(StoreType, func(conf *cloudstorage.Config) (cloudstorage.Store, error) { 56 | client, sess, err := NewClient(conf) 57 | if err != nil { 58 | return nil, err 59 | } 60 | return NewStore(client, sess, conf) 61 | }) 62 | } 63 | 64 | type ( 65 | // FS Simple wrapper for accessing azure blob files, it doesn't currently implement a 66 | // Reader/Writer interface so not useful for stream reading of large files yet. 67 | FS struct { 68 | PageSize int 69 | ID string 70 | baseClient *az.Client 71 | client *az.BlobStorageClient 72 | endpoint string 73 | bucket string 74 | cachepath string 75 | } 76 | 77 | object struct { 78 | fs *FS 79 | o *az.Blob 80 | cachedcopy *os.File 81 | rc io.ReadCloser 82 | 83 | name string // aka "id" in azure 84 | updated time.Time // LastModified in azure 85 | metadata map[string]string 86 | bucket string 87 | readonly bool 88 | opened bool 89 | cachepath string 90 | 91 | //infoOnce sync.Once 92 | infoErr error 93 | } 94 | ) 95 | 96 | // NewClient create new AWS s3 Client. Uses cloudstorage.Config to read 97 | // necessary config settings such as bucket, region, auth. 
98 | func NewClient(conf *cloudstorage.Config) (*az.Client, *az.BlobStorageClient, error) { 99 | 100 | switch conf.AuthMethod { 101 | case AuthKey: 102 | accessKey := conf.Settings.String(ConfKeyAuthKey) 103 | if accessKey == "" { 104 | return nil, nil, ErrNoAccessKey 105 | } 106 | basicClient, err := az.NewBasicClient(conf.Project, accessKey) 107 | if err != nil { 108 | gou.Warnf("could not get azure client %v", err) 109 | return nil, nil, err 110 | } 111 | client := basicClient.GetBlobService() 112 | return &basicClient, &client, err 113 | } 114 | 115 | return nil, nil, ErrNoAuth 116 | } 117 | 118 | // NewStore Create AWS S3 storage client of type cloudstorage.Store 119 | func NewStore(c *az.Client, blobClient *az.BlobStorageClient, conf *cloudstorage.Config) (*FS, error) { 120 | 121 | if conf.TmpDir == "" { 122 | return nil, fmt.Errorf("unable to create cachepath. config.tmpdir=%q", conf.TmpDir) 123 | } 124 | 125 | err := os.MkdirAll(conf.TmpDir, 0775) 126 | if err != nil { 127 | return nil, fmt.Errorf("unable to create cachepath. config.tmpdir=%q err=%v", conf.TmpDir, err) 128 | } 129 | 130 | uid := uuid.NewUUID().String() 131 | uid = strings.Replace(uid, "-", "", -1) 132 | 133 | return &FS{ 134 | baseClient: c, 135 | client: blobClient, 136 | bucket: conf.Bucket, 137 | cachepath: conf.TmpDir, 138 | ID: uid, 139 | PageSize: 10000, 140 | }, nil 141 | } 142 | 143 | // Type of store = "azure" 144 | func (f *FS) Type() string { 145 | return StoreType 146 | } 147 | 148 | // Client gets access to the underlying google cloud storage client. 149 | func (f *FS) Client() interface{} { 150 | return f.client 151 | } 152 | 153 | // String function to provide azure://..../file path 154 | func (f *FS) String() string { 155 | return fmt.Sprintf("azure://%s/", f.bucket) 156 | } 157 | 158 | // NewObject of Type azure. 
159 | func (f *FS) NewObject(objectname string) (cloudstorage.Object, error) { 160 | obj, err := f.Get(context.Background(), objectname) 161 | if err != nil && err != cloudstorage.ErrObjectNotFound { 162 | return nil, err 163 | } else if obj != nil { 164 | return nil, cloudstorage.ErrObjectExists 165 | } 166 | 167 | cf := cloudstorage.CachePathObj(f.cachepath, objectname, f.ID) 168 | 169 | return &object{ 170 | fs: f, 171 | name: objectname, 172 | metadata: map[string]string{cloudstorage.ContentTypeKey: cloudstorage.ContentType(objectname)}, 173 | bucket: f.bucket, 174 | cachedcopy: nil, 175 | cachepath: cf, 176 | }, nil 177 | } 178 | 179 | func (o *object) DisableCompression() {} 180 | 181 | // Get a single File Object 182 | func (f *FS) Get(ctx context.Context, objectpath string) (cloudstorage.Object, error) { 183 | 184 | obj, err := f.getObject(ctx, objectpath) 185 | if err != nil { 186 | return nil, err 187 | } else if obj == nil { 188 | return nil, cloudstorage.ErrObjectNotFound 189 | } 190 | 191 | return obj, nil 192 | } 193 | 194 | // get single object 195 | func (f *FS) getObject(ctx context.Context, objectname string) (*object, error) { 196 | 197 | blob := f.client.GetContainerReference(f.bucket).GetBlobReference(objectname) 198 | err := blob.GetProperties(nil) 199 | if err != nil { 200 | if strings.Contains(err.Error(), "404") { 201 | return nil, cloudstorage.ErrObjectNotFound 202 | } 203 | return nil, err 204 | } 205 | o := &object{ 206 | name: objectname, 207 | fs: f, 208 | o: blob, 209 | } 210 | 211 | o.o.Properties.Etag = cloudstorage.CleanETag(o.o.Properties.Etag) 212 | o.updated = time.Time(o.o.Properties.LastModified) 213 | o.cachepath = cloudstorage.CachePathObj(f.cachepath, o.name, f.ID) 214 | 215 | return o, nil 216 | //return newObjectFromHead(f, objectname, res), nil 217 | } 218 | 219 | func (f *FS) getOpenObject(ctx context.Context, objectname string) (io.ReadCloser, error) { 220 | rc, err := 
f.client.GetContainerReference(f.bucket).GetBlobReference(objectname).Get(nil) 221 | if err != nil && strings.Contains(err.Error(), "404") { 222 | return nil, cloudstorage.ErrObjectNotFound 223 | } else if err != nil { 224 | return nil, err 225 | } 226 | return rc, nil 227 | } 228 | 229 | func convertMetaData(m map[string]*string) (map[string]string, error) { 230 | result := make(map[string]string, len(m)) 231 | for key, value := range m { 232 | if value != nil { 233 | result[strings.ToLower(key)] = *value 234 | } else { 235 | result[strings.ToLower(key)] = "" 236 | } 237 | 238 | } 239 | return result, nil 240 | } 241 | 242 | // List objects from this store. 243 | func (f *FS) List(ctx context.Context, q cloudstorage.Query) (*cloudstorage.ObjectsResponse, error) { 244 | 245 | itemLimit := uint(f.PageSize) 246 | if q.PageSize > 0 { 247 | itemLimit = uint(q.PageSize) 248 | } 249 | 250 | params := az.ListBlobsParameters{ 251 | Prefix: q.Prefix, 252 | MaxResults: itemLimit, 253 | Marker: q.Marker, 254 | } 255 | 256 | blobs, err := f.client.GetContainerReference(f.bucket).ListBlobs(params) 257 | if err != nil { 258 | return nil, err 259 | } 260 | objResp := &cloudstorage.ObjectsResponse{ 261 | Objects: make(cloudstorage.Objects, len(blobs.Blobs)), 262 | } 263 | 264 | for i, o := range blobs.Blobs { 265 | objResp.Objects[i] = newObject(f, &o) 266 | } 267 | objResp.NextMarker = blobs.NextMarker 268 | q.Marker = blobs.NextMarker 269 | 270 | return objResp, nil 271 | } 272 | 273 | // Objects returns an iterator over the objects in the google bucket that match the Query q. 274 | // If q is nil, no filtering is done. 275 | func (f *FS) Objects(ctx context.Context, q cloudstorage.Query) (cloudstorage.ObjectIterator, error) { 276 | return cloudstorage.NewObjectPageIterator(ctx, f, q), nil 277 | } 278 | 279 | // Folders get folders list. 
// Folders lists the immediate sub-"folders" (blob prefixes delimited by "/")
// under q.Prefix.  Despite the for-loop, every branch returns on the first
// iteration; the select only exists to honor ctx cancellation.
func (f *FS) Folders(ctx context.Context, q cloudstorage.Query) ([]string, error) {

	q.Delimiter = "/"

	// Think we should just put 1 here right?
	itemLimit := uint(f.PageSize)
	if q.PageSize > 0 {
		itemLimit = uint(q.PageSize)
	}

	params := az.ListBlobsParameters{
		Prefix:     q.Prefix,
		MaxResults: itemLimit,
		Delimiter:  "/",
	}

	for {
		select {
		case <-ctx.Done():
			// If has been closed
			return nil, ctx.Err()
		default:
			// NOTE(review): pagination is disabled — only the first page of
			// prefixes is returned.  Confirm whether the Marker support
			// below should be re-enabled.
			// if q.Marker != "" {
			// 	params.Marker = &q.Marker
			// }
			blobs, err := f.client.GetContainerReference(f.bucket).ListBlobs(params)
			if err != nil {
				gou.Warnf("leaving %v", err)
				return nil, err
			}
			if len(blobs.BlobPrefixes) > 0 {
				return blobs.BlobPrefixes, nil
			}
			return nil, nil
		}
	}
}

/*
// Copy from src to destination
func (f *FS) Copy(ctx context.Context, src, des cloudstorage.Object) error {

	so, ok := src.(*object)
	if !ok {
		return fmt.Errorf("Copy source file expected s3 but got %T", src)
	}
	do, ok := des.(*object)
	if !ok {
		return fmt.Errorf("Copy destination expected s3 but got %T", des)
	}

	oh := so.b.Object(so.name)
	dh := do.b.Object(do.name)

	_, err := dh.CopierFrom(oh).Run(ctx)
	return err
}

// Move which is a Copy & Delete
func (f *FS) Move(ctx context.Context, src, des cloudstorage.Object) error {

	so, ok := src.(*object)
	if !ok {
		return fmt.Errorf("Move source file expected s3 but got %T", src)
	}
	do, ok := des.(*object)
	if !ok {
		return fmt.Errorf("Move destination expected s3 but got %T", des)
	}

	oh := so.b.Object(so.name)
	dh := do.b.Object(des.name)

	if _, err := dh.CopierFrom(oh).Run(ctx); err != nil {
		return err
	}

	return oh.Delete(ctx)
}
*/

// NewReader create file reader.
func (f *FS) NewReader(o string) (io.ReadCloser, error) {
	return f.NewReaderWithContext(context.Background(), o)
}

// NewReaderWithContext create new File reader with context.
func (f *FS) NewReaderWithContext(ctx context.Context, objectname string) (io.ReadCloser, error) {
	ioc, err := f.client.GetContainerReference(f.bucket).GetBlobReference(objectname).Get(nil)
	if err != nil {
		// translate the string error to typed error
		if strings.Contains(err.Error(), "404") {
			return nil, cloudstorage.ErrObjectNotFound
		}
		return nil, err
	}
	return ioc, nil
}

// NewWriter create Object Writer.
func (f *FS) NewWriter(objectName string, metadata map[string]string) (io.WriteCloser, error) {
	return f.NewWriterWithContext(context.Background(), objectName, metadata)
}

// NewWriterWithContext create writer with provided context and metadata.
// The cloudstorage.Opts IfNotExists option is not supported by this store.
func (f *FS) NewWriterWithContext(ctx context.Context, name string, metadata map[string]string, opts ...cloudstorage.Opts) (io.WriteCloser, error) {
	if len(opts) > 0 && opts[0].IfNotExists {
		return nil, fmt.Errorf("options IfNotExists not supported for store type")
	}
	// NOTE(review): spaces in names are rewritten to "+" — presumably for
	// url-safety; confirm callers expect this.
	name = strings.Replace(name, " ", "+", -1)
	o := &object{name: name, metadata: metadata}
	rwc := newAzureWriteCloser(ctx, f, o)

	return rwc, nil
}

// azureWriteCloser - manages data and go routines used to pipe data to azures, calling Close
// will flush data to azures and block until all inflight data has been written or
// we get an error.
type azureWriteCloser struct {
	pr *io.PipeReader // read end, consumed by the background uploader
	pw *io.PipeWriter // write end, fed (via wc) by callers of Write
	wc *bufio.Writer  // buffers small writes before they hit the pipe
	g  *errgroup.Group
}

// azureWriteCloser is a io.WriteCloser that manages the azure connection pipe and when Close is called
// it blocks until all data is flushed to azure via a background go routine call to uploadMultiPart.
func newAzureWriteCloser(ctx context.Context, f *FS, obj *object) io.WriteCloser {
	pr, pw := io.Pipe()
	bw := bufio.NewWriter(pw)

	g, _ := errgroup.WithContext(ctx)

	g.Go(func() error {
		// Upload the file to azure.
		// Do a multipart upload
		err := f.uploadMultiPart(obj, pr)
		if err != nil {
			gou.Warnf("could not upload %v", err)
			return err
		}
		return nil
	})

	return azureWriteCloser{
		pr, pw, bw, g,
	}
}

// Write writes data to our write buffer, which writes to the backing io pipe.
// If an error is encountered while writing we may not see it here, my guess is
// we wouldn't see it until someone calls close and the error is returned from the
// error group.
func (bc azureWriteCloser) Write(p []byte) (nn int, err error) {
	return bc.wc.Write(p)
}

// Close and block until we flush inflight data to azure.
func (bc azureWriteCloser) Close() error {
	// Flush buffered data to the backing pipe writer.
	if err := bc.wc.Flush(); err != nil {
		return err
	}
	// Close the pipe writer so that the pipe reader will return EOF,
	// doing so will cause uploadMultiPart to complete and return.
	if err := bc.pw.Close(); err != nil {
		return err
	}
	// Use the error group's Wait method to block until uploadMultiPart has completed.
	if err := bc.g.Wait(); err != nil {
		return err
	}
	return nil
}

const (
	// constants related to chunked uploads
	initialChunkSize = 4 * 1024 * 1024   // size of each block sent via PutBlock
	maxChunkSize     = 100 * 1024 * 1024 // NOTE(review): unused in this file
	maxParts         = 50000             // NOTE(review): unused in this file
)

// makeBlockID encodes a numeric block index as the base64 string id that
// azure's PutBlock API requires.
func makeBlockID(id uint64) string {
	bytesID := make([]byte, 8)
	binary.LittleEndian.PutUint64(bytesID, id)
	return base64.StdEncoding.EncodeToString(bytesID)
}

// uploadMultiPart start an upload: streams r to azure as a sequence of
// blocks, commits the block list, then stamps the object's metadata.
func (f *FS) uploadMultiPart(o *object, r io.Reader) error {

	//chunkSize, err := calcBlockSize(size)
	// if err != nil {
	// 	return err
	// }
	var buf = make([]byte, initialChunkSize)

	var blocks []az.Block
	var rawID uint64

	blob := f.client.GetContainerReference(f.bucket).GetBlobReference(o.name)

	// TODO: performance improvement to manage uploads in separate
	// go-routine than the reader
	for {
		n, err := r.Read(buf)
		if err != nil {
			if err == io.EOF {
				break
			}
			gou.Warnf("unknown err=%v", err)
			return err
		}

		blockID := makeBlockID(rawID)
		chunk := buf[:n]

		if err := blob.PutBlock(blockID, chunk, nil); err != nil {
			return err
		}

		blocks = append(blocks, az.Block{
			ID:     blockID,
			Status: az.BlockStatusLatest,
		})
		rawID++
	}

	// Commit the uploaded blocks as the blob's content.
	err := blob.PutBlockList(blocks, nil)
	if err != nil {
		gou.Warnf("could not put block list %v", err)
		return err
	}

	// Refresh properties before setting metadata (SetMetadata needs a
	// current etag on the blob reference).
	err = blob.GetProperties(nil)
	if err != nil {
		gou.Warnf("could not load blog properties %v", err)
		return err
	}

	blob.Metadata = o.metadata

	err = blob.SetMetadata(nil)
	if err != nil {
		gou.Warnf("can't set metadata err=%v", err)
		return err
	}
	return nil
}

// Delete requested object path string.
func (f *FS) Delete(ctx context.Context, name string) error {
	err := f.client.GetContainerReference(f.bucket).GetBlobReference(name).Delete(nil)
	if err != nil && strings.Contains(err.Error(), "404") {
		return cloudstorage.ErrObjectNotFound
	}
	return err
}

// newObject wraps a listed blob in our object type, cleaning its ETag and
// computing its local cache path.
func newObject(f *FS, o *az.Blob) *object {
	obj := &object{
		fs:        f,
		o:         o,
		name:      o.Name,
		bucket:    f.bucket,
		cachepath: cloudstorage.CachePathObj(f.cachepath, o.Name, f.ID),
	}
	obj.o.Properties.Etag = cloudstorage.CleanETag(obj.o.Properties.Etag)
	return obj
}

/*
func newObjectFromHead(f *FS, name string, o *s3.HeadObjectOutput) *object {
	obj := &object{
		fs:        f,
		name:      name,
		bucket:    f.bucket,
		cachepath: cloudstorage.CachePathObj(f.cachepath, name, f.ID),
	}
	if o.LastModified != nil {
		obj.updated = *o.LastModified
	}
	// metadata?
	obj.metadata, _ = convertMetaData(o.Metadata)
	return obj
}
*/

// StorageSource returns the driver type ("azure").
func (o *object) StorageSource() string {
	return StoreType
}

// Name of the object (the blob id).
func (o *object) Name() string {
	return o.name
}
func (o *object) String() string {
	return o.name
}

// Updated is the blob's LastModified time.
func (o *object) Updated() time.Time {
	return o.updated
}
func (o *object) MetaData() map[string]string {
	return o.metadata
}
func (o *object) SetMetaData(meta map[string]string) {
	o.metadata = meta
}

// Delete removes this object from the store.
func (o *object) Delete() error {
	return o.fs.Delete(context.Background(), o.name)
}

// Open downloads the blob into a local cachedcopy file (retrying up to
// Retries times with backoff) and returns the open *os.File.  With
// cloudstorage.ReadOnly the file is reopened read-only; otherwise it is
// positioned at offset 0 ready for read/write.  A missing remote object is
// not an error — the caller gets an empty local file to write into.
func (o *object) Open(accesslevel cloudstorage.AccessLevel) (*os.File, error) {
	if o.opened {
		return nil, fmt.Errorf("the store object is already opened. %s", o.name)
	}

	var errs []error = make([]error, 0)
	var cachedcopy *os.File = nil
	var err error
	var readonly = accesslevel == cloudstorage.ReadOnly

	err = os.MkdirAll(path.Dir(o.cachepath), 0775)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating cachedcopy dir. cachepath=%s object=%s err=%v", o.cachepath, o.name, err)
	}

	err = cloudstorage.EnsureDir(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating cachedcopy's dir. cachepath=%s err=%v", o.cachepath, err)
	}

	cachedcopy, err = os.Create(o.cachepath)
	if err != nil {
		return nil, fmt.Errorf("error occurred creating file. local=%s err=%v", o.cachepath, err)
	}

	for try := 0; try < Retries; try++ {
		if o.rc == nil {
			rc, err := o.fs.getOpenObject(context.Background(), o.name)
			if err != nil {
				if err == cloudstorage.ErrObjectNotFound {
					// New, this is fine
				} else {
					// lets re-try
					errs = append(errs, fmt.Errorf("error getting object err=%v", err))
					cloudstorage.Backoff(try)
					continue
				}
			}

			if rc != nil {
				o.rc = rc
			}
		}

		if o.rc != nil {
			// we have a preexisting object, so lets download it..
			// NOTE(review): defer inside the retry loop only fires at
			// function return, and a retry after a failed copy reuses the
			// same (possibly drained) reader — confirm intended.
			defer o.rc.Close()

			if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
				return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors
			}

			_, err = io.Copy(cachedcopy, o.rc)
			if err != nil {
				errs = append(errs, fmt.Errorf("error coping bytes. err=%v", err))
				//recreate the cachedcopy file incase it has incomplete data
				if err := os.Remove(o.cachepath); err != nil {
					return nil, fmt.Errorf("error resetting the cachedcopy err=%v", err) //don't retry on local fs errors
				}
				if cachedcopy, err = os.Create(o.cachepath); err != nil {
					return nil, fmt.Errorf("error creating a new cachedcopy file. local=%s err=%v", o.cachepath, err)
				}

				cloudstorage.Backoff(try)
				continue
			}
		}

		if readonly {
			// Reopen the cachedcopy read-only so callers can't write to it.
			cachedcopy.Close()
			cachedcopy, err = os.Open(o.cachepath)
			if err != nil {
				name := "unknown"
				if cachedcopy != nil {
					name = cachedcopy.Name()
				}
				return nil, fmt.Errorf("error opening file. local=%s object=%s tfile=%v err=%v", o.cachepath, o.name, name, err)
			}
		} else {
			if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
				return nil, fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local fs errors
			}
		}

		o.cachedcopy = cachedcopy
		o.readonly = readonly
		o.opened = true
		return o.cachedcopy, nil
	}

	return nil, fmt.Errorf("fetch error retry cnt reached: obj=%s tfile=%v errs:[%v]", o.name, o.cachepath, errs)
}

// File returns the local cachedcopy (nil until Open succeeds).
func (o *object) File() *os.File {
	return o.cachedcopy
}

// Read reads from the local cachedcopy.
func (o *object) Read(p []byte) (n int, err error) {
	return o.cachedcopy.Read(p)
}

// Write writes to the local cachedcopy, implicitly opening the object
// read-write if it hasn't been opened yet.
func (o *object) Write(p []byte) (n int, err error) {
	if o.cachedcopy == nil {
		_, err := o.Open(cloudstorage.ReadWrite)
		if err != nil {
			return 0, err
		}
	}
	return o.cachedcopy.Write(p)
}

// Sync uploads the local cachedcopy back to azure.  The object must be
// opened and not readonly.
func (o *object) Sync() error {

	if !o.opened {
		return fmt.Errorf("object isn't opened object:%s", o.name)
	}
	if o.readonly {
		return fmt.Errorf("trying to Sync a readonly object:%s", o.name)
	}

	// Reopen by path rather than reusing o.cachedcopy so we get a clean
	// handle positioned at the start.
	cachedcopy, err := os.OpenFile(o.cachepath, os.O_RDWR, 0664)
	if err != nil {
		return fmt.Errorf("couldn't open localfile for sync'ing. local=%s err=%v", o.cachepath, err)
	}
	defer cachedcopy.Close()

	if _, err := cachedcopy.Seek(0, os.SEEK_SET); err != nil {
		return fmt.Errorf("error seeking to start of cachedcopy err=%v", err) //don't retry on local filesystem errors
	}

	// Upload the file
	if err = o.fs.uploadMultiPart(o, cachedcopy); err != nil {
		gou.Warnf("could not upload %v", err)
		return fmt.Errorf("failed to upload file, %v", err)
	}
	return nil
}

// Close flushes (and for writable objects, uploads) the cachedcopy, then
// removes the local file and marks the object closed.
func (o *object) Close() error {
	if !o.opened {
		return nil
	}
	defer func() {
		// Always clean up local state, even on error.
		os.Remove(o.cachepath)
		o.cachedcopy = nil
		o.opened = false
	}()

	if !o.readonly {
		err := o.cachedcopy.Sync()
		if err != nil {
			return err
		}
	}

	err := o.cachedcopy.Close()
	if err != nil {
		// Tolerate a file that was already closed elsewhere.
		if !strings.Contains(err.Error(), os.ErrClosed.Error()) {
			return err
		}
	}

	if o.opened && !o.readonly {
		// Push local changes back to azure before discarding the cachedcopy.
		err := o.Sync()
		if err != nil {
			gou.Errorf("error on sync %v", err)
			return err
		}
	}
	return nil
}

// Release closes (if needed) and removes the local cachedcopy without
// uploading anything.
func (o *object) Release() error {
	if o.cachedcopy != nil {
		gou.Debugf("release %q vs %q", o.cachedcopy.Name(), o.cachepath)
		o.cachedcopy.Close()
		return os.Remove(o.cachepath)
	}
	os.Remove(o.cachepath)
	return nil
}