├── .github ├── CODEOWNERS ├── release.yml ├── dependabot.yml └── workflows │ ├── go.yml │ └── dependency-review.yml ├── client_test.go ├── .gitignore ├── download.go ├── go.mod ├── upload.go ├── client.go ├── mock ├── aws │ └── mock_s3client.go └── client │ └── mock_client.go ├── README.md ├── temp_file.go ├── go.sum ├── download_test.go ├── temp_file_test.go ├── LICENCE.txt └── upload_test.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Default fallback for everything not matched by any pattern 2 | * @embrace-io/opensource-backend 3 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | categories: 3 | - title: Features 4 | labels: 5 | - '*' 6 | exclude: 7 | labels: 8 | - dependencies 9 | - title: Dependencies 10 | labels: 11 | - dependencies 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | groups: 8 | aws-sdk: 9 | patterns: 10 | - "github.com/aws/aws-sdk-go-v2*" 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | interval: "daily" 15 | -------------------------------------------------------------------------------- /client_test.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/aws/aws-sdk-go-v2/aws" 7 | . 
"github.com/onsi/gomega" 8 | ) 9 | 10 | func TestNewClient(t *testing.T) { 11 | g := NewGomegaWithT(t) 12 | 13 | c := NewClient[string](aws.Config{}, testBucketName) 14 | g.Expect(c).ToNot(BeNil()) 15 | g.Expect(c.(*client[string]).s3Client).ToNot(BeNil()) 16 | g.Expect(c.(*client[string]).s3Bucket).To(Equal(testBucketName)) 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v6 17 | 18 | - name: Set up Go 19 | uses: actions/setup-go@v6 20 | with: 21 | go-version-file: 'go.mod' 22 | 23 | - name: Build 24 | run: go build -v ./... 25 | 26 | - name: Test 27 | run: go test -v ./... 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | go.work.sum 23 | 24 | .idea -------------------------------------------------------------------------------- /download.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "context" 5 | 
"fmt" 6 | "io" 7 | 8 | "github.com/aws/aws-sdk-go-v2/aws" 9 | "github.com/aws/aws-sdk-go-v2/service/s3" 10 | ) 11 | 12 | func (c *client[K]) Fetch(ctx context.Context, ind ObjectIndex) ([]byte, error) { 13 | byteRange := byteRangeString(ind.Offset, ind.Length) 14 | result, err := c.s3Client.GetObject(ctx, &s3.GetObjectInput{ 15 | Bucket: aws.String(c.s3Bucket), 16 | Key: aws.String(ind.File), 17 | Range: aws.String(byteRange), 18 | }) 19 | if err != nil { 20 | return nil, fmt.Errorf("failed to download object from file %s/%s %s: %w", c.s3Bucket, ind.File, byteRange, err) 21 | } 22 | 23 | defer func() { _ = result.Body.Close() }() 24 | return io.ReadAll(result.Body) 25 | } 26 | 27 | // byteRangeString generates the byte range to read a byte range from an s3 file. 28 | func byteRangeString(offset, length uint64) string { 29 | return fmt.Sprintf("bytes=%d-%d", offset, offset+length-1) 30 | } 31 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/embrace-io/s3-batch-object-store 2 | 3 | go 1.24.0 4 | 5 | toolchain go1.25.4 6 | 7 | require ( 8 | github.com/aws/aws-sdk-go-v2 v1.40.1 9 | github.com/aws/aws-sdk-go-v2/service/s3 v1.93.0 10 | github.com/klauspost/compress v1.18.2 11 | github.com/oklog/ulid/v2 v2.1.1 12 | github.com/onsi/gomega v1.38.3 13 | go.uber.org/mock v0.6.0 14 | ) 15 | 16 | require ( 17 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect 18 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15 // indirect 19 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15 // indirect 20 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.15 // indirect 21 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect 22 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.6 // indirect 23 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.15 
// indirect 24 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.15 // indirect 25 | github.com/aws/smithy-go v1.24.0 // indirect 26 | github.com/google/go-cmp v0.7.0 // indirect 27 | go.yaml.in/yaml/v3 v3.0.4 // indirect 28 | golang.org/x/net v0.43.0 // indirect 29 | golang.org/x/text v0.28.0 // indirect 30 | ) 31 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable 6 | # packages will be blocked from merging. 7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement 10 | name: Dependency Review 11 | on: 12 | pull_request: 13 | branches: [ "main" ] 14 | 15 | # If using a dependency submission action in this workflow this permission will need to be set to: 16 | # 17 | # permissions: 18 | # contents: write 19 | # 20 | # https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api 21 | permissions: 22 | contents: read 23 | # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option 24 | pull-requests: write 25 | 26 | jobs: 27 | dependency-review: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v6 31 | 32 | - name: Dependency Review 33 | uses: actions/dependency-review-action@v4 34 | # 
Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options. 35 | with: 36 | comment-summary-in-pr: always 37 | # fail-on-severity: moderate 38 | # deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later 39 | # retry-on-snapshot-warnings: true 40 | -------------------------------------------------------------------------------- /upload.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "net/url" 9 | 10 | "github.com/aws/aws-sdk-go-v2/service/s3" 11 | "github.com/aws/aws-sdk-go-v2/service/s3/types" 12 | "github.com/klauspost/compress/zstd" 13 | ) 14 | 15 | func (c *client[K]) UploadFile(ctx context.Context, file *TempFile[K], withMetaFile bool) error { 16 | body, err := file.readOnly() 17 | if err != nil { 18 | return fmt.Errorf("failed to get the readonly file: %w", err) 19 | } 20 | 21 | tagging := serializeTags(file.Tags()) 22 | _, err = c.s3Client.PutObject(ctx, &s3.PutObjectInput{ 23 | Bucket: &c.s3Bucket, 24 | Key: &file.fileName, 25 | Body: body, 26 | Tagging: &tagging, 27 | }) 28 | if err != nil { 29 | return fmt.Errorf("failed to upload data file to s3: %w", err) 30 | } 31 | 32 | if withMetaFile { 33 | // If requested, also upload the meta file: 34 | metafileKey := file.MetaFileKey() 35 | metafileBody, err := json.Marshal(file.indexes) 36 | if err != nil { 37 | return fmt.Errorf("failed to marshal meta body: %w", err) 38 | } 39 | 40 | // Compress the metafile body with zstd 41 | var compressedBuf bytes.Buffer 42 | zstdWriter, err := zstd.NewWriter(&compressedBuf) 43 | if err != nil { 44 | return fmt.Errorf("failed to create zstd writer: %w", err) 45 | } 46 | _, err = zstdWriter.Write(metafileBody) 47 | if err != nil { 48 | return fmt.Errorf("failed to write to zstd writer: %w", err) 49 | } 50 | err = zstdWriter.Close() 51 | if err != nil { 52 | return 
fmt.Errorf("failed to close zstd writer: %w", err) 53 | } 54 | 55 | _, err = c.s3Client.PutObject(ctx, &s3.PutObjectInput{ 56 | Bucket: &c.s3Bucket, 57 | Key: &metafileKey, 58 | Body: bytes.NewReader(compressedBuf.Bytes()), 59 | Tagging: &tagging, 60 | }) 61 | if err != nil { 62 | return fmt.Errorf("failed to upload meta file to s3: %w", err) 63 | } 64 | } 65 | 66 | return nil 67 | } 68 | 69 | func (c *client[K]) DeleteFile(ctx context.Context, file *TempFile[K]) error { 70 | metafileKey := file.MetaFileKey() 71 | _, err := c.s3Client.DeleteObjects(ctx, &s3.DeleteObjectsInput{ 72 | Bucket: &c.s3Bucket, 73 | Delete: &types.Delete{ 74 | Objects: []types.ObjectIdentifier{ 75 | {Key: &file.fileName}, 76 | {Key: &metafileKey}, 77 | }, 78 | }, 79 | }) 80 | if err != nil { 81 | return fmt.Errorf("failed to delete files: %w", err) 82 | } 83 | return nil 84 | } 85 | 86 | // serializeTags converts the tags to url encoded string. 87 | func serializeTags(tags map[string]string) string { 88 | params := url.Values{} 89 | for k, v := range tags { 90 | params.Add(k, v) 91 | } 92 | return params.Encode() 93 | } 94 | -------------------------------------------------------------------------------- /client.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/aws/aws-sdk-go-v2/aws" 7 | "github.com/aws/aws-sdk-go-v2/service/s3" 8 | ) 9 | 10 | // Client is the client used to store and fetch object to/from s3. 11 | // K represents the type of IDs for the objects that will be uploaded 12 | // To create a new file, first call NewTempFile, then append objects to it, and finally call UploadFile. 13 | // After the file is uploaded, you can save the object indexes to a database, and use them to fetch the objects later. 14 | // To fetch the contents of a single object, call Fetch with the ObjectIndex that you had stored. 
15 | // 16 | //go:generate mockgen -source=./client.go -destination=./mock/client/mock_client.go -package=mocks3batchstore Client 17 | type Client[K comparable] interface { 18 | // NewTempFile creates a new file in a temp folder. 19 | // tags can be used to store information about this file in S3, like retention days 20 | // The file itself is not thread safe, if you expect to make concurrent calls to Append, you should protect it. 21 | // Once all the objects are appended, you can call UploadFile to upload the file to s3. 22 | NewTempFile(tags map[string]string) (*TempFile[K], error) 23 | 24 | // UploadFile will take a TempFile that already has all the objects in it, and upload it to a s3 file, 25 | // in one single operation. 26 | // withMetaFile indicates whether the metadata will be also uploaded to the file.MetaFileKey() location, 27 | // with the index information for each object, or not. 28 | UploadFile(ctx context.Context, file *TempFile[K], withMetaFile bool) error 29 | 30 | // DeleteFile allows to try to delete any files that may have been uploaded to s3 based on the provided file. 31 | // This is provided in case of any error when calling UploadFile, callers have the possibility to clean up the files. 32 | DeleteFile(ctx context.Context, file *TempFile[K]) error 33 | 34 | // Fetch downloads the payload from s3 given the ObjectIndex, fetching only the needed bytes, and returning 35 | // the payload as a byte array. 36 | // The caller is responsible for decompressing/unmarshalling or any operation needed to parse it to the proper struct. 37 | Fetch(ctx context.Context, ind ObjectIndex) ([]byte, error) 38 | } 39 | 40 | // S3Client is used to mock the aws s3 functions used in this module. 41 | // 42 | //go:generate mockgen -destination=./mock/aws/mock_s3client.go -package=mocks3 . 
S3Client 43 | type S3Client interface { 44 | PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) 45 | DeleteObjects(ctx context.Context, params *s3.DeleteObjectsInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectsOutput, error) 46 | GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) 47 | } 48 | 49 | type client[K comparable] struct { 50 | s3Client S3Client 51 | s3Bucket string 52 | } 53 | 54 | // NewClient creates a new client that can be used to upload and download objects to s3. 55 | // K represents the type of IDs for the objects that will be uploaded and fetched. 56 | func NewClient[K comparable](awsConfig aws.Config, s3Bucket string) Client[K] { 57 | s3Client := s3.NewFromConfig(awsConfig) 58 | return &client[K]{ 59 | s3Client: s3Client, 60 | s3Bucket: s3Bucket, 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /mock/aws/mock_s3client.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: github.com/embrace-io/s3-batch-object-store (interfaces: S3Client) 3 | // 4 | // Generated by this command: 5 | // 6 | // mockgen -destination=./mock/aws/mock_s3client.go -package=mocks3 . S3Client 7 | // 8 | 9 | // Package mocks3 is a generated GoMock package. 10 | package mocks3 11 | 12 | import ( 13 | context "context" 14 | reflect "reflect" 15 | 16 | s3 "github.com/aws/aws-sdk-go-v2/service/s3" 17 | gomock "go.uber.org/mock/gomock" 18 | ) 19 | 20 | // MockS3Client is a mock of S3Client interface. 21 | type MockS3Client struct { 22 | ctrl *gomock.Controller 23 | recorder *MockS3ClientMockRecorder 24 | } 25 | 26 | // MockS3ClientMockRecorder is the mock recorder for MockS3Client. 
27 | type MockS3ClientMockRecorder struct { 28 | mock *MockS3Client 29 | } 30 | 31 | // NewMockS3Client creates a new mock instance. 32 | func NewMockS3Client(ctrl *gomock.Controller) *MockS3Client { 33 | mock := &MockS3Client{ctrl: ctrl} 34 | mock.recorder = &MockS3ClientMockRecorder{mock} 35 | return mock 36 | } 37 | 38 | // EXPECT returns an object that allows the caller to indicate expected use. 39 | func (m *MockS3Client) EXPECT() *MockS3ClientMockRecorder { 40 | return m.recorder 41 | } 42 | 43 | // DeleteObjects mocks base method. 44 | func (m *MockS3Client) DeleteObjects(arg0 context.Context, arg1 *s3.DeleteObjectsInput, arg2 ...func(*s3.Options)) (*s3.DeleteObjectsOutput, error) { 45 | m.ctrl.T.Helper() 46 | varargs := []any{arg0, arg1} 47 | for _, a := range arg2 { 48 | varargs = append(varargs, a) 49 | } 50 | ret := m.ctrl.Call(m, "DeleteObjects", varargs...) 51 | ret0, _ := ret[0].(*s3.DeleteObjectsOutput) 52 | ret1, _ := ret[1].(error) 53 | return ret0, ret1 54 | } 55 | 56 | // DeleteObjects indicates an expected call of DeleteObjects. 57 | func (mr *MockS3ClientMockRecorder) DeleteObjects(arg0, arg1 any, arg2 ...any) *gomock.Call { 58 | mr.mock.ctrl.T.Helper() 59 | varargs := append([]any{arg0, arg1}, arg2...) 60 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteObjects", reflect.TypeOf((*MockS3Client)(nil).DeleteObjects), varargs...) 61 | } 62 | 63 | // GetObject mocks base method. 64 | func (m *MockS3Client) GetObject(arg0 context.Context, arg1 *s3.GetObjectInput, arg2 ...func(*s3.Options)) (*s3.GetObjectOutput, error) { 65 | m.ctrl.T.Helper() 66 | varargs := []any{arg0, arg1} 67 | for _, a := range arg2 { 68 | varargs = append(varargs, a) 69 | } 70 | ret := m.ctrl.Call(m, "GetObject", varargs...) 71 | ret0, _ := ret[0].(*s3.GetObjectOutput) 72 | ret1, _ := ret[1].(error) 73 | return ret0, ret1 74 | } 75 | 76 | // GetObject indicates an expected call of GetObject. 
77 | func (mr *MockS3ClientMockRecorder) GetObject(arg0, arg1 any, arg2 ...any) *gomock.Call { 78 | mr.mock.ctrl.T.Helper() 79 | varargs := append([]any{arg0, arg1}, arg2...) 80 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetObject", reflect.TypeOf((*MockS3Client)(nil).GetObject), varargs...) 81 | } 82 | 83 | // PutObject mocks base method. 84 | func (m *MockS3Client) PutObject(arg0 context.Context, arg1 *s3.PutObjectInput, arg2 ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 85 | m.ctrl.T.Helper() 86 | varargs := []any{arg0, arg1} 87 | for _, a := range arg2 { 88 | varargs = append(varargs, a) 89 | } 90 | ret := m.ctrl.Call(m, "PutObject", varargs...) 91 | ret0, _ := ret[0].(*s3.PutObjectOutput) 92 | ret1, _ := ret[1].(error) 93 | return ret0, ret1 94 | } 95 | 96 | // PutObject indicates an expected call of PutObject. 97 | func (mr *MockS3ClientMockRecorder) PutObject(arg0, arg1 any, arg2 ...any) *gomock.Call { 98 | mr.mock.ctrl.T.Helper() 99 | varargs := append([]any{arg0, arg1}, arg2...) 100 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PutObject", reflect.TypeOf((*MockS3Client)(nil).PutObject), varargs...) 101 | } 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # s3-batch-object-store 2 | 3 | `s3-batch-object-store` is a Go module that allows for batch uploading of objects to a single S3 file and retrieving 4 | each object separately using the AWS S3 API, fetching only the bytes for that specific object. 5 | 6 | The method basically consists of appending multiple objects to a single file, keep the information of where each object 7 | is placed in the file, and then upload one single file to s3 with many objects in it, reducing drastically the number 8 | of PUT operations needed to store a large number of objets. 
9 | 10 | After uploading a file, you can store the index information by calling `file.Indexes()` wherever you want. 11 | The best solution for how to store and query your index data will depend on your application data patterns and is beyond 12 | the scope of what this package aspires to do. 13 | 14 | Then, when you need to retrieve an object, you can use the index information to fetch that object and the GET call to s3 15 | will only retrieve the bytes that correspond to that object, reducing the amount of data transferred. 16 | 17 | This method of storage and retrieval is well suited for write-heavy workloads, where you want to fetch a small 18 | percentage of the stored objects later. 19 | This storage approach also works well when you have objects of widely varying size. 20 | 21 | ## Features 22 | 23 | - Batch upload multiple objects into a single S3 file, reducing the number of PUT operations. 24 | - Retrieve individual objects using index information (byte offset and length). 25 | 26 | ## Installation 27 | 28 | To install the module, use `go get`: 29 | 30 | ```sh 31 | go get github.com/embrace-io/s3-batch-object-store 32 | ``` 33 | 34 | ## Usage 35 | 36 | ### Example 37 | 38 | Here is a basic example demonstrating how to use the `s3-batch-object-store` module: 39 | 40 | 41 | ```go 42 | package main 43 | 44 | import ( 45 | "context" 46 | "fmt" 47 | "time" 48 | 49 | "github.com/aws/aws-sdk-go-v2/config" 50 | "github.com/embrace-io/s3-batch-object-store" 51 | ) 52 | 53 | func main() { 54 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 55 | defer cancel() 56 | 57 | // Load the AWS configuration 58 | awsCfg, err := config.LoadDefaultConfig(ctx) 59 | if err != nil { 60 | panic("failed to load AWS SDK config: " + err.Error()) 61 | } 62 | 63 | // Create the s3 batch store Client, with string as object IDs. 
64 | client := s3batchstore.NewClient[string](awsCfg, "my-bucket") 65 | 66 | // Example objects to upload 67 | objects := map[string][]byte{ 68 | "object1": []byte("This is the content of object1."), 69 | "object2": []byte("This is the content of object2."), 70 | "object3": []byte("This is the content of object3."), 71 | } 72 | 73 | // Create the new temp file 74 | file, err := client.NewTempFile(map[string]string{ 75 | // You can add any tags and these will be set in the s3 file. 76 | // This can be used for example to set TTL rules, and automatically delete the files. 77 | "retention-days": "14", 78 | }) 79 | if err != nil { 80 | panic("failed to create temp file: " + err.Error()) 81 | } 82 | 83 | // Append all the objects to the file: 84 | for id, obj := range objects { 85 | if err = file.Append(id, obj); err != nil { 86 | panic("failed to append object to temp file: " + err.Error()) 87 | } 88 | } 89 | 90 | // You can check the file properties to decide when to upload a file: 91 | fmt.Printf("File is %s old, has %d objects, and is %d bytes long\n", file.Age(), file.Count(), file.Size()) 92 | // File is 42.375µs old, has 3 objects, and is 93 bytes long 93 | 94 | // Upload the objects 95 | err = client.UploadFile(ctx, file, true) 96 | if err != nil { 97 | panic("failed to upload object: " + err.Error()) 98 | } 99 | 100 | // At this point the file.Indexes() can be stored to be used later to retrieve the objects. 101 | fmt.Printf("File indexes:\n") 102 | for id, index := range file.Indexes() { 103 | fmt.Printf("objectID: %v, index: %+v\n", id, index) 104 | } 105 | 106 | // Retrieve an object 107 | indexes := file.Indexes() 108 | content, err := client.Fetch(ctx, indexes["object2"]) 109 | if err != nil { 110 | panic("failed to retrieve object, " + err.Error()) 111 | } 112 | 113 | fmt.Printf("Contents of object2:\n%s", content) 114 | // Contents of object2: 115 | // This is the content of object2. 
116 | } 117 | ``` 118 | 119 | ## Contributing 120 | 121 | Contributions are welcome! Please open an issue or submit a pull request. 122 | -------------------------------------------------------------------------------- /temp_file.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "time" 8 | 9 | "github.com/oklog/ulid/v2" 10 | ) 11 | 12 | // version is used to prefix the file name, so that we can change how the files are read in the future 13 | const version string = "v1" 14 | 15 | // TempFile creates a temp file in the filesystem, and is used to store the contents that will be uploaded to s3. 16 | // This way we avoid having all the bytes in memory. 17 | // This will also keep track of the indexes for each slice of bytes, in order to know where each of them are located 18 | // TempFile is not thread safe, if you expect to make concurrent calls to Append, you should protect it. 19 | // K represents the type of IDs for the objects that will be uploaded 20 | type TempFile[K comparable] struct { 21 | fileName string 22 | file *os.File 23 | createdOn time.Time 24 | tags map[string]string 25 | 26 | readonly bool 27 | count uint // How many items are currently saved in the file 28 | offset uint64 // The current offset in the file 29 | indexes map[K]ObjectIndex 30 | } 31 | 32 | type ObjectIndex struct { 33 | File string `json:"file"` 34 | Offset uint64 `json:"offset"` 35 | Length uint64 `json:"length"` 36 | } 37 | 38 | func (c *client[K]) NewTempFile(tags map[string]string) (*TempFile[K], error) { 39 | return NewTempFile[K](tags) 40 | } 41 | 42 | func NewTempFile[K comparable](tags map[string]string) (*TempFile[K], error) { 43 | fileName := ulid.Make().String() 44 | 45 | file, err := os.CreateTemp(os.TempDir(), fileName) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | return &TempFile[K]{ 51 | fileName: version + "/" + timeToFilePath(time.Now()) + "/" + 
fileName, 52 | file: file, 53 | createdOn: time.Now(), 54 | tags: tags, 55 | indexes: map[K]ObjectIndex{}, 56 | }, nil 57 | } 58 | 59 | // Append is the same as AppendAndReturnIndex but doesn't return an index. This method could be deleted, but 60 | // it is kept for backwards compatibility. 61 | func (f *TempFile[K]) Append(id K, bytes []byte) error { 62 | _, err := f.AppendAndReturnIndex(id, bytes) 63 | return err 64 | } 65 | 66 | // AppendAndReturnIndex will take an id, and the slice of bytes of the Object, and append it to the temp file. 67 | // This will also return the associated ObjectIndex information for this slice of bytes, which tells 68 | // where the object is located in this file (file, offset, length) 69 | // This method is not thread safe, if you expect to make concurrent calls to Append, you should protect it. 70 | // If you provide the same id twice, the second call will overwrite the first one, but the file will still grow in size. 71 | func (f *TempFile[K]) AppendAndReturnIndex(id K, bytes []byte) (ObjectIndex, error) { 72 | if f.readonly { 73 | return ObjectIndex{}, fmt.Errorf("file %s is readonly", f.fileName) 74 | } 75 | 76 | length := uint64(len(bytes)) 77 | 78 | // Append to file 79 | bytesWritten, err := f.file.Write(bytes) 80 | if err != nil { 81 | return ObjectIndex{}, fmt.Errorf("failed to write %d bytes (%d written) to file %s: %w", length, bytesWritten, f.file.Name(), err) 82 | } 83 | 84 | // Add index 85 | index := ObjectIndex{ 86 | File: f.fileName, 87 | Offset: f.offset, 88 | Length: length, 89 | } 90 | f.indexes[id] = index 91 | 92 | // Increment counters/metrics 93 | f.count++ 94 | f.offset += length 95 | 96 | return index, nil 97 | } 98 | 99 | // Name returns the fileName 100 | func (f *TempFile[K]) Name() string { 101 | return f.fileName 102 | } 103 | 104 | // Tags returns the tags associated with this file 105 | func (f *TempFile[K]) Tags() map[string]string { 106 | return f.tags 107 | } 108 | 109 | // Age returns the duration 
since this file was created 110 | func (f *TempFile[K]) Age() time.Duration { 111 | return time.Since(f.createdOn) 112 | } 113 | 114 | // Count returns the number of items stored in this file 115 | func (f *TempFile[K]) Count() uint { 116 | return f.count 117 | } 118 | 119 | // Size returns the size of the file contents in bytes 120 | func (f *TempFile[K]) Size() uint64 { 121 | return f.offset 122 | } 123 | 124 | // Indexes returns the indexes that the file is holding 125 | func (f *TempFile[K]) Indexes() map[K]ObjectIndex { 126 | return f.indexes 127 | } 128 | 129 | // Close will delete the file, as it is no longer needed, and given that these files may be really large, 130 | // we want to avoid having then live in the os for a long period of time. 131 | func (f *TempFile[K]) Close() error { 132 | // This is a temp file, so on Close we delete it. 133 | return os.Remove(f.file.Name()) 134 | } 135 | 136 | // MetaFileKey returns the key to be used for the json meta file 137 | func (f *TempFile[K]) MetaFileKey() string { 138 | return f.fileName + ".meta.json.zst" 139 | } 140 | 141 | // readOnly logically closes the file by not accepting more appends, and returns the os.File used to upload the file to s3 142 | func (f *TempFile[K]) readOnly() (*os.File, error) { 143 | // Set file pointer to beginning 144 | if _, err := f.file.Seek(0, io.SeekStart); err != nil { 145 | return nil, err 146 | } 147 | f.readonly = true 148 | return f.file, nil 149 | } 150 | 151 | // timeToFilePath returns the time formatted as yyyy/mm/dd/hh, in UTC timezone 152 | func timeToFilePath(t time.Time) string { 153 | return t.UTC().Format("2006/01/02/15") 154 | } 155 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= 2 | github.com/Masterminds/semver/v3 v3.4.0/go.mod 
h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= 3 | github.com/aws/aws-sdk-go-v2 v1.40.1 h1:difXb4maDZkRH0x//Qkwcfpdg1XQVXEAEs2DdXldFFc= 4 | github.com/aws/aws-sdk-go-v2 v1.40.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= 5 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= 6 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= 7 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15 h1:Y5YXgygXwDI5P4RkteB5yF7v35neH7LfJKBG+hzIons= 8 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.15/go.mod h1:K+/1EpG42dFSY7CBj+Fruzm8PsCGWTXJ3jdeJ659oGQ= 9 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15 h1:AvltKnW9ewxX2hFmQS0FyJH93aSvJVUEFvXfU+HWtSE= 10 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.15/go.mod h1:3I4oCdZdmgrREhU74qS1dK9yZ62yumob+58AbFR4cQA= 11 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.15 h1:NLYTEyZmVZo0Qh183sC8nC+ydJXOOeIL/qI/sS3PdLY= 12 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.15/go.mod h1:Z803iB3B0bc8oJV8zH2PERLRfQUJ2n2BXISpsA4+O1M= 13 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= 14 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= 15 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.6 h1:P1MU/SuhadGvg2jtviDXPEejU3jBNhoeeAlRadHzvHI= 16 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.6/go.mod h1:5KYaMG6wmVKMFBSfWoyG/zH8pWwzQFnKgpoSRlXHKdQ= 17 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.15 h1:3/u/4yZOffg5jdNk1sDpOQ4Y+R6Xbh+GzpDrSZjuy3U= 18 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.15/go.mod h1:4Zkjq0FKjE78NKjabuM4tRXKFzUJWXgP0ItEZK8l7JU= 19 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.15 h1:wsSQ4SVz5YE1crz0Ap7VBZrV4nNqZt4CIBBT8mnwoNc= 
20 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.15/go.mod h1:I7sditnFGtYMIqPRU1QoHZAUrXkGp4SczmlLwrNPlD0= 21 | github.com/aws/aws-sdk-go-v2/service/s3 v1.93.0 h1:IrbE3B8O9pm3lsg96AXIN5MXX4pECEuExh/A0Du3AuI= 22 | github.com/aws/aws-sdk-go-v2/service/s3 v1.93.0/go.mod h1:/sJLzHtiiZvs6C1RbxS/anSAFwZD6oC6M/kotQzOiLw= 23 | github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= 24 | github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= 25 | github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= 26 | github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 27 | github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= 28 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 29 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 30 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 31 | github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= 32 | github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= 33 | github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= 34 | github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= 35 | github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= 36 | github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= 37 | github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= 38 | github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= 39 | github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= 40 | github.com/onsi/gomega v1.38.3/go.mod 
h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= 41 | github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= 42 | go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= 43 | go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= 44 | go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= 45 | go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= 46 | go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= 47 | go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= 48 | golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= 49 | golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= 50 | golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= 51 | golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 52 | golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= 53 | golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= 54 | golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= 55 | golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= 56 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 57 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 58 | -------------------------------------------------------------------------------- /download_test.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "testing" 11 | 12 | "github.com/aws/aws-sdk-go-v2/aws" 13 | "github.com/aws/aws-sdk-go-v2/service/s3" 14 | mocks3 
"github.com/embrace-io/s3-batch-object-store/mock/aws" 15 | . "github.com/onsi/gomega" 16 | "go.uber.org/mock/gomock" 17 | ) 18 | 19 | type objUploadFixture struct { 20 | objID string 21 | payload []byte 22 | obj *TestObject 23 | compressedPayload []byte 24 | offset uint64 25 | length uint64 26 | } 27 | 28 | func newObjectUploadFixture(g *WithT, obj *TestObject) objUploadFixture { 29 | payload, err := json.Marshal(obj) 30 | g.Expect(err).ToNot(HaveOccurred()) 31 | compressed, err := gzipCompress(payload) 32 | g.Expect(err).ToNot(HaveOccurred()) 33 | return objUploadFixture{ 34 | objID: obj.ID, 35 | payload: payload, 36 | obj: obj, 37 | compressedPayload: compressed, 38 | length: uint64(len(compressed)), 39 | } 40 | } 41 | 42 | func TestClient_Fetch(t *testing.T) { 43 | g := NewGomegaWithT(t) 44 | fixture1 := newObjectUploadFixture(g, &TestObject{ID: "1", Value: "my first payload"}) 45 | fixture2 := newObjectUploadFixture(g, &TestObject{ID: "2", Value: "my second payload"}) 46 | fixture3 := newObjectUploadFixture(g, &TestObject{ID: "3", Value: "my third payload"}) 47 | 48 | fixture1.offset = 0 49 | fixture2.offset = fixture1.length 50 | fixture3.offset = fixture1.length + fixture2.length 51 | 52 | expectedIndexes := map[string]ObjectIndex{ 53 | fixture1.objID: {Offset: fixture1.offset, Length: fixture1.length}, 54 | fixture2.objID: {Offset: fixture2.offset, Length: fixture2.length}, 55 | fixture3.objID: {Offset: fixture3.offset, Length: fixture3.length}, 56 | } 57 | 58 | bytesByID := map[string][]byte{ 59 | fixture1.objID: fixture1.payload, 60 | fixture2.objID: fixture2.payload, 61 | fixture3.objID: fixture3.payload, 62 | } 63 | 64 | ctrl := gomock.NewController(t) 65 | s3Mock := mocks3.NewMockS3Client(ctrl) 66 | 67 | c := client[string]{ 68 | s3Bucket: testBucketName, 69 | s3Client: s3Mock, 70 | } 71 | 72 | file, err := c.NewTempFile(testTags) 73 | g.Expect(err).ToNot(HaveOccurred()) 74 | defer func() { _ = file.Close() }() 75 | 76 | ctx := context.Background() 77 
| 78 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.fileName)).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 79 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 80 | g.Expect(*input.Key).To(Equal(file.fileName)) 81 | g.Expect(input.Body).ToNot(BeNil()) 82 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 83 | return &s3.PutObjectOutput{}, nil 84 | }) 85 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.MetaFileKey())).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 86 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 87 | g.Expect(*input.Key).To(Equal(file.MetaFileKey())) 88 | g.Expect(input.Body).ToNot(BeNil()) 89 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 90 | return &s3.PutObjectOutput{}, nil 91 | }) 92 | s3Mock.EXPECT().GetObject(ctx, gomock.Any()).DoAndReturn(func(_ context.Context, input *s3.GetObjectInput, _ ...func(options *s3.Options)) (*s3.GetObjectOutput, error) { 93 | payloadsByRange := map[string][]byte{ 94 | byteRangeString(fixture1.offset, fixture1.length): fixture1.compressedPayload, 95 | byteRangeString(fixture2.offset, fixture2.length): fixture2.compressedPayload, 96 | byteRangeString(fixture3.offset, fixture3.length): fixture3.compressedPayload, 97 | } 98 | out, ok := payloadsByRange[*input.Range] 99 | g.Expect(ok).To(BeTrue(), fmt.Sprintf("input range %s is not a valid range", *input.Range)) 100 | 101 | var buf bytes.Buffer 102 | _, err = buf.Write(out) 103 | g.Expect(err).ToNot(HaveOccurred()) 104 | 105 | return &s3.GetObjectOutput{ 106 | Body: io.NopCloser(&buf), 107 | }, nil 108 | }).Times(3) 109 | 110 | for _, fixture := range []objUploadFixture{fixture1, fixture2, fixture3} { 111 | b, err := marshalAndCompress(fixture.obj) 112 | g.Expect(err).ToNot(HaveOccurred()) 113 | _, err = file.AppendAndReturnIndex(fixture.obj.ID, b) 114 | 
g.Expect(err).ToNot(HaveOccurred()) 115 | } 116 | 117 | err = c.UploadFile(ctx, file, true) 118 | g.Expect(err).To(BeNil()) 119 | g.Expect(len(file.indexes)).To(Equal(len(expectedIndexes))) 120 | for id, index := range expectedIndexes { 121 | idx, ok := file.indexes[id] 122 | g.Expect(ok).To(BeTrue()) 123 | g.Expect(idx.Offset).To(Equal(index.Offset)) 124 | g.Expect(idx.Length).To(Equal(index.Length)) 125 | } 126 | 127 | for id, ind := range expectedIndexes { 128 | b, err := c.Fetch(ctx, ind) 129 | g.Expect(err).To(BeNil()) 130 | 131 | actualPayload, err := gzipDecompress(b) 132 | g.Expect(err).To(BeNil()) 133 | 134 | g.Expect(actualPayload).To(Equal(bytesByID[id])) 135 | } 136 | } 137 | 138 | func TestClient_FetchError(t *testing.T) { 139 | g := NewGomegaWithT(t) 140 | 141 | ctx := context.Background() 142 | 143 | ctrl := gomock.NewController(t) 144 | s3Mock := mocks3.NewMockS3Client(ctrl) 145 | s3Mock.EXPECT().GetObject(ctx, gomock.Any()).DoAndReturn(func(_ context.Context, input *s3.GetObjectInput, _ ...func(options *s3.Options)) (*s3.GetObjectOutput, error) { 146 | return nil, errors.New("error connecting to s3") 147 | }).Times(1) 148 | 149 | c := client[string]{ 150 | s3Bucket: testBucketName, 151 | s3Client: s3Mock, 152 | } 153 | 154 | idx := ObjectIndex{File: "1234", Offset: 0, Length: 120} 155 | b, err := c.Fetch(ctx, idx) 156 | g.Expect(err).To(MatchError("failed to download object from file test-bucket/1234 bytes=0-119: error connecting to s3")) 157 | g.Expect(b).To(BeNil()) 158 | } 159 | -------------------------------------------------------------------------------- /temp_file_test.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "testing" 10 | "time" 11 | 12 | . 
"github.com/onsi/gomega" 13 | ) 14 | 15 | const testBucketName = "test-bucket" 16 | 17 | var testTags = map[string]string{ 18 | "retention-days": "14", 19 | } 20 | 21 | func TestFile_Append(t *testing.T) { 22 | g := NewGomegaWithT(t) 23 | 24 | c := client[string]{} 25 | 26 | file, err := c.NewTempFile(testTags) 27 | g.Expect(err).ToNot(HaveOccurred()) 28 | defer func() { _ = file.Close() }() 29 | 30 | obj1 := &TestObject{ID: "4", Value: "contents"} 31 | compressed, err := marshalAndCompress(obj1) 32 | g.Expect(err).ToNot(HaveOccurred()) 33 | 34 | err = file.Append(obj1.ID, compressed) 35 | g.Expect(err).ToNot(HaveOccurred()) 36 | g.Expect(file.Name()).To(Equal(file.fileName)) 37 | g.Expect(file.Indexes()[obj1.ID].Offset).To(Equal(uint64(0))) 38 | g.Expect(file.Indexes()[obj1.ID].Length).To(BeNumerically(">", 0)) 39 | 40 | // Add another object, using AppendAndReturnIndex 41 | obj2 := &TestObject{ID: "5", Value: "contents"} 42 | compressed, err = marshalAndCompress(obj2) 43 | g.Expect(err).ToNot(HaveOccurred()) 44 | 45 | index, err := file.AppendAndReturnIndex(obj2.ID, compressed) 46 | g.Expect(err).ToNot(HaveOccurred()) 47 | g.Expect(file.Name()).To(Equal(file.fileName)) 48 | g.Expect(file.Indexes()[obj2.ID]).To(Equal(index)) 49 | g.Expect(file.Indexes()[obj2.ID].Offset).To(Equal(file.Indexes()[obj1.ID].Length)) 50 | g.Expect(file.Indexes()[obj2.ID].Length).To(BeNumerically(">", 0)) 51 | } 52 | 53 | func TestFile_WriteError(t *testing.T) { 54 | g := NewGomegaWithT(t) 55 | 56 | c := client[string]{} 57 | 58 | file, err := c.NewTempFile(testTags) 59 | g.Expect(err).ToNot(HaveOccurred()) 60 | defer func() { _ = file.Close() }() 61 | 62 | obj := &TestObject{ID: "4", Value: "contents"} 63 | compressed, err := marshalAndCompress(obj) 64 | g.Expect(err).ToNot(HaveOccurred()) 65 | 66 | index, err := file.AppendAndReturnIndex(obj.ID, compressed) 67 | g.Expect(err).ToNot(HaveOccurred()) 68 | g.Expect(file.Name()).To(Equal(file.fileName)) 69 | 
g.Expect(file.Indexes()[obj.ID]).To(Equal(index)) 70 | g.Expect(file.Indexes()[obj.ID].Offset).To(Equal(uint64(0))) 71 | g.Expect(file.Indexes()[obj.ID].Length).To(BeNumerically(">", 0)) 72 | 73 | // If file is closed, it won't be able to write more: 74 | g.Expect(file.file.Close()).ToNot(HaveOccurred()) 75 | 76 | // Try to append a new object 77 | obj = &TestObject{ID: "5", Value: "contents"} 78 | compressed, err = marshalAndCompress(obj) 79 | g.Expect(err).ToNot(HaveOccurred()) 80 | 81 | index, err = file.AppendAndReturnIndex(obj.ID, compressed) 82 | fileName := file.file.Name() 83 | g.Expect(err).To(MatchError(fmt.Sprintf("failed to write %d bytes (0 written) to file %s: write %s: file already closed", len(compressed), fileName, fileName))) 84 | g.Expect(index).To(Equal(ObjectIndex{})) 85 | g.Expect(file.Indexes()[obj.ID]).To(Equal(index)) 86 | } 87 | 88 | func TestFile_ReadOnly(t *testing.T) { 89 | g := NewGomegaWithT(t) 90 | 91 | c := client[string]{} 92 | 93 | file, err := c.NewTempFile(testTags) 94 | g.Expect(err).ToNot(HaveOccurred()) 95 | defer func() { _ = file.Close() }() 96 | 97 | obj := &TestObject{ID: "4", Value: "contents"} 98 | compressed, err := marshalAndCompress(obj) 99 | g.Expect(err).ToNot(HaveOccurred()) 100 | 101 | // Store one object, then ask for the readonly file and try to store one more object 102 | index, err := file.AppendAndReturnIndex(obj.ID, compressed) 103 | g.Expect(err).ToNot(HaveOccurred()) 104 | g.Expect(file.Indexes()[obj.ID]).To(Equal(index)) 105 | g.Expect(index.Offset).To(Equal(uint64(0))) 106 | g.Expect(index.Length).To(BeNumerically(">", 0)) 107 | 108 | roFile, err := file.readOnly() 109 | g.Expect(roFile).ToNot(BeNil()) 110 | g.Expect(err).To(BeNil()) 111 | 112 | // Append a new object 113 | obj = &TestObject{ID: "5", Value: "contents"} 114 | compressed, err = marshalAndCompress(obj) 115 | g.Expect(err).ToNot(HaveOccurred()) 116 | 117 | index, err = file.AppendAndReturnIndex(obj.ID, compressed) 118 | 
g.Expect(err).To(MatchError(fmt.Sprintf("file %s is readonly", file.fileName))) 119 | g.Expect(index).To(Equal(ObjectIndex{})) 120 | g.Expect(file.Indexes()[obj.ID]).To(Equal(index)) 121 | } 122 | 123 | func TestFile_ReadOnlyError(t *testing.T) { 124 | g := NewGomegaWithT(t) 125 | 126 | c := client[string]{} 127 | 128 | file, err := c.NewTempFile(testTags) 129 | g.Expect(err).ToNot(HaveOccurred()) 130 | defer func() { _ = file.Close() }() 131 | 132 | obj := &TestObject{ID: "4", Value: "contents"} 133 | compressed, err := marshalAndCompress(obj) 134 | g.Expect(err).ToNot(HaveOccurred()) 135 | 136 | index, err := file.AppendAndReturnIndex(obj.ID, compressed) 137 | g.Expect(err).ToNot(HaveOccurred()) 138 | g.Expect(file.Indexes()[obj.ID]).To(Equal(index)) 139 | g.Expect(file.Indexes()[obj.ID].Offset).To(Equal(uint64(0))) 140 | g.Expect(file.Indexes()[obj.ID].Length).To(BeNumerically(">", 0)) 141 | 142 | // If file is closed, we won't be able to get the readOnly file 143 | g.Expect(file.file.Close()).ToNot(HaveOccurred()) 144 | 145 | roFile, err := file.readOnly() 146 | g.Expect(roFile).To(BeNil()) 147 | g.Expect(err).To(MatchError(fmt.Sprintf("seek %s: file already closed", file.file.Name()))) 148 | } 149 | 150 | func TestTimeToFilePath(t *testing.T) { 151 | g := NewGomegaWithT(t) 152 | tt := time.Date(2021, 10, 8, 02, 10, 14, 33, time.UTC) 153 | g.Expect(timeToFilePath(tt)).To(Equal("2021/10/08/02")) 154 | } 155 | 156 | // TestObject represents a document that may be uploaded to s3 and fetched from s3 157 | type TestObject struct { 158 | ID string `json:"id"` 159 | Value string `json:"value"` 160 | } 161 | 162 | func marshalAndCompress(v any) ([]byte, error) { 163 | b, err := json.Marshal(v) 164 | if err != nil { 165 | return nil, err 166 | } 167 | return gzipCompress(b) 168 | } 169 | 170 | func gzipCompress(data []byte) ([]byte, error) { 171 | var b bytes.Buffer 172 | gz := gzip.NewWriter(&b) 173 | if _, err := gz.Write(data); err != nil { 174 | return nil, err 175 
| } 176 | if err := gz.Close(); err != nil { 177 | return nil, err 178 | } 179 | return b.Bytes(), nil 180 | } 181 | 182 | func gzipDecompress(data []byte) ([]byte, error) { 183 | reader := bytes.NewReader(data) 184 | gz, err := gzip.NewReader(reader) 185 | if err != nil { 186 | return nil, err 187 | } 188 | return io.ReadAll(gz) 189 | } 190 | -------------------------------------------------------------------------------- /mock/client/mock_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: ./client.go 3 | // 4 | // Generated by this command: 5 | // 6 | // mockgen -source=./client.go -destination=./mock/client/mock_client.go -package=mocks3batchstore Client 7 | // 8 | 9 | // Package mocks3batchstore is a generated GoMock package. 10 | package mocks3batchstore 11 | 12 | import ( 13 | context "context" 14 | reflect "reflect" 15 | 16 | s3 "github.com/aws/aws-sdk-go-v2/service/s3" 17 | s3batchstore "github.com/embrace-io/s3-batch-object-store" 18 | gomock "go.uber.org/mock/gomock" 19 | ) 20 | 21 | // MockClient is a mock of Client interface. 22 | type MockClient[K comparable] struct { 23 | ctrl *gomock.Controller 24 | recorder *MockClientMockRecorder[K] 25 | } 26 | 27 | // MockClientMockRecorder is the mock recorder for MockClient. 28 | type MockClientMockRecorder[K comparable] struct { 29 | mock *MockClient[K] 30 | } 31 | 32 | // NewMockClient creates a new mock instance. 33 | func NewMockClient[K comparable](ctrl *gomock.Controller) *MockClient[K] { 34 | mock := &MockClient[K]{ctrl: ctrl} 35 | mock.recorder = &MockClientMockRecorder[K]{mock} 36 | return mock 37 | } 38 | 39 | // EXPECT returns an object that allows the caller to indicate expected use. 40 | func (m *MockClient[K]) EXPECT() *MockClientMockRecorder[K] { 41 | return m.recorder 42 | } 43 | 44 | // DeleteFile mocks base method. 
// DeleteFile mocks base method.
// Generated delegation: records the call with the controller and returns
// whatever error (if any) the test configured via EXPECT().
func (m *MockClient[K]) DeleteFile(ctx context.Context, file *s3batchstore.TempFile[K]) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "DeleteFile", ctx, file)
	// Checked type assertion with discarded ok: a nil return value yields nil.
	ret0, _ := ret[0].(error)
	return ret0
}

// DeleteFile indicates an expected call of DeleteFile.
func (mr *MockClientMockRecorder[K]) DeleteFile(ctx, file any) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteFile", reflect.TypeOf((*MockClient[K])(nil).DeleteFile), ctx, file)
}

// Fetch mocks base method.
// Returns the configured ([]byte, error) pair for the given object index.
func (m *MockClient[K]) Fetch(ctx context.Context, ind s3batchstore.ObjectIndex) ([]byte, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Fetch", ctx, ind)
	ret0, _ := ret[0].([]byte)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// Fetch indicates an expected call of Fetch.
func (mr *MockClientMockRecorder[K]) Fetch(ctx, ind any) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Fetch", reflect.TypeOf((*MockClient[K])(nil).Fetch), ctx, ind)
}

// NewTempFile mocks base method.
// Returns the configured (*TempFile[K], error) pair for the given tags.
func (m *MockClient[K]) NewTempFile(tags map[string]string) (*s3batchstore.TempFile[K], error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "NewTempFile", tags)
	ret0, _ := ret[0].(*s3batchstore.TempFile[K])
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// NewTempFile indicates an expected call of NewTempFile.
func (mr *MockClientMockRecorder[K]) NewTempFile(tags any) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewTempFile", reflect.TypeOf((*MockClient[K])(nil).NewTempFile), tags)
}

// UploadFile mocks base method.
// Generated delegation: records the call with the controller and returns
// whatever error (if any) the test configured via EXPECT().
func (m *MockClient[K]) UploadFile(ctx context.Context, file *s3batchstore.TempFile[K], withMetaFile bool) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "UploadFile", ctx, file, withMetaFile)
	ret0, _ := ret[0].(error)
	return ret0
}

// UploadFile indicates an expected call of UploadFile.
func (mr *MockClientMockRecorder[K]) UploadFile(ctx, file, withMetaFile any) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UploadFile", reflect.TypeOf((*MockClient[K])(nil).UploadFile), ctx, file, withMetaFile)
}

// MockS3Client is a mock of S3Client interface.
type MockS3Client struct {
	ctrl     *gomock.Controller
	recorder *MockS3ClientMockRecorder
}

// MockS3ClientMockRecorder is the mock recorder for MockS3Client.
type MockS3ClientMockRecorder struct {
	mock *MockS3Client
}

// NewMockS3Client creates a new mock instance.
func NewMockS3Client(ctrl *gomock.Controller) *MockS3Client {
	mock := &MockS3Client{ctrl: ctrl}
	mock.recorder = &MockS3ClientMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockS3Client) EXPECT() *MockS3ClientMockRecorder {
	return m.recorder
}

// DeleteObjects mocks base method.
// Variadic option funcs are flattened into the recorded argument list so a
// test's matcher set can cover them alongside ctx and params.
func (m *MockS3Client) DeleteObjects(ctx context.Context, params *s3.DeleteObjectsInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectsOutput, error) {
	m.ctrl.T.Helper()
	varargs := []any{ctx, params}
	for _, a := range optFns {
		varargs = append(varargs, a)
	}
	ret := m.ctrl.Call(m, "DeleteObjects", varargs...)
	ret0, _ := ret[0].(*s3.DeleteObjectsOutput)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// DeleteObjects indicates an expected call of DeleteObjects.
139 | func (mr *MockS3ClientMockRecorder) DeleteObjects(ctx, params any, optFns ...any) *gomock.Call { 140 | mr.mock.ctrl.T.Helper() 141 | varargs := append([]any{ctx, params}, optFns...) 142 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteObjects", reflect.TypeOf((*MockS3Client)(nil).DeleteObjects), varargs...) 143 | } 144 | 145 | // GetObject mocks base method. 146 | func (m *MockS3Client) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { 147 | m.ctrl.T.Helper() 148 | varargs := []any{ctx, params} 149 | for _, a := range optFns { 150 | varargs = append(varargs, a) 151 | } 152 | ret := m.ctrl.Call(m, "GetObject", varargs...) 153 | ret0, _ := ret[0].(*s3.GetObjectOutput) 154 | ret1, _ := ret[1].(error) 155 | return ret0, ret1 156 | } 157 | 158 | // GetObject indicates an expected call of GetObject. 159 | func (mr *MockS3ClientMockRecorder) GetObject(ctx, params any, optFns ...any) *gomock.Call { 160 | mr.mock.ctrl.T.Helper() 161 | varargs := append([]any{ctx, params}, optFns...) 162 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetObject", reflect.TypeOf((*MockS3Client)(nil).GetObject), varargs...) 163 | } 164 | 165 | // PutObject mocks base method. 166 | func (m *MockS3Client) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 167 | m.ctrl.T.Helper() 168 | varargs := []any{ctx, params} 169 | for _, a := range optFns { 170 | varargs = append(varargs, a) 171 | } 172 | ret := m.ctrl.Call(m, "PutObject", varargs...) 173 | ret0, _ := ret[0].(*s3.PutObjectOutput) 174 | ret1, _ := ret[1].(error) 175 | return ret0, ret1 176 | } 177 | 178 | // PutObject indicates an expected call of PutObject. 179 | func (mr *MockS3ClientMockRecorder) PutObject(ctx, params any, optFns ...any) *gomock.Call { 180 | mr.mock.ctrl.T.Helper() 181 | varargs := append([]any{ctx, params}, optFns...) 
182 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PutObject", reflect.TypeOf((*MockS3Client)(nil).PutObject), varargs...) 183 | } 184 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /upload_test.go: -------------------------------------------------------------------------------- 1 | package s3batchstore 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "strconv" 9 | "testing" 10 | 11 | "github.com/aws/aws-sdk-go-v2/aws" 12 | "github.com/aws/aws-sdk-go-v2/service/s3" 13 | "github.com/aws/aws-sdk-go-v2/service/s3/types" 14 | mocks3 "github.com/embrace-io/s3-batch-object-store/mock/aws" 15 | "github.com/klauspost/compress/zstd" 16 | . 
"github.com/onsi/gomega" 17 | "go.uber.org/mock/gomock" 18 | ) 19 | 20 | func TestClient_UploadFile(t *testing.T) { 21 | g := NewGomegaWithT(t) 22 | ctx := context.Background() 23 | 24 | objs := []*TestObject{ 25 | { 26 | ID: "1", 27 | Value: "my first payload", 28 | }, 29 | { 30 | ID: "3", 31 | Value: "my third payload", 32 | }, 33 | { 34 | ID: "6", 35 | Value: "my sixth payload", 36 | }, 37 | } 38 | compressedObjLengths := make([]int, len(objs)) 39 | for i, obj := range objs { 40 | compressed, err := marshalAndCompress(obj) 41 | g.Expect(err).ToNot(HaveOccurred()) 42 | compressedObjLengths[i] = len(compressed) 43 | } 44 | 45 | tests := []struct { 46 | name string 47 | objs []*TestObject 48 | withMetaFile bool 49 | configureMocks func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) 50 | err interface{} 51 | }{ 52 | { 53 | name: "successful upload with meta file", 54 | objs: objs, 55 | withMetaFile: true, 56 | configureMocks: func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 57 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.fileName)).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 58 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 59 | g.Expect(*input.Key).To(Equal(file.fileName)) 60 | g.Expect(input.Body).ToNot(BeNil()) 61 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 62 | return &s3.PutObjectOutput{}, nil 63 | }) 64 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.MetaFileKey())).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 65 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 66 | g.Expect(*input.Key).To(Equal(file.MetaFileKey())) 67 | g.Expect(input.Body).ToNot(BeNil()) 68 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 69 | 70 | compressedBody, err := io.ReadAll(input.Body) 71 | g.Expect(err).ToNot(HaveOccurred()) 72 | 73 
| // Decompress the zstd-compressed body 74 | zstdReader, err := zstd.NewReader(nil) 75 | g.Expect(err).ToNot(HaveOccurred()) 76 | body, err := zstdReader.DecodeAll(compressedBody, nil) 77 | g.Expect(err).ToNot(HaveOccurred()) 78 | 79 | g.Expect(body).To(MatchJSON(`{` + 80 | `"1":{"file":"` + file.fileName + `","offset":0,"length":` + strconv.Itoa(compressedObjLengths[0]) + `},` + 81 | `"3":{"file":"` + file.fileName + `","offset":` + strconv.Itoa(compressedObjLengths[0]) + `,"length":` + strconv.Itoa(compressedObjLengths[1]) + `},` + 82 | `"6":{"file":"` + file.fileName + `","offset":` + strconv.Itoa(compressedObjLengths[0]+compressedObjLengths[1]) + `,"length":` + strconv.Itoa(compressedObjLengths[2]) + `}` + 83 | `}`)) 84 | return &s3.PutObjectOutput{}, nil 85 | }) 86 | }, 87 | }, 88 | { 89 | name: "successful upload without meta file", 90 | objs: objs, 91 | withMetaFile: false, 92 | configureMocks: func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 93 | // Only regular file expected 94 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.fileName)).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 95 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 96 | g.Expect(*input.Key).To(Equal(file.fileName)) 97 | g.Expect(input.Body).ToNot(BeNil()) 98 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 99 | return &s3.PutObjectOutput{}, nil 100 | }) 101 | }, 102 | }, 103 | { 104 | name: "file readOnly error", 105 | objs: objs, 106 | withMetaFile: true, 107 | configureMocks: func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 108 | // If for any reason the underlying file gets closed, we won't be able to get the readOnly contents. 
109 | g.Expect(file.file.Close()).ToNot(HaveOccurred()) 110 | }, 111 | err: ContainSubstring("failed to get the readonly file: seek "), 112 | }, 113 | { 114 | name: "s3 upload error", 115 | objs: objs, 116 | withMetaFile: true, 117 | configureMocks: func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 118 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.fileName)).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 119 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 120 | g.Expect(*input.Key).To(Equal(file.fileName)) 121 | g.Expect(input.Body).ToNot(BeNil()) 122 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 123 | return nil, fmt.Errorf("s3 service error") 124 | }) 125 | }, 126 | err: "failed to upload data file to s3: s3 service error", 127 | }, 128 | { 129 | name: "s3 meta file upload error", 130 | objs: objs, 131 | withMetaFile: true, 132 | configureMocks: func(g *WithT, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 133 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.fileName)).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 134 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 135 | g.Expect(*input.Key).To(Equal(file.fileName)) 136 | g.Expect(input.Body).ToNot(BeNil()) 137 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 138 | return &s3.PutObjectOutput{}, nil 139 | }) 140 | s3Mock.EXPECT().PutObject(ctx, matchUploadParams(file.MetaFileKey())).DoAndReturn(func(_ context.Context, input *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 141 | g.Expect(*input.Bucket).To(Equal(testBucketName)) 142 | g.Expect(*input.Key).To(Equal(file.MetaFileKey())) 143 | g.Expect(input.Body).ToNot(BeNil()) 144 | g.Expect(input.Tagging).To(Equal(aws.String("retention-days=14"))) 145 | 146 | compressedBody, err := io.ReadAll(input.Body) 147 
| g.Expect(err).ToNot(HaveOccurred()) 148 | 149 | // Decompress the zstd-compressed body 150 | zstdReader, err := zstd.NewReader(nil) 151 | g.Expect(err).ToNot(HaveOccurred()) 152 | body, err := zstdReader.DecodeAll(compressedBody, nil) 153 | g.Expect(err).ToNot(HaveOccurred()) 154 | 155 | g.Expect(body).To(MatchJSON(`{` + 156 | `"1":{"file":"` + file.fileName + `","offset":0,"length":` + strconv.Itoa(compressedObjLengths[0]) + `},` + 157 | `"3":{"file":"` + file.fileName + `","offset":` + strconv.Itoa(compressedObjLengths[0]) + `,"length":` + strconv.Itoa(compressedObjLengths[1]) + `},` + 158 | `"6":{"file":"` + file.fileName + `","offset":` + strconv.Itoa(compressedObjLengths[0]+compressedObjLengths[1]) + `,"length":` + strconv.Itoa(compressedObjLengths[2]) + `}` + 159 | `}`)) 160 | return nil, fmt.Errorf("s3 service error") 161 | }) 162 | }, 163 | err: "failed to upload meta file to s3: s3 service error", 164 | }, 165 | } 166 | 167 | for _, test := range tests { 168 | t.Run(test.name, func(t *testing.T) { 169 | g := NewGomegaWithT(t) 170 | 171 | ctrl := gomock.NewController(t) 172 | s3Mock := mocks3.NewMockS3Client(ctrl) 173 | 174 | c := &client[string]{ 175 | s3Bucket: testBucketName, 176 | s3Client: s3Mock, 177 | } 178 | 179 | file, err := c.NewTempFile(testTags) 180 | g.Expect(err).ToNot(HaveOccurred()) 181 | defer func() { _ = file.Close() }() 182 | 183 | for _, objs := range test.objs { 184 | compressed, err := marshalAndCompress(objs) 185 | g.Expect(err).ToNot(HaveOccurred()) 186 | _, err = file.AppendAndReturnIndex(objs.ID, compressed) 187 | g.Expect(err).ToNot(HaveOccurred()) 188 | } 189 | g.Expect(file.Count()).To(Equal(uint(len(test.objs)))) 190 | g.Expect(file.Age()).To(BeNumerically(">=", uint64(0))) 191 | g.Expect(file.Size()).To(BeNumerically(">=", uint64(0))) 192 | 193 | test.configureMocks(g, file, s3Mock) 194 | err = c.UploadFile(ctx, file, test.withMetaFile) 195 | if test.err == nil { 196 | g.Expect(err).ToNot(HaveOccurred()) 197 | } else { 198 
| g.Expect(err).To(MatchError(test.err)) 199 | } 200 | }) 201 | } 202 | } 203 | 204 | func TestClient_DeleteFile(t *testing.T) { 205 | g := NewGomegaWithT(t) 206 | 207 | ctx := context.Background() 208 | objs := []*TestObject{ 209 | { 210 | ID: "1", 211 | Value: "my first payload", 212 | }, 213 | { 214 | ID: "2", 215 | Value: "my second payload", 216 | }, 217 | { 218 | ID: "3", 219 | Value: "my third payload", 220 | }, 221 | } 222 | compressedObjLengths := make([]int, len(objs)) 223 | for i, obj := range objs { 224 | compressed, err := marshalAndCompress(obj) 225 | g.Expect(err).ToNot(HaveOccurred()) 226 | compressedObjLengths[i] = len(compressed) 227 | } 228 | 229 | tests := []struct { 230 | name string 231 | objs []*TestObject 232 | configureMocks func(g *WithT, ctrl *gomock.Controller, file *TempFile[string], s3Mock *mocks3.MockS3Client) 233 | err interface{} 234 | }{ 235 | { 236 | name: "successful delete", 237 | objs: objs, 238 | configureMocks: func(g *WithT, ctrl *gomock.Controller, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 239 | // One delete call with the 2 files: regular file and meta file 240 | metaFileKey := file.MetaFileKey() 241 | s3Mock.EXPECT().DeleteObjects(ctx, &s3.DeleteObjectsInput{ 242 | Bucket: aws.String(testBucketName), 243 | Delete: &types.Delete{ 244 | Objects: []types.ObjectIdentifier{ 245 | {Key: &file.fileName}, 246 | {Key: &metaFileKey}, 247 | }, 248 | }, 249 | }).Return(&s3.DeleteObjectsOutput{}, nil).Times(1) 250 | }, 251 | }, 252 | { 253 | name: "error deleting s3 files", 254 | objs: objs, 255 | configureMocks: func(g *WithT, ctrl *gomock.Controller, file *TempFile[string], s3Mock *mocks3.MockS3Client) { 256 | metaFileKey := file.MetaFileKey() 257 | s3Mock.EXPECT().DeleteObjects(ctx, &s3.DeleteObjectsInput{ 258 | Bucket: aws.String(testBucketName), 259 | Delete: &types.Delete{ 260 | Objects: []types.ObjectIdentifier{ 261 | {Key: &file.fileName}, 262 | {Key: &metaFileKey}, 263 | }, 264 | }, 265 | }).Return(nil, 
errors.New("error deleting s3 file")).Times(1) 266 | }, 267 | err: "failed to delete files: error deleting s3 file", 268 | }, 269 | } 270 | for _, test := range tests { 271 | t.Run(test.name, func(t *testing.T) { 272 | g := NewGomegaWithT(t) 273 | 274 | ctrl := gomock.NewController(t) 275 | s3Mock := mocks3.NewMockS3Client(ctrl) 276 | 277 | c := &client[string]{ 278 | s3Bucket: testBucketName, 279 | s3Client: s3Mock, 280 | } 281 | 282 | file, err := c.NewTempFile(testTags) 283 | g.Expect(err).ToNot(HaveOccurred()) 284 | defer func() { _ = file.Close() }() 285 | 286 | for _, obj := range test.objs { 287 | compressed, err := marshalAndCompress(obj) 288 | g.Expect(err).ToNot(HaveOccurred()) 289 | _, err = file.AppendAndReturnIndex(obj.ID, compressed) 290 | g.Expect(err).ToNot(HaveOccurred()) 291 | } 292 | g.Expect(file.Count()).To(Equal(uint(len(test.objs)))) 293 | g.Expect(file.Age()).To(BeNumerically(">=", uint64(0))) 294 | g.Expect(file.Size()).To(BeNumerically(">=", uint64(0))) 295 | 296 | test.configureMocks(g, ctrl, file, s3Mock) 297 | err = c.DeleteFile(ctx, file) 298 | if test.err == nil { 299 | g.Expect(err).ToNot(HaveOccurred()) 300 | } else { 301 | g.Expect(err).To(MatchError(test.err)) 302 | } 303 | }) 304 | } 305 | } 306 | 307 | type uploadParamsMatcher struct { 308 | fileKey string 309 | } 310 | 311 | func matchUploadParams(fileKey string) gomock.Matcher { 312 | return &uploadParamsMatcher{fileKey: fileKey} 313 | } 314 | 315 | func (matcher *uploadParamsMatcher) Matches(actual interface{}) bool { 316 | actualInput, actualOk := actual.(*s3.PutObjectInput) 317 | return actualOk && *actualInput.Key == matcher.fileKey 318 | } 319 | 320 | func (matcher *uploadParamsMatcher) String() string { 321 | return fmt.Sprintf("uploader with key: %s", matcher.fileKey) 322 | } 323 | --------------------------------------------------------------------------------