├── .rubocop.yml ├── lib ├── wt_s3_signer │ └── version.rb └── wt_s3_signer.rb ├── Gemfile ├── .gitignore ├── Rakefile ├── spec ├── spec_helper.rb ├── support │ └── resource_allocator.rb └── url_signing_spec.rb ├── .travis.yml ├── CHANGELOG.md ├── wt_s3_signer.gemspec ├── LICENSE.md ├── README.md ├── CODE_OF_CONDUCT.md └── CONTRIBUTING.md /.rubocop.yml: -------------------------------------------------------------------------------- 1 | inherit_gem: 2 | wetransfer_style: ruby/default.yml 3 | -------------------------------------------------------------------------------- /lib/wt_s3_signer/version.rb: -------------------------------------------------------------------------------- 1 | module WT 2 | class S3Signer 3 | VERSION = '1.0.2' 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in wt_s3_signer.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | 3 | # for a library or gem, you might want to ignore these files since the code is 4 | # intended to run in multiple environments; otherwise, check them in: 5 | Gemfile.lock 6 | .ruby-version 7 | .ruby-gemset 8 | 9 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 10 | .rvmrc 11 | 12 | # YARD output 13 | doc/ 14 | .yardoc/ -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rspec/core/rake_task" 3 | require 'yard' 4 | 5 | YARD::Rake::YardocTask.new(:doc) do |t| 6 | # The dash has to be between the two to "divide" the source files and 7 | # miscellaneous 
documentation files that contain no code 8 | t.files = ['lib/**/*.rb', '-', 'LICENSE.md'] 9 | end 10 | 11 | RSpec::Core::RakeTask.new(:spec) 12 | 13 | task :default => :spec 14 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rspec' 2 | require 'wt_s3_signer' 3 | require 'aws-sdk-s3' 4 | require 'rspec-benchmark' 5 | 6 | $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) 7 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 8 | 9 | require_relative 'support/resource_allocator' 10 | 11 | AWS_ALLOCATOR = ResourceAllocator.new 12 | 13 | RSpec.configure do |config| 14 | config.order = 'random' 15 | config.include RSpec::Benchmark::Matchers 16 | AWS_ALLOCATOR.install_rspec_hooks!(config) 17 | end 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | rvm: 2 | - 2.2.10 3 | - 2.6.5 4 | sudo: false 5 | cache: bundler 6 | script: 7 | - sudo apt update 8 | - sudo apt install -y awscli jq 9 | - assume_role=$(aws sts assume-role --role-arn "$ASSUME_ROLE_ARN" --role-session-name "wt_s3_signer") 10 | - export AWS_ACCESS_KEY_ID=$(echo "$assume_role" | jq -r .Credentials.AccessKeyId) 11 | - export AWS_SECRET_ACCESS_KEY=$(echo "$assume_role" | jq -r .Credentials.SecretAccessKey) 12 | - export AWS_SESSION_TOKEN=$(echo "$assume_role" | jq -r .Credentials.SessionToken) 13 | - bundle exec rake 14 | env: 15 | global: 16 | - AWS_REGION=eu-west-1 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 1.0.2 2 | * Release the singleton S3 client when AWS raises credential error to be able to use a new credential next time 3 | 4 | ## 1.0.1 5 | * Set 
`instance_profile_credentials_retries` to 5 in the S3::Client instance to prevent "missing credentials" errors 6 | 7 | ## 1.0.0 8 | * Remove option `client:` from `WT::S3Signer.for_s3_bucket` 9 | * Uses a singleton s3_client by default to take advantage of AWS credentials cache 10 | 11 | ## 0.3.0 12 | * Add option `client:` to `WT::S3Signer.for_s3_bucket`, so it's possible to inject a cached `Aws::S3::Client` instance and prevent too many requests to the AWS metadata endpoint 13 | -------------------------------------------------------------------------------- /wt_s3_signer.gemspec: -------------------------------------------------------------------------------- 1 | lib = File.expand_path('../lib', __FILE__) 2 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 3 | require 'wt_s3_signer/version' 4 | 5 | Gem::Specification.new do |spec| 6 | spec.name = "wt_s3_signer" 7 | spec.version = WT::S3Signer::VERSION 8 | spec.date = "2019-12-16" 9 | spec.summary = "A library for signing S3 key faster" 10 | spec.description = "A Ruby Gem that optimize the signing of S3 keys. 
The gem is especially useful when dealing with a large amount of S3 object keys" 11 | spec.authors = ["Luca Suriano", "Julik Tarkhanov"] 12 | spec.email = ["luca.suriano@wetransfer.com", "me@julik.nl"] 13 | spec.files = `git ls-files -z`.split("\x0") 14 | spec.homepage = "https://github.com/WeTransfer/wt_s3_signer" 15 | spec.license = "MIT (Hippocratic)" 16 | 17 | spec.add_runtime_dependency "aws-sdk-s3", "~> 1" 18 | 19 | spec.add_development_dependency "yard", "~> 0.9.24" 20 | spec.add_development_dependency "rake", "~> 13.0.1" 21 | spec.add_development_dependency "rspec", "~> 3.9" 22 | spec.add_development_dependency "rspec-benchmark", "~> 0.6" 23 | spec.add_development_dependency "rubocop" 24 | spec.add_development_dependency "pry-byebug" 25 | end 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2020 WeTransfer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | * No Harm: The software may not be used by anyone for systems or activities that actively and knowingly endanger, harm, or otherwise threaten the physical, mental, economic, or general well-being of other individuals or groups, in violation of the United Nations Universal Declaration of Human Rights (https://www.un.org/en/universal-declaration-human-rights/). 
8 | 9 | * Services: If the Software is used to provide a service to others, the licensee shall, as a condition of use, require those others not to use the service in any way that violates the No Harm clause above. 10 | 11 | * Enforceability: If any portion or provision of this License shall to any extent be declared illegal or unenforceable by a court of competent jurisdiction, then the remainder of this License, or the application of such portion or provision in circumstances other than those as to which it is so declared illegal or unenforceable, shall not be affected thereby, and each portion and provision of this Agreement shall be valid and enforceable to the fullest extent permitted by law. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | 15 | This Hippocratic License is an Ethical Source license (https://ethicalsource.dev) derived from the MIT License, amended to limit the impact of the unethical use of open source software. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wt_s3_signer [![Build Status](https://travis-ci.org/WeTransfer/wt_s3_signer.svg?branch=master)](https://travis-ci.org/WeTransfer/wt_s3_signer) 2 | 3 | An optimized AWS S3 URL signer. 
4 | 5 | ## Basic usage 6 | 7 | ```ruby 8 | s3_bucket = Aws::S3::Bucket.new('shiny-bucket-name') 9 | ttl_seconds = 7 * 24 * 60 * 60 10 | 11 | # the signer uses a singleton S3 client internally, so the cached AWS credentials are reused 12 | # between calls (the `client:` option was removed in 1.0.0) 13 | signer = WT::S3Signer.for_s3_bucket(s3_bucket, expires_in: ttl_seconds) 14 | url_str = signer.presigned_get_url(object_key: full_s3_key) 15 | #=> https://shiny-bucket-name.s3.eu-west-1.amazonaws.com/dir/testobject?X-Amz-Algorithm... 16 | ``` 17 | 18 | ## Why would you want to use it? 19 | 20 | The use case is when you need to rapidly generate *lots* of presigned URLs to the same S3 bucket. When 21 | doing the signing, the AWS SDK works fine - but the following operations need to be performed: 22 | 23 | * Credential refresh 24 | * Bucket region discovery (in which region does the bucket reside?) 25 | * Bucket endpoint discovery (which hostname should be used for the request?) 26 | * Cleanup of the various edge cases (blacklisted signed headers and so on) 27 | 28 | The metadata should be retrieved only once if the bucket does not change, but with the standard 29 | SDK this information might get refreshed often. And there is a substantial amount of generic 30 | code that gets called throughout the SDK call even though it is not strictly necessary. 31 | 32 | Our signer bypasses these operations and it performs the credential discovery, as well as bucket 33 | metadata discovery, but *only once* - when you instantiate it. The primary usage pattern is as follows: 34 | 35 | ```ruby 36 | signer = WT::S3Signer.for_s3_bucket(my_bucket_resource) 37 | signed_urls = all_object_keys.map do |obj_key| 38 | signer.presigned_get_url(object_key: obj_key) 39 | end 40 | ``` 41 | 42 | This will stay performant even if `signed_urls` contains tens of thousands of entries. 43 | 44 | Additionally, we cache all the produced strings very aggressively if they do not change between 45 | calls to the signing method. 
We also derive the signing key only once. This optimizes the signing even more. 46 | 47 | Here are some benchmarks we have made for comparison. The `S3Signer_SDK` class executed the same 48 | flow, but it reused the [Aws::S3::Presigner](https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Presigner.html) 49 | object that it would instantiate only once, and then call repeatedly. 50 | 51 | ``` 52 | Warming up -------------------------------------- 53 | WT::S3::Signer#presigned_get_url 54 | 9.325k i/100ms 55 | S3Signer_SDK#presigned_get_url 56 | 154.000 i/100ms 57 | Calculating ------------------------------------- 58 | WT::S3::Signer#presigned_get_url 59 | 81.422k (±18.9%) i/s - 391.650k in 5.042435s 60 | S3Signer_SDK#presigned_get_url 61 | 1.865k (± 9.3%) i/s - 9.240k in 5.009593s 62 | 63 | Comparison: 64 | WT::S3::Signer#presigned_get_url: 81421.7 i/s 65 | S3Signer_SDK#presigned_get_url: 1864.9 i/s - 43.66x slower 66 | ``` 67 | 68 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [julik@wetransfer.com](mailto:julik@wetransfer.com) and [luca-suriano@wetransfer.com](mailto:luca-suriano@wetransfer.com). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /spec/support/resource_allocator.rb: -------------------------------------------------------------------------------- 1 | require 'set' 2 | 3 | # The resource allocator manages mutable resources that we create during test runs, 4 | # such as AWS buckets. It is kind of a tiny implementation of Go's `defer`, and it 5 | # creates an allocation group for each level of RSpec's expectation pre/post actions. 
6 | # Specifically: 7 | # 8 | # resources on :suite level 9 | # resources on :all level 10 | # resources on :all level for a context 11 | # resources on :each level for a specific example 12 | # resources allocated during test 13 | # 14 | # When a scope is finished in RSpec, the allocation group will be "popped" from the stack 15 | # and all resources for which cleanup has been defined during allocation will be deleted 16 | # and cleaned up. This can be used for all sorts of test resources, but mostly for S3 buckets 17 | # and SQS queues and the like. The allocator will ensure that the cleanup blocks are 18 | # all called in the reverse order they were registered, in case there are resource 19 | # dependencies. 20 | # The allocator also ensures that: 21 | # * each allocated resource is named in a unique and unambiguous way 22 | # * each process uses its own numbering sequence for allocated resource names 23 | # * resources are named with date and time in the name so if they are leaked you can see when they got created 24 | class ResourceAllocator 25 | def initialize(common_prefix: "wt-s3-signer-test") 26 | @ctr = 0 27 | @common_prefix = common_prefix 28 | @allocation_groups = [[]] 29 | @names = Set.new 30 | @test_time_str = Time.now.utc.strftime("%Y%m%d%H%M") 31 | @common = alphanumeric_seed(4) 32 | end 33 | 34 | def computed_prefix 35 | "#{@common_prefix}-#{@test_time_str}-#{@common}-...-..." 
36 | end 37 | 38 | def resource_count 39 | @allocation_groups.map(&:length).inject(&:+).to_i 40 | end 41 | 42 | def push_alloc_group 43 | @allocation_groups << [] 44 | end 45 | 46 | def alphanumeric_seed(n_chars) 47 | alphabet = ('a'..'z').to_a + ('0'..'9').to_a 48 | n_chars.times.map { alphabet[SecureRandom.random_number(alphabet.length)] }.join 49 | end 50 | 51 | def alloc_resource_name 52 | loop do 53 | @ctr += 1 54 | salt = alphanumeric_seed(5) # even more collision prevention 55 | generated_name = "#{@common_prefix}-#{@test_time_str}-#{@common}-#{@ctr}-#{salt}" 56 | unless @names.include?(generated_name) 57 | @names << generated_name 58 | return generated_name 59 | end 60 | end 61 | end 62 | 63 | def create_sqs_queue_name_and_url 64 | name = alloc_resource_name 65 | 66 | client = Aws::SQS::Client.new 67 | resp = client.create_queue(queue_name: name) 68 | url = resp.queue_url 69 | 70 | cleanup_later(name: name, resource_type: :s3_bucket) do 71 | client = Aws::SQS::Client.new 72 | client.delete_queue(queue_url: url) rescue nil 73 | end 74 | 75 | [name, url] 76 | end 77 | 78 | def create_s3_bucket_and_name 79 | name = alloc_resource_name 80 | bucket_resource = Aws::S3::Bucket.new(name) 81 | bucket_resource.create 82 | cleanup_later(name: name, resource_type: :s3_bucket) do 83 | bucket_resource.delete! 84 | end 85 | [bucket_resource, name] 86 | end 87 | 88 | def cleanup_later(resource_type: :unknown, name: alloc_resource_name, &resource_cleanup) 89 | # Store the block for later, and return the name immediately 90 | @allocation_groups << [] unless @allocation_groups.any? 91 | @allocation_groups.last << [name, resource_type, resource_cleanup] 92 | name 93 | end 94 | 95 | def pop_alloc_group 96 | resources_to_remove = @allocation_groups.pop || [] 97 | resources_to_remove.reverse_each do |name, resource_type, cleanup_proc| 98 | cleanup_proc.call(name, resource_type) 99 | end 100 | end 101 | 102 | def cleanup_all 103 | pop_alloc_group while @allocation_groups.any? 
104 | end 105 | 106 | def install_rspec_hooks!(config) 107 | this_allocator = self 108 | config.before :suite do 109 | this_allocator.push_alloc_group 110 | end 111 | 112 | config.before :all do 113 | this_allocator.push_alloc_group 114 | end 115 | 116 | config.after :all do 117 | this_allocator.pop_alloc_group 118 | end 119 | 120 | config.around :each do |example| 121 | this_allocator.push_alloc_group 122 | example.run 123 | this_allocator.pop_alloc_group 124 | end 125 | 126 | config.after :suite do 127 | this_allocator.cleanup_all 128 | end 129 | end 130 | end 131 | -------------------------------------------------------------------------------- /spec/url_signing_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'net/http' 3 | 4 | shared_context 'signer_bucket' do 5 | let(:bucket) { AWS_ALLOCATOR.create_s3_bucket_and_name.first } 6 | let(:signer) { described_class.for_s3_bucket(bucket, expires_in: 173) } 7 | end 8 | 9 | describe WT::S3Signer do 10 | include_context 'signer_bucket' 11 | 12 | it 'WT::Signer is faster than Aws::S3::Presigner' do 13 | allow(WT::S3Signer).to receive(:create_bucket).and_return(bucket) 14 | 15 | bucket.object('dir/testobject').put(body: 'is here') 16 | 17 | # These values come from previous performance measurements ran on nu_backend 18 | expect { bucket.object('dir/testobject').presigned_url(:get, expires_in: 173) }.to perform_at_least(1000).ips 19 | expect { signer.presigned_get_url(object_key: 'dir/testobject') }.to perform_at_least(40_000).ips 20 | end 21 | 22 | it 'signs an s3 key' do 23 | allow(WT::S3Signer).to receive(:create_bucket).and_return(bucket) 24 | 25 | bucket.object('dir/testobject').put(body: 'is here') 26 | presigned_url = signer.presigned_get_url(object_key: 'dir/testobject') 27 | 28 | expect(presigned_url).to include("X-Amz-Expires=173") 29 | end 30 | 31 | it 'signs a valid s3 key' do 32 | allow(WT::S3Signer).to 
receive(:create_bucket).and_return(bucket) 33 | 34 | bucket.object('dir/testobject').put(body: 'is here') 35 | presigned_url = signer.presigned_get_url(object_key: 'dir/testobject') 36 | 37 | uri = URI(presigned_url) 38 | res = Net::HTTP.get_response(uri) 39 | 40 | expect(res.code).to eq("200") 41 | end 42 | 43 | it 'throws an exception if no key is used for signing' do 44 | expect{signer.presigned_get_url(object_key: '')}.to raise_error(ArgumentError) 45 | end 46 | 47 | describe '.for_s3_bucket' do 48 | it 'uses a singleton instance of s3 client' do 49 | allow(WT::S3Signer).to receive(:create_bucket).and_return(bucket) 50 | bucket.object('dir/testobject').put(body: 'is here') 51 | 52 | # If other tests run before, they might instantiate the singleton client, 53 | # so it's acceptable for Aws::S3::Client to not receive :new 54 | expect(Aws::S3::Client).to receive(:new).at_most(:once).and_call_original 55 | 56 | signer1 = described_class.for_s3_bucket(bucket, expires_in: 174) 57 | signer2 = described_class.for_s3_bucket(bucket, expires_in: 175) 58 | 59 | presigned_url1 = signer1.presigned_get_url(object_key: 'dir/testobject') 60 | presigned_url2 = signer2.presigned_get_url(object_key: 'dir/testobject') 61 | 62 | expect(presigned_url1).to include("X-Amz-Expires=174") 63 | expect(presigned_url2).to include("X-Amz-Expires=175") 64 | end 65 | 66 | it 'releases the singleton client when AWS raises an access denied error' do 67 | s3_client = Aws::S3::Client.new(stub_responses: true) 68 | described_class.client = s3_client 69 | 70 | s3_client.stub_responses(:get_object, body: 'is here') 71 | 72 | # just to set @client internally 73 | described_class.for_s3_bucket(bucket, expires_in: 174) 74 | 75 | # now, let's simulate an error on AWS 76 | s3_client.stub_responses( 77 | :get_bucket_location, 78 | Aws::S3::Errors::AccessDenied.new(_context = nil, _message = nil) 79 | ) 80 | 81 | # After the exercise, we are going to compare if the singleton client has 82 | # changed, so 
it's good to check if it is not null 83 | expect(described_class.client).to eq(s3_client) 84 | 85 | # exercise again 86 | expect do 87 | described_class.for_s3_bucket(bucket, expires_in: 174) 88 | end.to raise_error(Aws::S3::Errors::AccessDenied) 89 | 90 | expect(described_class.client).not_to be_nil 91 | expect(described_class.client).not_to eq(s3_client) 92 | end 93 | 94 | it 'releases the singleton client when AWS raises a missing credentials error' do 95 | s3_client = Aws::S3::Client.new(stub_responses: true) 96 | described_class.client = s3_client 97 | 98 | s3_client.stub_responses(:get_object, body: 'is here') 99 | 100 | # just to set @client internally 101 | described_class.for_s3_bucket(bucket, expires_in: 174) 102 | 103 | # now, let's simulate an error on AWS 104 | s3_client.stub_responses( 105 | :get_bucket_location, 106 | Aws::Errors::MissingCredentialsError.new(_context = nil, _message = nil) 107 | ) 108 | 109 | # After the exercise, we are going to compare if the singleton client has 110 | # changed, so it's good to check if it is not null 111 | expect(described_class.client).to eq(s3_client) 112 | 113 | # exercise again 114 | expect do 115 | described_class.for_s3_bucket(bucket, expires_in: 174) 116 | end.to raise_error(Aws::Errors::MissingCredentialsError) 117 | 118 | expect(described_class.client).not_to be_nil 119 | expect(described_class.client).not_to eq(s3_client) 120 | end 121 | end 122 | end 123 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to wt_s3_signer 2 | 3 | As the creators, and maintainers of this project, we're glad to share our projects and invite contributors to help us stay up to date. Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved. 
4 | 5 | Following these guidelines helps to communicate that you respect the time of the developers managing and developing this open source project. In return, they should reciprocate that respect in addressing your issue or assessing patches and features. 6 | 7 | In general, we expect you to follow our [Code of Conduct](CODE_OF_CONDUCT.md). 8 | 9 | ## Using the issue tracker for bug reports, feature requests and discussions 10 | 11 | ### First time contributors 12 | We should encourage first time contributors. A good inspiration on this can be found [here](http://www.firsttimersonly.com/). As pointed out: 13 | 14 | > If you are an OSS project owner, then consider marking a few open issues with the label first-timers-only. The first-timers-only label explicitly announces: 15 | 16 | > "I'm willing to hold your hand so you can make your first PR. This issue is rather a bit easier than normal. And anyone who’s already contributed to open source isn’t allowed to touch this one!" 17 | 18 | By labeling issues with this `first-timers-only` label we help first time contributors step up their game and start contributing. 19 | 20 | ### Bug reports 21 | 22 | A bug is a _demonstrable problem_ that is caused by the code in the repository. 23 | Good bug reports are extremely helpful - thank you! 24 | 25 | Guidelines for bug reports: 26 | 27 | 1. **Use the GitHub issue search** — check if the issue has already been 28 | reported. 29 | 30 | 2. **Check if the issue has been fixed** — try to reproduce it using the 31 | latest `master` or development branch in the repository. 32 | 33 | 3. **Isolate the problem** — provide clear steps to reproduce. 34 | 35 | A good bug report shouldn't leave others needing to chase you up for more 36 | information. Please try to be as detailed as possible in your report. What is 37 | your environment? What steps will reproduce the issue? What would you expect to be the outcome? All these details will help people to fix any potential bugs. 
38 | 39 | Example: 40 | 41 | > Short and descriptive example bug report title 42 | > 43 | > A summary of the issue and the OS environment in which it occurs. If 44 | > suitable, include the steps required to reproduce the bug. 45 | > 46 | > 1. This is the first step 47 | > 2. This is the second step 48 | > 3. Further steps, etc. 49 | > 50 | > Any other information you want to share that is relevant to the issue being 51 | > reported. This might include the lines of code that you have identified as 52 | > causing the bug, and potential solutions (and your opinions on their 53 | > merits). 54 | 55 | ### Feature requests 56 | 57 | Feature requests are welcome. But take a moment to find out whether your idea 58 | fits with the scope and aims of the project. It's up to *you* to make a strong 59 | case to convince the project's developers of the merits of this feature. Please 60 | provide as much detail and context as possible. 61 | 62 | Do check if the feature request already exists. If it does, give it a thumbs-up emoji 63 | or even comment. We'd like to avoid duplicate requests. 64 | 65 | ### Pull requests 66 | 67 | Good pull requests - patches, improvements, new features - are a fantastic 68 | help. They should remain focused in scope and avoid containing unrelated 69 | commits. 70 | 71 | **Please ask first** before embarking on any significant pull request (e.g. 72 | implementing features, refactoring code, porting to a different language), 73 | otherwise you risk spending a lot of time working on something that the 74 | project's developers might not want to merge into the project. As far as _where_ to ask, 75 | the feature request or bug report is the best place to go. 76 | 77 | Please adhere to the coding conventions used throughout a project (indentation, 78 | accurate comments, etc.) and any other requirements (such as test coverage). 79 | 80 | Follow this process if you'd like your work considered for inclusion in the 81 | project: 82 | 83 | 1. 
[Fork](http://help.github.com/fork-a-repo/) the project, clone your fork, 84 | and configure the remotes: 85 | 86 | ```bash 87 | # Clone your fork of the repo into the current directory 88 | git clone git@github.com:<your-username>/wt_s3_signer.git 89 | # Navigate to the newly cloned directory 90 | cd wt_s3_signer 91 | # Assign the original repo to a remote called "upstream" 92 | git remote add upstream git@github.com:WeTransfer/wt_s3_signer.git 93 | ``` 94 | 95 | 2. If you cloned a while ago, get the latest changes from upstream: 96 | 97 | ```bash 98 | git checkout <dev-branch> 99 | git pull upstream <dev-branch> 100 | ``` 101 | 102 | 3. Create a new topic branch (off the main project development branch) to 103 | contain your feature, change, or fix: 104 | 105 | ```bash 106 | git checkout -b <topic-branch-name> 107 | ``` 108 | 109 | 4. Commit your changes in logical chunks. 110 | 111 | 5. Locally merge (or rebase) the upstream development branch into your topic branch: 112 | 113 | ```bash 114 | git pull [--rebase] upstream <dev-branch> 115 | ``` 116 | 117 | 6. Push your topic branch up to your fork: 118 | 119 | ```bash 120 | git push origin <topic-branch-name> 121 | ``` 122 | 123 | 7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/) 124 | with a clear title and description. 125 | 126 | ### Conventions of commit messages 127 | 128 | Adding features on repo 129 | 130 | ```bash 131 | git commit -m "feat: message about this feature" 132 | ``` 133 | 134 | Fixing features on repo 135 | 136 | ```bash 137 | git commit -m "fix: message about this update" 138 | ``` 139 | 140 | Removing features on repo 141 | 142 | ```bash 143 | git commit -m "refactor: message about this" -m "BREAKING CHANGE: message about the breaking change" 144 | ``` 145 | 146 | 147 | **IMPORTANT**: By submitting a patch, you agree to allow the project owner to 148 | license your work under the same license as that used by the project. 149 | 150 | ### Discussions 151 | 152 | We aim to keep all project discussion inside GitHub issues. 
This is to make sure valuable discussion is accessible via search. If you have questions about how to use the library, or how the project is running - GitHub issues are the goto tool for this project. 153 | 154 | #### Our expectations on you as a contributor 155 | 156 | We want contributors to provide ideas, keep the ship shipping and to take some of the load from others. It is non-obligatory; we’re here to get things done in an enjoyable way. 🎉 157 | 158 | The fact that you'll have push access will allow you to: 159 | 160 | - Avoid having to fork the project if you want to submit other pull requests as you'll be able to create branches directly on the project. 161 | - Help triage issues, merge pull requests. 162 | - Pick up the project if other maintainers move their focus elsewhere. 163 | -------------------------------------------------------------------------------- /lib/wt_s3_signer.rb: -------------------------------------------------------------------------------- 1 | require 'openssl' 2 | require 'digest' 3 | require 'cgi' 4 | 5 | # An accelerated version of the reference implementation ported 6 | # from Python, see here: 7 | # 8 | # https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html 9 | # 10 | # The optimisation in comparison to the ref implementation 11 | # is that everything that can be computed once gets computed for the 12 | # first signature being generated, and then reused. This includes 13 | # the timestamp and everything derived from it, the signing key 14 | # and the query string (before the signature is computed). 15 | # 16 | # Note that this is specifically made for the cases where one needs 17 | # presigned URLs for multiple objects from the same bucket, with the same 18 | # expiry. Passing the expiry via the constructor, for instance, allows us 19 | # to cache more of the query string - saving even more time. 
module WT
  # Generates presigned S3 GET URLs (AWS Signature Version 4, query-string
  # authentication) for many objects of the same bucket sharing one expiry.
  # Everything that does not depend on the object key (timestamps, credential
  # scope, query string, signing key) is computed on the first call to
  # {#presigned_get_url} and reused afterwards.
  class S3Signer
    ALGORITHM = "AWS4-HMAC-SHA256".freeze
    HTTP_METHOD = "GET".freeze
    SIGNED_HEADERS = "host".freeze
    UNSIGNED_PAYLOAD = "UNSIGNED-PAYLOAD".freeze
    private_constant :ALGORITHM, :HTTP_METHOD, :SIGNED_HEADERS, :UNSIGNED_PAYLOAD

    # Creates a new instance of WT::S3Signer for a given S3 bucket object.
    # This object can be created in the AWS SDK using `Aws::S3::Bucket.new(my_bucket_name)`.
    # The bucket object helps resolving the bucket endpoint URL and the bucket name.
    #
    # The bucket region and the credentials are obtained through the shared
    # client returned by `S3Signer.client`. That client is memoized at class
    # level; assign your own with `S3Signer.client = ...` if the application
    # already caches one.
    #
    # @param bucket[Aws::S3::Bucket] the AWS bucket resource object
    # @param extra_attributes[Hash] any extra keyword arguments to pass to `S3Signer.new`
    #   (for instance `expires_in:` and `now:`)
    # @return [WT::S3Signer]
    def self.for_s3_bucket(bucket, **extra_attributes)
      kwargs = {}

      kwargs[:bucket_endpoint_url] = bucket.url
      kwargs[:bucket_host] = URI.parse(bucket.url).host
      kwargs[:bucket_name] = bucket.name

      resp = client.get_bucket_location(bucket: bucket.name)
      aws_region = resp.data.location_constraint

      # us-east-1 is a special AWS region (the oldest) and one
      # of the specialties is that when you ask for the region
      # of a bucket you get an empty string back instead of the
      # actual name of the region. We need to compensate for that
      # because if our region name is empty our signature will _not_
      # be accepted by S3 (but only for buckets in the us-east-1 region!).
      # A nil location constraint is treated the same way, otherwise
      # building the credential scope would fail later with a TypeError.
      kwargs[:aws_region] = aws_region.to_s.empty? ? "us-east-1" : aws_region

      # Some credential providers wrap the actual credentials object
      credentials = client.config.credentials
      credentials = credentials.credentials if credentials.respond_to?(:credentials)
      kwargs[:access_key_id] = credentials.access_key_id
      kwargs[:secret_access_key] = credentials.secret_access_key
      kwargs[:session_token] = credentials.session_token

      new(**kwargs, **extra_attributes)
    rescue Aws::S3::Errors::AccessDenied, Aws::Errors::MissingCredentialsError
      # We noticed cases where errors related to AWS credentials started to happen suddenly.
      # We don't know the root cause yet, but what we can do is release the
      # @client instance because it contains a cache of credentials that in most cases
      # is no longer valid.
      @client = nil

      raise
    end

    # Creates a new instance of WT::S3Signer
    #
    # @param now[Time] The timestamp to use for the signature (the `expires_in` is also relative to that time)
    # @param expires_in[Integer] The number of seconds the URL will stay current from `now`
    # @param aws_region[String] The name of the AWS region. Also needs to be set to "us-east-1" for the respective region.
    # @param bucket_endpoint_url[String] The endpoint URL for the bucket (usually same as the bucket hostname as resolved by the SDK)
    # @param bucket_host[String] The bucket endpoint hostname (usually derived from the bucket endpoint URL)
    # @param bucket_name[String] The bucket name
    # @param access_key_id[String] The IAM access key ID
    # @param secret_access_key[String] The IAM secret access key
    # @param session_token[String,nil] The IAM session token if STS sessions are used
    #   (may be omitted when no session token is in use)
    def initialize(now: Time.now, expires_in:, aws_region:, bucket_endpoint_url:, bucket_host:, bucket_name:, access_key_id:, secret_access_key:, session_token: nil)
      @region = aws_region
      @service = "s3"

      @expires_in = expires_in
      @bucket_endpoint = bucket_endpoint_url
      @bucket_host = bucket_host
      @bucket_name = bucket_name
      @now = now.utc
      @secret_key = secret_access_key
      @access_key = access_key_id
      @session_token = session_token
    end

    # Creates a signed URL for the given S3 object key.
    # The URL is temporary and the expiration time is based on the
    # expires_in value on initialize
    #
    # @param object_key[String] The raw (not URL-escaped) S3 key that needs
    #   a presigned url. Every path segment of the key gets percent-encoded
    #   here, `/` separators are kept as they are.
    #
    # @raise [ArgumentError] Raises an ArgumentError if `object_key:`
    #   is empty.
    #
    # @return [String] The signed url
    def presigned_get_url(object_key:)
      if object_key.nil? || object_key == ""
        raise ArgumentError, "object_key: must not be empty"
      end

      # Values that do not change during consecutive calls are memoized in
      # instance variables, so they are only computed for the first URL.
      @datestamp ||= @now.strftime("%Y%m%d")
      @amz_date ||= @now.strftime("%Y%m%dT%H%M%SZ")

      # ------ TASK 1: Create the canonical request
      # The canonical URI (the URI path) is the only thing that changes
      # depending on the object key. It must be URI-encoded, both in the
      # canonical request and in the resulting URL, otherwise keys with
      # spaces or other reserved characters yield a signature mismatch.
      canonical_uri = "/" + escape_object_key(object_key)

      @canonical_headers ||= "host:" + @bucket_host + "\n"
      @credential_scope ||= @datestamp + "/" + @region + "/" + @service + "/" + "aws4_request"

      @canonical_querystring_template ||= [
        "X-Amz-Algorithm=#{ALGORITHM}",
        "X-Amz-Credential=" + CGI.escape(@access_key + "/" + @credential_scope),
        "X-Amz-Date=" + @amz_date,
        "X-Amz-Expires=%d" % @expires_in,
        # ------- When using STS we also need to add the security token
        ("X-Amz-Security-Token=" + CGI.escape(@session_token) if @session_token),
        "X-Amz-SignedHeaders=" + SIGNED_HEADERS,
      ].compact.join('&')

      canonical_request = [
        HTTP_METHOD,
        canonical_uri,
        @canonical_querystring_template,
        @canonical_headers,
        SIGNED_HEADERS,
        UNSIGNED_PAYLOAD
      ].join("\n")

      # ------ TASK 2: Create a String to sign
      string_to_sign = [
        ALGORITHM,
        @amz_date,
        @credential_scope,
        Digest::SHA256.hexdigest(canonical_request)
      ].join("\n")

      # ------ TASK 3: Calculate the signature
      @signing_key ||= derive_signing_key(@secret_key, @datestamp, @region, @service)
      signature = OpenSSL::HMAC.hexdigest("SHA256", @signing_key, string_to_sign)

      # ------ TASK 4: Add signing information to the request
      qs_with_signature = @canonical_querystring_template + "&X-Amz-Signature=" + signature

      @bucket_endpoint + canonical_uri + "?" + qs_with_signature
    end

    # AWS gems have a mechanism to cache credentials internally. To take
    # advantage of this, it's necessary to use the same client instance.
    #
    # @return [Aws::S3::Client] the memoized client shared by `for_s3_bucket`
    def self.client
      @client ||= Aws::S3::Client.new(
        # The default value is 0. If the metadata service fails to respond, it
        # will raise missing credentials when used
        instance_profile_credentials_retries: 5,
      )
    end

    # Replaces the shared client used by `for_s3_bucket`.
    #
    # @param client[Aws::S3::Client,nil] the client to use; assigning nil makes
    #   the next call to `S3Signer.client` build a fresh one
    def self.client=(client)
      @client = client
    end

    private

    # Percent-encodes every segment of the object key while keeping the "/"
    # separators (including leading, trailing and repeated ones) untouched.
    def escape_object_key(object_key)
      object_key.split("/", -1).map { |segment| uri_escape(segment) }.join("/")
    end

    # CGI.escape is a form encoder: it turns spaces into "+" and, on older
    # Rubies, escapes "~". Both are normalised to what SigV4 expects.
    def uri_escape(string)
      CGI.escape(string).gsub("+", "%20").gsub("%7E", "~")
    end

    # Derives the SigV4 signing key by chaining HMAC-SHA256 over the
    # datestamp, region, service and the "aws4_request" terminator.
    def derive_signing_key(key, datestamp, region, service)
      prefixed_key = "AWS4" + key
      k_date = hmac_bytes(prefixed_key, datestamp)
      k_region = hmac_bytes(k_date, region)
      k_service = hmac_bytes(k_region, service)
      hmac_bytes(k_service, "aws4_request")
    end

    # Raw (binary) HMAC-SHA256 of `data` using `key`.
    def hmac_bytes(key, data)
      OpenSSL::HMAC.digest("SHA256", key, data)
    end
  end
end