├── .gitignore
├── .kitchen.yml
├── Berksfile
├── CHANGELOG.md
├── MIT-LICENSE.txt
├── README.md
├── attributes
│   └── default.rb
├── bin
│   └── s3_crypto
├── chefignore
├── libraries
│   ├── matchers.rb
│   └── s3_file.rb
├── metadata.rb
├── providers
│   └── default.rb
├── recipes
│   ├── default.rb
│   └── dependencies.rb
├── resources
│   └── default.rb
└── test
    ├── fixtures
    │   └── cookbooks
    │       └── s3_file_test
    │           ├── README.md
    │           ├── metadata.rb
    │           └── recipes
    │               └── default.rb
    └── integration
        └── default
            └── serverspec
                ├── default_spec.rb
                └── spec_helper.rb

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
.kitchen.local.yml
.kitchen
.s3.yml
Berksfile.lock
*.iml

--------------------------------------------------------------------------------
/.kitchen.yml:
--------------------------------------------------------------------------------
---
driver:
  name: vagrant

provisioner:
  name: chef_solo
<% s3 = YAML.load_file('.s3.yml') %>
platforms:
<% %w(11.16.4 12.0.1).each do |chef_version|
     %w(ubuntu-12.04 ubuntu-14.04 centos-6.6 centos-7.0).each do |platform| %>
- name: <%= platform %>
  driver_config:
    require_chef_omnibus: <%= chef_version %>
<%   end
   end %>
suites:
  - name: default
    run_list:
      - recipe[s3_file_test::default]
    attributes:
      s3_file_test:
        file: <%= s3['file'] %>
        bucket: <%= s3['bucket'] %>
        region: <%= s3['region'] %>
        access_key: <%= s3['access_key'] %>
        secret_key: <%= s3['secret_key'] %>

--------------------------------------------------------------------------------
/Berksfile:
--------------------------------------------------------------------------------
source 'https://supermarket.chef.io'

metadata

cookbook 's3_file_test', path: 'test/fixtures/cookbooks/s3_file_test'

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
2015-06-03 version 2.5.4
========================
* Adds a version constraint on rest-client to 1.7.3.
* Adds a cookbook attribute for overriding the rest-client gem version.

2015-03-20 version 2.5.3
========================
* Fix deprecated digest call.
* Merged https://github.com/adamsb6/s3_file/pull/41. README enhancements. @eherot
* Merged https://github.com/adamsb6/s3_file/pull/43. Performance fix for rest-client install. @scottymarshall

*version 2.5.2*

* Add retries for downloads.

2014-12-09 version 2.5.1
========================
* Merged https://github.com/adamsb6/s3_file/pull/36. Fixes compatibility with Chef 12.

2014-10-01 version 2.5.0
========================
* Merged https://github.com/adamsb6/s3_file/pull/31. This provides an optional s3_url value so a recipe can use S3 buckets other than US-based ones.
* Merged https://github.com/adamsb6/s3_file/pull/29. Adds a ChefSpec matcher for testing.

2014-04-17 version 2.4.0
========================
* Merged pull request https://github.com/adamsb6/s3_file/pull/25. This provides new functionality to automatically decrypt an encrypted file uploaded to S3.

2014-03-18 version 2.3.3
========================
* Merged pull request https://github.com/adamsb6/s3_file/pull/24. This corrects the documentation for using X-Amz-Meta-Digest to identify the MD5 of multi-part uploads.

2014-02-20 version 2.3.2
========================
* Added documentation for the multi-part ETag/MD5 issue.
* Added changelog, backdated to 2014-02-14.

2014-02-14 version 2.3.1
========================
* Merged pull request https://github.com/adamsb6/s3_file/pull/22. This fixes an issue in which an :immediately arg to notify would trigger the notified resource before file permissions had been set.

--------------------------------------------------------------------------------
/MIT-LICENSE.txt:
--------------------------------------------------------------------------------
Copyright 2012-2013 Brandon Adams and other contributors

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DESCRIPTION
An LWRP that can be used to fetch files from S3.

I created this LWRP to solve the chicken-and-egg problem of fetching files from S3 on the first Chef run on a newly provisioned machine. Ruby libraries that are installed on that first run are not available to Chef during the run, so I couldn't use a library like Fog to get what I needed from S3.

This LWRP has no dependencies beyond the Ruby standard library, so it can be used on the first run of Chef.

# REQUIREMENTS
An Amazon Web Services account and something in S3 to fetch.

Multi-part S3 uploads do not put the MD5 of the content in the ETag header. If x-amz-meta-digest is provided in the user-defined metadata of the S3 object, it is processed as if it were a Digest header (RFC 3230).

The MD5 of the local file is checked against the MD5 from x-amz-meta-digest if it is present; otherwise it is checked against the ETag. If there is no match, or the local file is absent, the file will be downloaded.

By default, a catalog file in Chef's cache path records the ETag and MD5 of every downloaded file at the time of download. If either of these doesn't match, the file will be downloaded again. To disable this behavior, set `node['s3_file']['use_catalog']` to `false`.
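
For example, a wrapper cookbook can disable the catalog in an attributes file:

    override['s3_file']['use_catalog'] = false
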
If credentials are not provided, s3_file will attempt to use the first instance profile associated with the instance. See http://docs.aws.amazon.com/IAM/latest/UserGuide/instance-profiles.html for more on instance profiles.

# USAGE
s3_file acts like other file resources. The only supported action is :create, which is the default.

Attribute parameters:

* `aws_access_key_id` - your AWS access key id. (optional)
* `aws_secret_access_key` - your AWS secret access key. (optional)
* `token` - token used for temporary IAM credentials. (optional)
* `bucket` - the bucket to pull from.
* `s3_url` - custom S3 URL. If specified, this URL *must* include the bucket name at the end. (optional)
* `public_bucket` - set to true if the bucket is public. Defaults to false. (optional)
* `remote_path` - the S3 key to pull.
* `aws_region` - the AWS region of the bucket. If omitted, s3_file tries to detect it automatically. (optional)
* `owner` - the owner of the file. (optional)
* `group` - the group owner of the file. (optional)
* `mode` - the octal mode of the file. (optional)
* `decryption_key` - the key used to encrypt your S3 file. Keys that are not exactly 32 bytes long are hashed with SHA-256 to derive the AES-256 key. (optional)
* `decrypted_file_checksum` - the SHA256 hex digest of the decrypted file. (optional)
* `verify_md5` - set to true to verify the MD5 of the downloaded object. Defaults to false. (optional)

Example:

    s3_file "/tmp/somefile" do
      remote_path "/my/s3/key"
      bucket "my-s3-bucket"
      aws_access_key_id "mykeyid"
      aws_secret_access_key "mykey"
      s3_url "https://s3.amazonaws.com/bucket"
      owner "me"
      group "mygroup"
      mode "0644"
      action :create
      decryption_key "my SHA256 digest key"
      decrypted_file_checksum "SHA256 hex digest of decrypted file"
    end

# MD5 and Multi-Part Upload
s3_file compares the MD5 hash of a local file, if present, with the ETag header of the S3 object. If they do not match, the remote object is downloaded and notifications are fired.

In most cases, the ETag of an S3 object is identical to its MD5 hash. However, if the file was uploaded to S3 via multi-part upload, the ETag is not an MD5 digest of the object, so the MD5 of the local file and the ETag of the remote object will never match.

To work around this issue, set an X-Amz-Meta-Digest value in the object's user-defined metadata, with its value set to `md5=MD5 of the entire object`. s3_file will then use that value in place of the ETag, and will skip downloading when the MD5 of the local file matches the value from the X-Amz-Meta-Digest header.
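
For example, assuming the AWS CLI and `md5sum` are available, the metadata can be set at upload time (the bucket and key names here are placeholders):

    md5=$(md5sum ./somefile | cut -d' ' -f1)
    aws s3 cp ./somefile s3://my-s3-bucket/my/s3/key --metadata digest="md5=$md5"
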
# USING ENCRYPTED S3 FILES
s3_file can decrypt files that have been encrypted using an AES-256-CBC cipher. To use the decryption part of the resource, you must provide a decryption_key, which can be generated by following the instructions below. You can also include an optional decrypted_file_checksum, which lets Chef decide whether it needs to re-download the encrypted file. Note that this checksum is different from the one in S3: the local file it is compared against is already decrypted, so a SHA256 checksum of the decrypted content is used instead of the MD5. Instructions to generate the decrypted_file_checksum are below as well.

To use s3_file with encrypted files:

1. Create a new key using `bin/s3_crypto -g > my_new_key`.
1. Create a SHA256 hex digest checksum of your source file by calling `bin/s3_crypto -c -i my_source_file [ -o my_checksum_file ]`.
1. Encrypt your file using the new key by calling `bin/s3_crypto -e -k my_new_key -i my_source_file [ -o my_destination_file ]`.
1. You can test decryption of your file using `bin/s3_crypto -d -k my_new_key -i my_encoded_file [ -o my_decoded_destination ]`.
1. Upload your encrypted file to S3 as normal.
1. In the s3_file resource call, provide the string within `my_new_key` as the decryption_key of the resource.
1. In the s3_file resource call, provide the string within `my_checksum_file` as the decrypted_file_checksum of the resource.

When you make the s3_file call, it is best to set decryption_key from a node attribute backed by an encrypted data bag, or to pull the key from the environment. Do not check your decryption key into your recipe.

Running `bin/s3_crypto -g > my_new_key` generates a new 256-bit key (64 hexadecimal characters) for you. Paste that key into a file for later use, and DO NOT include a trailing newline, otherwise encryption and decryption will fail.

You can use the `bin/s3_crypto` utility to encrypt files prior to uploading them to S3, and to decrypt files afterwards to make sure the encryption works.
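
For example, a minimal sketch that pulls the key from an encrypted data bag (the data bag, item, secret path, and S3 names are hypothetical):

    secret = Chef::EncryptedDataBagItem.load_secret('/etc/chef/encrypted_data_bag_secret')
    item = Chef::EncryptedDataBagItem.load('secrets', 's3', secret)

    s3_file '/opt/myapp/data.enc' do
      remote_path '/my/s3/encrypted_file'
      bucket 'my-s3-bucket'
      decryption_key item['decryption_key']
      decrypted_file_checksum item['decrypted_file_checksum']
    end
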
# ChefSpec matcher
s3_file comes with a matcher to use in [ChefSpec](https://github.com/sethvargo/chefspec).

This spec checks the code from the USAGE example above:

    it 'downloads some file from s3' do
      expect(chef_run).to create_s3_file('/tmp/somefile')
        .with(bucket: "my-s3-bucket", remote_path: "/my/s3/key")
    end

# Testing

This cookbook has Test Kitchen integration tests. To test, create a .s3.yml file with the following S3 details:

    file: file
    bucket: bucket
    region: xx-xxxx-x
    access_key: XXXXXXXXXXXXXXXXXXXX
    secret_key: XXXXXXXXXXXXXXXXXXXX

If you're using the ChefDK, run `chef exec kitchen test`; otherwise, run `kitchen test`.

--------------------------------------------------------------------------------
/attributes/default.rb:
--------------------------------------------------------------------------------
default['s3_file']['mime-types']['version'] = '2.6.2'
default['s3_file']['rest-client']['version'] = '1.7.3'

# Keep a catalog of each downloaded file's etag and md5 at time of download.
default['s3_file']['use_catalog'] = true

--------------------------------------------------------------------------------
/bin/s3_crypto:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby

require "openssl"
require "securerandom"
require "digest"
require "optparse"
require "tempfile"
require "fileutils"

BLOCKSIZE_TO_READ = 1024 * 1000

options = {}
optparse = OptionParser.new do |opts|
  opts.banner = "Usage: s3_crypto [options]"
  opts.on("-k", "--key_name [PATH]", String, "Encryption key path.") do |k|
    options[:key_name] = k
  end
  opts.on("-i", "--input_file [PATH]", String, "Path to the target file.") do |i|
    options[:input_file] = i
  end
  opts.on("-o", "--output_file [PATH]", String, "Destination path for the output file.") do |o|
    options[:output_file] = o
  end
  opts.on("-e", "--encryption_mode", "Toggle encryption mode.") do
    options[:encryption_mode] = true
  end
  opts.on("-d", "--decryption_mode", "Toggle decryption mode.") do
    options[:decryption_mode] = true
  end
  opts.on("-c", "--checksum_mode", "Toggle checksum mode (SHA256).") do
    options[:checksum_mode] = true
  end
  opts.on("-g", "--generate_key", "Generate a new 256-bit key.") do
    options[:generate_key] = true
  end
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

begin
  optparse.parse!
  exclusive = [:encryption_mode, :decryption_mode, :checksum_mode, :generate_key]
  selected = exclusive.reject { |param| options[param].nil? }
  if selected.length > 1
    puts "Options cannot be used together: #{selected.join(', ')}"
    puts optparse
    exit
  elsif selected.empty?
    puts "One of the following options must be used: #{exclusive.join(', ')}"
    puts optparse
    exit
  end
  unless ([:encryption_mode, :decryption_mode] & options.keys).empty?
    mandatory = [:key_name, :input_file]
    missing = mandatory.select { |param| options[param].nil? }
    unless missing.empty?
      puts "Missing options: #{missing.join(', ')}"
      puts optparse
      exit
    end
  end
  if options[:checksum_mode] && options[:input_file].nil?
    puts "Missing options: input_file"
    puts optparse
    exit
  end
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
  puts $!.to_s
  puts optparse
  exit
end

def aes256_encrypt(key, file_path, output_path)
  key = Digest::SHA256.digest(key) if (key.kind_of?(String) && 32 != key.bytesize)
  aes = OpenSSL::Cipher.new('AES-256-CBC')
  aes.encrypt
  aes.key = key
  if output_path.nil?
    File.open(file_path, "rb") do |fi|
      while buffer = fi.read(BLOCKSIZE_TO_READ)
        $stdout.write aes.update(buffer)
      end
      $stdout.write aes.final
    end
  else
    encrypt_file = Tempfile.new("encode")
    File.open(encrypt_file, "wb") do |ef|
      File.open(file_path, "rb") do |fi|
        while buffer = fi.read(BLOCKSIZE_TO_READ)
          ef.write aes.update(buffer)
        end
      end
      ef.write aes.final
    end
    ::FileUtils.mv(encrypt_file.path, output_path)
  end
end

def aes256_decrypt(key, file_path, output_path)
  key = Digest::SHA256.digest(key) if (key.kind_of?(String) && 32 != key.bytesize)
  aes = OpenSSL::Cipher.new('AES-256-CBC')
  aes.decrypt
  aes.key = key
  if output_path.nil?
    File.open(file_path, "rb") do |fi|
      while buffer = fi.read(BLOCKSIZE_TO_READ)
        $stdout.write aes.update(buffer)
      end
    end
    $stdout.write aes.final
  else
    decrypt_file = Tempfile.new("decode")
    File.open(decrypt_file, "wb") do |df|
      File.open(file_path, "rb") do |fi|
        while buffer = fi.read(BLOCKSIZE_TO_READ)
          df.write aes.update(buffer)
        end
      end
      df.write aes.final
    end
    ::FileUtils.mv(decrypt_file.path, output_path)
  end
end

# Generates a random 256-bit key as 64 hex characters.
def random_sha256(output_path)
  random_key = SecureRandom.hex(32)
  if output_path.nil?
    $stdout.write random_key
  else
    key_out = Tempfile.new("keygen")
    File.open(key_out, "wb") do |df|
      df.write random_key
    end
    ::FileUtils.mv(key_out.path, output_path)
  end
end

def read_key(key_path)
  File.read(key_path).strip
end

def sha256_digest(file_path, output_path)
  sha256 = Digest::SHA256.new

  File.open(file_path, "rb") do |fi|
    while buffer = fi.read(BLOCKSIZE_TO_READ)
      sha256.update buffer
    end
  end

  if output_path.nil?
    $stdout.write(sha256.hexdigest)
  else
    digest_file = Tempfile.new("digest")
    File.open(digest_file, "wb") do |df|
      df.write sha256.hexdigest
    end
    ::FileUtils.mv(digest_file.path, output_path)
  end
end

# options[:output_file] is nil when -o is not given, which the helpers
# interpret as "write to stdout".
if options[:encryption_mode]
  key = read_key(options[:key_name])
  aes256_encrypt(key, options[:input_file], options[:output_file])
end

if options[:decryption_mode]
  key = read_key(options[:key_name])
  aes256_decrypt(key, options[:input_file], options[:output_file])
end

if options[:checksum_mode]
  sha256_digest(options[:input_file], options[:output_file])
end

if options[:generate_key]
  random_sha256(options[:output_file])
end

--------------------------------------------------------------------------------
/chefignore:
--------------------------------------------------------------------------------
*file
*file.lock
bin/*
bundle
chefignore
spec
test/*
vendor
.git
.*
dev.*
*_dev

--------------------------------------------------------------------------------
/libraries/matchers.rb:
--------------------------------------------------------------------------------
if defined?(ChefSpec)
  ChefSpec.define_matcher :s3_file

  def create_s3_file(path)
    ChefSpec::Matchers::ResourceMatcher.new(:s3_file, :create, path)
  end
end

--------------------------------------------------------------------------------
/libraries/s3_file.rb:
--------------------------------------------------------------------------------
require 'time'
require 'openssl'
require 'base64'

module S3FileLib

  module SigV2
    # args: aws_access_key_id, aws_secret_access_key, token
    def self.sign(request, bucket, path, *args)
      token = args[2] if args[2]
      now = Time.now.utc.strftime('%a, %d %b %Y %H:%M:%S GMT')
      string_to_sign = "#{request.method}\n\n\n%s\n" % [now]

      string_to_sign += "x-amz-security-token:#{token}\n" if token

      string_to_sign += "/%s%s" % [bucket, path]

      digest = OpenSSL::Digest.new('sha1')
      signed = OpenSSL::HMAC.digest(digest, args[1], string_to_sign)
      signed_base64 = Base64.encode64(signed)

      auth_string = 'AWS %s:%s' % [args[0], signed_base64]

      request["date"] = now
      request["authorization"] = auth_string.strip
      request["x-amz-security-token"] = token if token
      request
    end
  end

  module SigV4
    def self.sigv4(string_to_sign, aws_secret_access_key, region, date, service_name)
      k_date = OpenSSL::HMAC.digest("sha256", "AWS4" + aws_secret_access_key, date)
      k_region = OpenSSL::HMAC.digest("sha256", k_date, region)
      k_service = OpenSSL::HMAC.digest("sha256", k_region, service_name)
      k_signing = OpenSSL::HMAC.digest("sha256", k_service, "aws4_request")

      OpenSSL::HMAC.hexdigest("sha256", k_signing, string_to_sign)
    end

    # args: region, aws_access_key_id, aws_secret_access_key, token
    def self.sign(request, params, *args)
      token = args[3] if args[3]
      url = URI.parse(params[:url])
      content = request.body || ""

      algorithm = "AWS4-HMAC-SHA256"
      service = "s3"
      now = Time.now.utc
      time = now.strftime("%Y%m%dT%H%M%SZ")
      date = now.strftime("%Y%m%d")

      body_digest = Digest::SHA256.hexdigest(content)

      request["date"] = now
      request["host"] = url.host
      request["x-amz-date"] = time
      request["x-amz-security-token"] = token if token
      request["x-amz-content-sha256"] = body_digest

      canonical_query_string = url.query || ""
      canonical_headers = request.each_header.sort.map { |k, v| "#{k.downcase}:#{v.gsub(/\s+/, ' ').strip}" }.join("\n") + "\n" # needs an extra newline at the end
      signed_headers = request.each_name.map(&:downcase).sort.join(";")

      canonical_request = [request.method, url.path, canonical_query_string, canonical_headers, signed_headers, body_digest].join("\n")
      scope = format("%s/%s/%s/%s", date, args[0], service, "aws4_request")
      credential = [args[1], scope].join("/")

      string_to_sign = "#{algorithm}\n#{time}\n#{scope}\n#{Digest::SHA256.hexdigest(canonical_request)}"
      signed_hex = sigv4(string_to_sign, args[2], args[0], date, service)
      auth_string = "#{algorithm} Credential=#{credential}, SignedHeaders=#{signed_headers}, Signature=#{signed_hex}"

      request["Authorization"] = auth_string
      request
    end
  end

  BLOCKSIZE_TO_READ = 1024 * 1000 unless const_defined?(:BLOCKSIZE_TO_READ)

  def self.with_region_detect(region = nil)
    yield(region)
  rescue client::BadRequest => e
    if region.nil?
      region = e.response.headers[:x_amz_region]
      raise if region.nil?
      yield(region)
    else
      raise
    end
  end

  # args: aws_access_key_id, aws_secret_access_key, token, region
  def self.do_request(method, url, bucket, path, *args, public_bucket: false)
    region = args[3]
    url = build_endpoint_url(bucket, region) if url.nil?

    with_region_detect(region) do |real_region|
      client.reset_before_execution_procs
      client.add_before_execution_proc do |request, params|
        if !public_bucket
          # Sign with SigV2 when no region is known, otherwise use SigV4.
          if real_region.nil?
            SigV2.sign(request, bucket, path, args[0], args[1], args[2])
          else
            SigV4.sign(request, params, real_region, args[0], args[1], args[2])
          end
        end
      end
      client::Request.execute(:method => method, :url => "#{url}#{path}", :raw_response => true)
    end
  end

  def self.build_endpoint_url(bucket, region)
    endpoint = if region && region != "us-east-1"
                 "s3-#{region}.amazonaws.com"
               else
                 "s3.amazonaws.com"
               end

    # Buckets with DNS-compatible names get virtual-hosted-style URLs;
    # everything else falls back to path-style URLs.
    if bucket =~ /^[a-z0-9][a-z0-9-]+[a-z0-9]$/
      "https://#{bucket}.#{endpoint}"
    else
      "https://#{endpoint}/#{bucket}"
    end
  end

  def self.get_md5_from_s3(bucket, url, path, *args, public_bucket: nil)
    if public_bucket
      get_digests_from_s3(bucket, url, path, public_bucket: public_bucket)["md5"]
    else
      get_digests_from_s3(bucket, url, path, args[0], args[1], args[2], args[3], public_bucket: public_bucket)["md5"]
    end
  end

  def self.get_digests_from_headers(headers)
    etag = headers[:etag].gsub('"', '')
    digest = headers[:x_amz_meta_digest]
    digests = digest.nil? ? {} : Hash[digest.split(",").map { |a| a.split("=") }]
    return {"md5" => etag}.merge(digests)
  end

  # args: aws_access_key_id, aws_secret_access_key, token, region
  def self.get_digests_from_s3(bucket, url, path, *args, timeout: 300, open_timeout: 10, retries: 5, public_bucket: false)
    now, auth_string = get_s3_auth("HEAD", bucket, path, args[0], args[1], args[2])
    max_tries = retries + 1
    headers = { "date" => now, "authorization" => auth_string }
    headers["x-amz-security-token"] = args[2] if args[2]
    saved_exception = nil

    while max_tries > 0
      begin
        response = RestClient.head('https://%s.s3.amazonaws.com%s' % [bucket, path], headers)

        return get_digests_from_headers(response.headers)
      rescue => e
        max_tries -= 1
        saved_exception = e
      end
    end
    raise saved_exception
  end

  def self.validate_download_checksum(response)
    # Default to not checking the md5 sum of downloaded objects,
    # per http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html:
    # if an object is created by either the Multipart Upload or Part Copy operation,
    # the ETag is not an MD5 digest, regardless of the method of encryption.
    # However, if present, x-amz-meta-digest will contain the digest, so
    # try if we see enough information and verify_md5 is set.
    if response.headers[:x_amz_meta_digest]
      return self.verify_md5_checksum(response.headers[:x_amz_meta_digest_md5].gsub('"', ''), response.file.path)
    else
      server_side_encryption_customer_algorithm = response.headers[:x_amz_server_side_encryption_customer_algorithm]
      server_side_encryption = response.headers[:x_amz_server_side_encryption]
      if server_side_encryption_customer_algorithm.nil? and server_side_encryption != "aws:kms"
        return self.verify_md5_checksum(response.headers[:etag].gsub('"', ''), response.file.path)
      else
        # If we do not have the x-amz-meta-digest-md5 header, we
        # cannot validate objects encrypted with SSE-C or SSE-KMS,
        # because the ETag will not be the MD5 digest. Assume they are
        # valid in those cases.
        return true
      end
    end
  end

  def self.get_from_s3(bucket, url, path, aws_access_key_id, aws_secret_access_key, token, public_bucket: false, verify_md5: false, region: nil)
    response = nil
    retries = 5
    for attempts in 0..retries
      begin
        if public_bucket
          response = do_request("GET", url, bucket, path, public_bucket: public_bucket)
        else
          response = do_request("GET", url, bucket, path, aws_access_key_id, aws_secret_access_key, token, region, public_bucket: public_bucket)
        end
        # Check the length of the downloaded object to make sure we didn't get
        # nailed by a quirk in the Net::HTTP class from the Ruby standard library.
        # Net::HTTP has the behavior (and I would call this a bug) that if the
        # connection gets reset in the middle of transferring the response,
        # it silently truncates the response back to the caller without throwing
        # an exception.
        # ** See https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb#L291
        # and https://github.com/ruby/ruby/blob/trunk/lib/net/protocol.rb#L99 .
        # It attempts to read up to Content-Length worth of bytes, but if it hits
        # an early EOF, it just returns without throwing an exception (the
        # ignore_eof flag).

        length = response.headers[:content_length].to_i
        if length > 0 && response.file.size != length
          raise "Downloaded object size (#{response.file.size}) does not match expected content_length (#{length})"
        end

        # Default to not checking the md5 sum of downloaded objects,
        # per http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html:
        # if an object is created by either the Multipart Upload or Part Copy operation,
        # the ETag is not an MD5 digest, regardless of the method of encryption.
        # However, if present, x-amz-meta-digest will contain the digest, so
        # try if we see enough information and verify_md5 is set.
        if verify_md5
          if not self.validate_download_checksum(response)
            raise "Downloaded object has an md5sum which differs from the expected value provided by S3"
          end
        end

        return response
      rescue client::MovedPermanently, client::Found, client::TemporaryRedirect => e
        uri = URI.parse(e.response.header['location'])
        path = uri.path
        uri.path = ""
        url = uri.to_s
        retry
      rescue => e
        error = e.respond_to?(:response) ? e.response : e
        if attempts < retries
          Chef::Log.warn(error)
          sleep 5
          next
        else
          Chef::Log.fatal(error)
          raise e
        end
      end
    end
  end

  def self.get_s3_auth(method, bucket, path, aws_access_key_id, aws_secret_access_key, token)
    now = Time.now.utc.strftime('%a, %d %b %Y %H:%M:%S GMT')
    string_to_sign = "#{method}\n\n\n%s\n" % [now]

    if token
      string_to_sign += "x-amz-security-token:#{token}\n"
    end

    string_to_sign += "/%s%s" % [bucket, path]

    digest = OpenSSL::Digest.new('sha1')
    signed = OpenSSL::HMAC.digest(digest, aws_secret_access_key, string_to_sign)
    signed_base64 = Base64.encode64(signed)

    auth_string = 'AWS %s:%s' % [aws_access_key_id, signed_base64]

    [now, auth_string]
  end

  def self.aes256_decrypt(key, file)
    Chef::Log.debug("Decrypting S3 file.")
    key = key.strip
    require "digest"
    key = Digest::SHA256.digest(key) if (key.kind_of?(String) && 32 != key.bytesize)
    aes = OpenSSL::Cipher.new('AES-256-CBC')
    aes.decrypt
    aes.key = key
    decrypt_file = Tempfile.new("chef-s3-decrypt")
    File.open(decrypt_file, "wb") do |df|
      File.open(file, "rb") do |fi|
        while buffer = fi.read(BLOCKSIZE_TO_READ)
          df.write aes.update(buffer)
        end
      end
      df.write aes.final
    end
    decrypt_file
  end

  def self.verify_sha256_checksum(checksum, file)
    recipe_sha256 = checksum
    local_sha256 = Digest::SHA256.new

    File.open(file, "rb") do |fi|
      while buffer = fi.read(BLOCKSIZE_TO_READ)
        local_sha256.update buffer
      end
    end

    Chef::Log.debug "sha256 provided #{recipe_sha256}"
    Chef::Log.debug "sha256 of local object is #{local_sha256.hexdigest}"

    local_sha256.hexdigest == recipe_sha256
  end

  def self.verify_md5_checksum(checksum, file)
    s3_md5 = checksum
    local_md5 = buffered_md5_checksum(file)

    Chef::Log.debug "md5 of remote object is #{s3_md5}"
    Chef::Log.debug "md5 of local object is #{local_md5.hexdigest}"

    local_md5.hexdigest == s3_md5
  end

  def self.buffered_md5_checksum(file)
    local_md5 = Digest::MD5.new

    # Buffer the checksum to limit RAM consumption.
    File.open(file, "rb") do |fi|
      while buffer = fi.read(BLOCKSIZE_TO_READ)
        local_md5.update buffer
      end
    end
    local_md5
  end

  def self.verify_etag(etag, file)
    catalog.fetch(file, nil) == etag
  end

  def self.catalog_path
    File.join(Chef::Config[:file_cache_path], 's3_file_etags.json')
  end

  def self.catalog
    File.exist?(catalog_path) ? JSON.parse(IO.read(catalog_path)) : {}
  end

  def self.write_catalog(data)
    File.open(catalog_path, 'w', 0644) { |f| f.write(JSON.dump(data)) }
  end

  def self.client
    require 'rest-client'
    # Prefer an https proxy when one is configured in the environment.
    RestClient.proxy = ENV['https_proxy'] || ENV['http_proxy']
    RestClient
  end
end
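
# For reference, a sketch of how build_endpoint_url above resolves endpoints
# (the bucket names are hypothetical):
#
#   S3FileLib.build_endpoint_url('my-bucket', nil)         # => "https://my-bucket.s3.amazonaws.com"
#   S3FileLib.build_endpoint_url('my-bucket', 'eu-west-1') # => "https://my-bucket.s3-eu-west-1.amazonaws.com"
#   S3FileLib.build_endpoint_url('My.Bucket', 'us-east-1') # => "https://s3.amazonaws.com/My.Bucket"
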
--------------------------------------------------------------------------------
/metadata.rb:
--------------------------------------------------------------------------------
name "s3_file"
maintainer "Brandon Adams"
maintainer_email "brandon.adams@me.com"
license "MIT"
description "Installs/Configures the s3_file LWRP"
long_description IO.read(File.join(File.dirname(__FILE__), 'README.md'))
version "2.9.0"

source_url 'https://github.com/adamsb6/s3_file' if respond_to?(:source_url)
issues_url 'https://github.com/adamsb6/s3_file/issues' if respond_to?(:issues_url)

chef_version '>= 12.6' if respond_to?(:chef_version)

supports 'centos'
supports 'redhat'
supports 'windows'

--------------------------------------------------------------------------------
/providers/default.rb:
--------------------------------------------------------------------------------
require 'digest/md5'
require 'json'
require 'cgi'

use_inline_resources

action :create do
  @run_context.include_recipe 's3_file::dependencies'
  client = S3FileLib.client
  download = true

  # Handle keys specified without a leading slash, and URL-encode each path segment.
  remote_path = ::File.join('', new_resource.remote_path).split('/').map { |x| CGI.escape(x) }.join('/')

  # We need the credentials to be mutable.
  aws_access_key_id = new_resource.aws_access_key_id
  aws_secret_access_key = new_resource.aws_secret_access_key
  token = new_resource.token
  decryption_key = new_resource.decryption_key
  region = new_resource.aws_region

  Chef::Log.debug("credentials received [aws_access_key_id:%s, aws_secret_access_key:%s, token:%s]" % [!aws_access_key_id.nil?, !aws_secret_access_key.nil?, !token.nil?])

  # If credentials are not set, try the instance profile.
  if aws_access_key_id.nil? && aws_secret_access_key.nil? && token.nil?
    if new_resource.public_bucket
      aws_access_key_id = ''
      aws_secret_access_key = ''
      token = ''
    else
      get_token = Proc.new { client.put('http://169.254.169.254/latest/api/token/', nil, {:'X-aws-ec2-metadata-token-ttl-seconds' => '60'})&.body }

      instance_profile_base_url = 'http://169.254.169.254/latest/meta-data/iam/security-credentials/'
      begin
        instance_profiles = client.get(instance_profile_base_url, {:'X-aws-ec2-metadata-token' => get_token.call})
      rescue client::ResourceNotFound, Errno::ETIMEDOUT # a 404 is set on EC2 instances without an instance profile
        raise ArgumentError.new 'No credentials provided and no instance profile on this machine.'
      end
      instance_profile_name = instance_profiles.split.first
      instance_profile = JSON.load(client.get(instance_profile_base_url + instance_profile_name, {:'X-aws-ec2-metadata-token' => get_token.call}))

      aws_access_key_id = instance_profile['AccessKeyId']
      aws_secret_access_key = instance_profile['SecretAccessKey']
      token = instance_profile['Token']

      # Now try to auto-detect the region from the instance-identity document.
      if region.nil?
        dynamic_doc_base_url = 'http://169.254.169.254/latest/dynamic/instance-identity/document'
        begin
          dynamic_doc = JSON.load(client.get(dynamic_doc_base_url, {:'X-aws-ec2-metadata-token' => get_token.call}))
          region = dynamic_doc && dynamic_doc['region']
        rescue Exception => e
          Chef::Log.debug "Unable to auto-detect region from instance-identity document: #{e.message}"
        end
      end
    end
  end

  if ::File.exist?(new_resource.path)
    s3_etag = S3FileLib::get_md5_from_s3(new_resource.bucket, new_resource.s3_url, remote_path, aws_access_key_id, aws_secret_access_key, token, public_bucket: new_resource.public_bucket)

    if decryption_key.nil?
      if new_resource.decrypted_file_checksum.nil?
        if S3FileLib::verify_md5_checksum(s3_etag, new_resource.path)
          Chef::Log.debug 'Skipping download, md5sum of local file matches file in S3.'
          download = false
        end
      # no decryption key, but a decrypted_file_checksum was provided,
      # so compare it against the local file
      else
        if S3FileLib::verify_sha256_checksum(new_resource.decrypted_file_checksum, new_resource.path)
          Chef::Log.debug 'Skipping download, sha256 of local file matches recipe.'
          download = false
        end
      end
    # since our local file is decrypted, we must use the sha256 checksum
    # provided by the resource to compare to the local file
    else
      unless new_resource.decrypted_file_checksum.nil?
        if S3FileLib::verify_sha256_checksum(new_resource.decrypted_file_checksum, new_resource.path)
          Chef::Log.debug 'Skipping download, sha256 of local file matches recipe.'
          download = false
        end
      end
    end

    # Don't download if the content and etag match the prior download.
    if node['s3_file']['use_catalog']
      catalog_data = S3FileLib::catalog.fetch(new_resource.path, nil)
      existing_file_md5 = S3FileLib::buffered_md5_checksum(new_resource.path)
      if catalog_data && existing_file_md5 == catalog_data['local_md5'] && s3_etag == catalog_data['etag']
        Chef::Log.debug 'Skipping download, md5 of local file and etag match prior download.'
        download = false
      end
    end
  end

  if download
    response = S3FileLib::get_from_s3(new_resource.bucket, new_resource.s3_url, remote_path, aws_access_key_id, aws_secret_access_key, token, region: region, verify_md5: new_resource.verify_md5, public_bucket: new_resource.public_bucket)

    # Not simply using the file resource here because we would have to buffer
    # the whole file into memory in order to set its content. This solves
    # https://github.com/adamsb6/s3_file/issues/15.
    unless decryption_key.nil?
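      # The object is downloaded to a tempfile; when a decryption_key is given,
      # it is decrypted into a second tempfile before being moved into place,
      # so a failed decryption never clobbers an existing file.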
      begin
        decrypted_file = S3FileLib::aes256_decrypt(decryption_key, response.file.path)
      rescue OpenSSL::Cipher::CipherError => e

        Chef::Log.error("Error decrypting #{new_resource.path}, is the decryption key correct?")
        Chef::Log.error("Error message: #{e.message}")

        raise e
      end

      downloaded_file = decrypted_file
    else
      downloaded_file = response.file
    end

    # Write the etag and md5 to the catalog for future reference.
    if node['s3_file']['use_catalog']
      catalog = S3FileLib::catalog
      catalog[new_resource.path] = {
        'etag' => response.headers[:etag].gsub('"', ''),
        'local_md5' => S3FileLib::buffered_md5_checksum(downloaded_file.path)
      }
      S3FileLib::write_catalog(catalog)
    end

    # Take ownership and permissions from the existing object, if any.
    if ::File.exist?(new_resource.path)
      stat = ::File::Stat.new(new_resource.path)
      ::FileUtils.chown(stat.uid, stat.gid, downloaded_file)
      ::FileUtils.chmod(stat.mode, downloaded_file)
    end
    ::FileUtils.mv(downloaded_file.path, new_resource.path)
  end

  f = file new_resource.path do
    action :create
    owner new_resource.owner || ENV['USER']
    group new_resource.group || ENV['USER']
    mode new_resource.mode || '0644'
  end

  new_resource.updated_by_last_action(download || f.updated_by_last_action?)
end

--------------------------------------------------------------------------------
/recipes/default.rb:
--------------------------------------------------------------------------------
#
# Cookbook Name:: s3_file
# Recipe:: default
#
# Copyright 2011, YOUR_COMPANY_NAME
#
# All rights reserved - Do Not Redistribute
#

--------------------------------------------------------------------------------
/recipes/dependencies.rb:
--------------------------------------------------------------------------------
chef_gem 'mime-types' do
  version node['s3_file']['mime-types']['version']
  action :install
  compile_time true if Chef::Resource::ChefGem.method_defined?(:compile_time)
end

chef_gem 'rest-client' do
  version node['s3_file']['rest-client']['version']
  action :install
  compile_time true if Chef::Resource::ChefGem.method_defined?(:compile_time)
end

--------------------------------------------------------------------------------
/resources/default.rb:
--------------------------------------------------------------------------------
actions :create
attribute :path, :kind_of => String, :name_attribute => true
attribute :remote_path, :kind_of => String
attribute :bucket, :kind_of => String
attribute :aws_access_key_id, :kind_of => String, :default => nil
attribute :aws_secret_access_key, :kind_of => String, :default => nil
attribute :aws_region, :kind_of => String, :default => nil
attribute :s3_url, :kind_of => String, :default => nil
attribute :public_bucket, :kind_of => [TrueClass, FalseClass], :default => false
attribute :token, :kind_of => String, :default => nil
attribute :owner, :kind_of => [String, NilClass], :default => nil
attribute :group, :kind_of => [String, NilClass], :default => nil
attribute :mode, :kind_of => [String, Integer, NilClass], :default => nil
attribute :decryption_key, :kind_of => String, :default => nil
attribute :decrypted_file_checksum, :kind_of => String, :default => nil
attribute :verify_md5, :kind_of => [TrueClass, FalseClass], :default => false

def initialize(*args)
  super
  @action = :create
end

--------------------------------------------------------------------------------
/test/fixtures/cookbooks/s3_file_test/README.md:
--------------------------------------------------------------------------------
# Description

This cookbook defines acceptance tests for `s3_file`. It simply attempts to fetch a file from S3.

## Attributes

- `node['s3_file_test']['bucket']` - The bucket where the test file resides
- `node['s3_file_test']['region']` - The AWS region for the bucket
- `node['s3_file_test']['file']` - The name of the test file
- `node['s3_file_test']['access_key']` - The AWS access key which allows us to fetch our test S3 file
- `node['s3_file_test']['secret_key']` - The AWS secret key which allows us to fetch our test S3 file

--------------------------------------------------------------------------------
/test/fixtures/cookbooks/s3_file_test/metadata.rb:
--------------------------------------------------------------------------------
name 's3_file_test'
maintainer 'Brandon Adams'
maintainer_email 'brandon.adams@me.com'
license 'MIT'
description 'Tests the s3_file LWRP'
long_description IO.read(File.join(File.dirname(__FILE__), 'README.md'))
version '0.1.0'

depends 's3_file'

--------------------------------------------------------------------------------
/test/fixtures/cookbooks/s3_file_test/recipes/default.rb:
--------------------------------------------------------------------------------
s3_file '/root/kitchen-test' do
  remote_path node['s3_file_test']['file']
  bucket node['s3_file_test']['bucket']
  aws_access_key_id node['s3_file_test']['access_key']
  aws_secret_access_key node['s3_file_test']['secret_key']
  mode '0600'
  owner 'root'
  group 'root'
end

--------------------------------------------------------------------------------
/test/integration/default/serverspec/default_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

describe file('/root/kitchen-test') do
  it { should be_file }
end

--------------------------------------------------------------------------------
/test/integration/default/serverspec/spec_helper.rb:
--------------------------------------------------------------------------------
require 'serverspec'

set :backend, :exec

--------------------------------------------------------------------------------