├── public └── robots.txt ├── tmp └── .gitignore ├── config.ru ├── CONTRIBUTING.md ├── Gemfile ├── LICENSE.txt ├── Gemfile.lock ├── README.md └── app.rb /public/robots.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tmp/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | require File.join(File.expand_path(File.dirname(__FILE__)), 'app.rb') 2 | 3 | run Sinatra::Application 4 | 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | By submitting code to this project, you agree to irrevocably release it under the same license as this project. See LICENSE.txt for license information. 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org/' 2 | 3 | gem 'sinatra' 4 | gem 'json' 5 | gem 'httparty' 6 | gem 's3' 7 | gem 'rmagick' 8 | 9 | group :development do 10 | gem 'shotgun', :require => nil 11 | gem 'thin', :require => nil 12 | gem 'puma', :require => nil 13 | end 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016 by Aaron Parecki 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | daemons (1.2.4) 5 | eventmachine (1.2.1) 6 | httparty (0.14.0) 7 | multi_xml (>= 0.5.2) 8 | json (2.0.2) 9 | multi_xml (0.6.0) 10 | proxies (0.2.1) 11 | puma (3.6.2) 12 | rack (1.6.5) 13 | rack-protection (1.5.3) 14 | rack 15 | rmagick (2.16.0) 16 | s3 (0.3.25) 17 | proxies (~> 0.2.0) 18 | shotgun (0.9.2) 19 | rack (>= 1.0) 20 | sinatra (1.4.7) 21 | rack (~> 1.5) 22 | rack-protection (~> 1.4) 23 | tilt (>= 1.3, < 3) 24 | thin (1.7.0) 25 | daemons (~> 1.0, >= 1.0.9) 26 | eventmachine (~> 1.0, >= 1.0.4) 27 | rack (>= 1, < 3) 28 | tilt (2.0.5) 29 | 30 | PLATFORMS 31 | ruby 32 | 33 | DEPENDENCIES 34 | httparty 35 | json 36 | puma 37 | rmagick 38 | s3 39 | shotgun 40 | sinatra 41 | thin 42 | 43 | BUNDLED WITH 44 | 1.11.2 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ca3db 2 | ===== 3 | 4 | ca3db is a "content-addressable avatar archive", intended for permanently storing 5 | multiple versions of user avatars found on social networks and websites. 6 | 7 | ## API 8 | 9 | ca3db exposes an HTTP API for archiving images. 
# ca3db -- content-addressable avatar archive (Sinatra application).
#
# Once deployed, there is just one API endpoint:
#
#   POST /archive
#
# It accepts the following parameters, either as a JSON payload (when the
# request Content-Type starts with "application/json") or as regular
# form-encoded parameters:
#
# * url        - The URL of the image to store
# * bucket     - The name of the S3 bucket to store the image in
# * region     - The Amazon region that the S3 bucket lives in
# * key_id     - An Amazon API ID that has write access to the S3 bucket
# * secret_key - The secret key for the above ID
# * max_height - (optional) If specified, the image will be resized to this
#                maximum height if larger
#
# The service fetches the URL and stores the image in the S3 bucket. The
# object name is a SHA-256 hash of the fetched file contents, so calling this
# multiple times with the same image will not store duplicates. Similarly, if
# the same URL gets replaced with a different image, both images will be
# stored at different URLs.
#
# The response is a JSON payload with the URL of the archived image and a
# flag telling whether it was stored by this call, e.g.:
#
#   {
#     "url": "https://s3-us-west-2.amazonaws.com/ca3db/kylewm.com/dcffbb0712bbccc3ed94fc0f0c873ce8fde83d0cc3474fff93109042c378e2f4.jpeg",
#     "new": true
#   }
#
# Failures are reported as a JSON payload of the form {"error": "..."}.

Encoding.default_internal = 'UTF-8'
require 'rubygems'
require 'bundler/setup'
Bundler.require
require 'digest'
require 'uri'

# Parameters that must be present in every request.
REQUIRED_FIELDS = %w[key_id secret_key region bucket url].freeze

# Patterns used to recognise an image type from the response headers.
CONTENT_TYPE_PATTERN = %r{image/(png|jpg|jpeg|gif|ico|svg|x-icon|vnd\.microsoft\.icon)}
CONTENT_DISPOSITION_PATTERN = /filename=.+\.(png|jpg|jpeg|gif|ico|svg)/

# Maps alternative spellings of a type onto the extension used for storage.
EXTENSION_ALIASES = {
  'jpeg' => 'jpg',
  'x-icon' => 'ico',
  'vnd.microsoft.icon' => 'ico'
}.freeze

# MIME types that are not simply "image/<extension>".
IMAGE_MIME_TYPES = {
  'jpg' => 'image/jpeg',
  'svg' => 'image/svg+xml',
  'ico' => 'image/x-icon'
}.freeze

# Raster formats that are resized when max_height is given.
RESIZABLE_EXTENSIONS = %w[png jpg gif].freeze

# Builds the S3 object key (without extension) for an image.
#
# url  - The source URL of the image; only its host is used.
# hash - Hex digest of the image contents.
#
# Returns a String of the form "<host>/<hash>".
# Raises URI::InvalidURIError if url cannot be parsed.
def build_s3_key(url, hash)
  "#{URI.parse(url).host}/#{hash}"
end

# Builds the public URL of a stored object.
#
# region       - The Amazon region of the bucket.
# bucket       - The bucket name.
# key          - The object key without extension (see build_s3_key).
# content_type - The file extension appended to the key (e.g. "jpg").
#
# Returns the URL as a String.
def build_public_url(region, bucket, key, content_type)
  "https://s3-#{region}.amazonaws.com/#{bucket}/#{key}.#{content_type}"
end

# Serialises an error message in the JSON shape used by every failure response.
def error_json(message)
  { error: message }.to_json
end

# Returns true when url parses as an absolute http(s) URL with a host.
def http_url?(url)
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
  uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
rescue URI::InvalidURIError
  false
end

# Determines the storage extension of a fetched image from its response
# headers. Content-Type is checked first, then Content-Disposition.
#
# Returns the normalised extension ("jpeg" becomes "jpg"), or nil when
# neither header names a recognised image type.
def detect_image_extension(headers)
  candidates = [
    [headers['content-type'], CONTENT_TYPE_PATTERN],
    [headers['content-disposition'], CONTENT_DISPOSITION_PATTERN]
  ]
  candidates.each do |value, pattern|
    match = value && value.to_s.match(pattern)
    return EXTENSION_ALIASES.fetch(match[1], match[1]) if match
  end
  nil
end

# Returns the Content-Type to store for an image with the given extension.
def mime_type_for(extension)
  IMAGE_MIME_TYPES.fetch(extension, "image/#{extension}")
end

# Shrinks an image so that it is at most max_height pixels tall, keeping the
# aspect ratio. Only the first image decoded from the blob is used.
#
# Returns the re-encoded image data.
def resize_to_max_height(blob, max_height)
  image = Magick::Image.from_blob(blob).first
  # The ">" geometry flag only resizes when the image exceeds the given size.
  image.change_geometry!("x#{max_height}>") do |cols, rows, target|
    target.resize!(cols, rows)
  end
  image.to_blob
end

# Saves a publicly readable object with the given name, content and type.
def store_public_object(bucket, name, content, content_type)
  object = bucket.objects.build(name)
  object.content = content
  object.content_type = content_type
  object.acl = :public_read
  object.save
end

# Looks up the "<key>.meta" object of a previously archived image.
#
# Returns the extension recorded in the metadata (or fallback_extension when
# the metadata has no "type:" line), or nil when the metadata object could
# not be read from the bucket.
def archived_extension(bucket, key, fallback_extension)
  meta = bucket.objects.find("#{key}.meta")
  match = meta.content.match(/type: (.+)/)
  match ? match[1] : fallback_extension
rescue S3::Error::ResponseError
  # Any error response from S3 (missing key, or access denied for
  # credentials that may only write) is treated as "not archived yet".
  nil
end

post '/archive' do
  content_type :json

  # request.content_type is nil when the client sends no Content-Type header.
  if request.content_type.to_s.start_with?('application/json')
    begin
      payload = JSON.parse(request.env['rack.input'].read)
    rescue StandardError
      payload = nil
    end
    # Valid JSON that is not an object (e.g. an array) is rejected as well.
    unless payload.is_a?(Hash)
      return error_json("Error parsing request. Ensure you send a JSON or form-encoded payload")
    end
  else
    payload = params
  end

  # Check required parameters
  missing = REQUIRED_FIELDS.find { |field| !payload[field] }
  return error_json("Missing field: #{missing}") if missing

  image_url = payload['url'].to_s
  return error_json("Invalid URL: #{image_url}") unless http_url?(image_url)

  # Fetch the image
  begin
    response = HTTParty.get(image_url, follow_redirects: true)
  rescue StandardError => e
    return error_json("Error fetching URL: #{e.message}")
  end

  if response.code != 200
    return error_json("URL return invalid status code: #{response.code}")
  end

  # Check content type
  # Either Content-Type or Content-Disposition headers are checked
  extension = detect_image_extension(response.headers)
  return error_json("Input was not a recognized image type") unless extension

  # Calculate hash of image contents and check if it already exists in the bucket
  hash = Digest::SHA256.hexdigest(response.body)
  key = build_s3_key(image_url, hash)

  s3 = S3::Service.new(access_key_id: payload['key_id'], secret_access_key: payload['secret_key'])
  bucket = s3.buckets.find(payload['bucket'])

  existing_extension = archived_extension(bucket, key, extension)
  if existing_extension
    public_url = build_public_url(payload['region'], payload['bucket'], key, existing_extension)
    return { url: public_url, new: false }.to_json
  end

  public_url = build_public_url(payload['region'], payload['bucket'], key, extension)

  # Resize the image if requested. The value is coerced to an integer so that
  # arbitrary client input never reaches the geometry string.
  image_data = response.body
  max_height = payload['max_height'].to_s.to_i
  if max_height > 0 && RESIZABLE_EXTENSIONS.include?(extension)
    image_data = resize_to_max_height(response.body, max_height)
  end

  # Store the image first and the metadata last: the metadata object is what
  # marks an image as archived, so it must not exist unless the image does.
  store_public_object(bucket, "#{key}.#{extension}", image_data, mime_type_for(extension))

  metadata = "date: #{Time.now.strftime('%Y-%m-%dT%H:%M:%S')}\n"\
             "type: #{extension}\n"\
             "url: #{image_url}\n"
  store_public_object(bucket, "#{key}.meta", metadata, 'text/plain')

  { url: public_url, new: true }.to_json
end