├── .env
├── .gitignore
├── LICENSE
├── README.md
├── index.js
├── optimizer.js
└── package.json

/.env:
--------------------------------------------------------------------------------
AWS_ACCESS_KEY_ID=changeme
AWS_SECRET_ACCESS_KEY=changeme
AWS_ROLE_ARN=changeme

AWS_REGION=us-west-2
AWS_ENVIRONMENT=production
AWS_FUNCTION_NAME=optimizer
AWS_HANDLER=index.optimizer

AWS_MEMORY_SIZE=1536
AWS_TIMEOUT=300

AWS_RUNTIME=nodejs4.3

SOURCE_BUCKET=bluethumb-art-uploads
UPLOAD_BUCKET=bluethumb-art-uploads-optim
UPLOAD_ACL=public-read
PNG_OPTIM_LEVEL=7
GIF_OPTIM_INTERLACED=true
JPG_OPTIM_PROGRESSIVE=true
MAX_FILE_SIZE=-1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules
*.log
.marker
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 GoSquared

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Optim

Automagically optimize your images on S3 with the magic of AWS Lambda.

Optim is a super-simple [Lambda][l] function that listens to an S3 bucket for uploads and runs everything it can through [imagemin][imagemin].


## Setup

* Clone this repo

* Run `npm install`

* Fill in `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in `.env` with a set of credentials that can create Lambda functions (alternatively, have these already set in your environment)

* Create an IAM role for Optim to use. It needs the following permissions on all the S3 buckets you want to use (allowing these operations on ARN `*` is easiest to start with):
  * `getObject`
  * `putObject`
  * `putObjectAcl`

* Find the ARN for this role. It looks something like `arn:aws:iam::1234567890:role/rolename`.

* Fill in `AWS_ROLE_ARN` in `.env`

* Run `npm run deploy`

* Hurrah, your Lambda function is now deployed! It'll be created as `<AWS_FUNCTION_NAME>-<AWS_ENVIRONMENT>` (`optimizer-production` with the values in this repo's `.env`)

* You can now hook this function up to any S3 bucket you like in the management console. The easiest way is to follow [AWS's guide][s3-evt-setup], or script it as shown below.
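If you'd rather script that last step, here is a minimal sketch of wiring the bucket notification up with the AWS SDK for JavaScript. It is not part of this repo: the file name `setup-notification.js`, the `OPTIM_FUNCTION_ARN` variable and the example ARN are placeholders to substitute with your own values, and it assumes `aws-sdk` is installed locally.

```js
// setup-notification.js — hypothetical helper, not part of this repo.
// Subscribes the deployed Lambda function to ObjectCreated events on the
// source bucket, instead of clicking through the management console.
require('dotenv').load(); // pick up SOURCE_BUCKET etc. from .env

var aws = require('aws-sdk');

var region      = process.env.AWS_REGION || 'us-west-2';
var bucket      = process.env.SOURCE_BUCKET;
// Placeholder: ARN of the deployed function,
// e.g. arn:aws:lambda:us-west-2:1234567890:function:optimizer-production
var functionArn = process.env.OPTIM_FUNCTION_ARN;

var s3     = new aws.S3({ region: region });
var lambda = new aws.Lambda({ region: region });

// 1. Allow S3 to invoke the function.
lambda.addPermission({
  FunctionName: functionArn,
  StatementId: 'optim-s3-invoke',
  Action: 'lambda:InvokeFunction',
  Principal: 's3.amazonaws.com',
  SourceArn: 'arn:aws:s3:::' + bucket
}, function (err) {
  // Ignore the error if the permission statement already exists.
  if (err && err.code !== 'ResourceConflictException') throw err;

  // 2. Point the bucket's ObjectCreated notifications at the function.
  s3.putBucketNotificationConfiguration({
    Bucket: bucket,
    NotificationConfiguration: {
      LambdaFunctionConfigurations: [{
        Id: 'optim',
        Events: ['s3:ObjectCreated:*'],
        LambdaFunctionArn: functionArn
      }]
    }
  }, function (err) {
    if (err) throw err;
    console.log('Bucket', bucket, 'now notifies', functionArn);
  });
});
```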
## Configuration

Configuration lives in `.env` and falls into two groups: settings used to set up and deploy the Lambda function, and settings that control how Optim behaves at runtime.

Deployment settings:

* `AWS_ACCESS_KEY_ID`: the AWS access key used to deploy the Lambda function
* `AWS_SECRET_ACCESS_KEY`: the corresponding secret access key
* `AWS_ROLE_ARN`: the role the Lambda function will execute under
* `AWS_REGION`: which region to deploy to
* `AWS_FUNCTION_NAME` and `AWS_ENVIRONMENT`: control the name of the Lambda function that gets created
* `AWS_MEMORY_SIZE`: the amount of memory given to your Lambda. It also determines how much CPU share the function gets, and since optimizing images is fairly intensive, it's probably best to keep this high
* `AWS_TIMEOUT`: runtime timeout for the Lambda in seconds, up to 5 minutes. Again, image optimization is fairly intensive, so you'll probably want to leave this at the maximum of 300

Runtime settings:

* `SOURCE_BUCKET`: the bucket uploads are read from
* `UPLOAD_BUCKET`: the bucket optimized images are written to. If unset, optimized images are written back to `SOURCE_BUCKET`
* `UPLOAD_ACL`: optimized images will be uploaded with this permission level. Should be one of `private`, `public-read`, `public-read-write`, `aws-exec-read`, `authenticated-read`, `bucket-owner-read` or `bucket-owner-full-control`. Default is `public-read`
* `MAX_FILE_SIZE`: files over this size in bytes will be skipped (big PNGs will probably just hit the timeout anyway). Set to `-1` for no limit
* `PNG_OPTIM_LEVEL`: optimization level to use for PNGs, between 0 and 7. A lower level means faster optimization, a higher level means better results
* `JPG_OPTIM_PROGRESSIVE`: set to `true` to write progressive JPEGs
* `GIF_OPTIM_INTERLACED`: set to `true` to write interlaced GIFs


[l]: https://aws.amazon.com/lambda/
[imagemin]: https://github.com/imagemin/imagemin
[s3-evt-setup]: http://docs.aws.amazon.com/AmazonS3/latest/UG/SettingBucketNotifications.html
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
module.exports = require('./optimizer');
--------------------------------------------------------------------------------
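The repo ships no test script, but a hypothetical local smoke test (a file you might call `test-local.js`, not part of the repo) can exercise the optimizer without deploying. It assumes `aws-sdk` is installed locally, that `.env` points at real buckets, and that the handler is exported as a `function(event, context)` as in `optimizer.js` below.

```js
// test-local.js — hypothetical helper, not part of this repo.
// Loads .env, then invokes the exported handler directly so the bucket scan
// runs with your local credentials instead of inside Lambda.
require('dotenv').load();

var optimizer = require('./index');

// The handler ignores the event payload and resumes the scan of
// SOURCE_BUCKET from the last saved ./.marker position.
optimizer.optimizer({}, {});
```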
/optimizer.js:
--------------------------------------------------------------------------------
var aws = require('aws-sdk'),
    s3 = new aws.S3(), //{ apiVersion: '2006-03-01' }
    Imagemin = require('imagemin'),
    async = require('async'),
    fs = require('fs'),
    env = require('dotenv'),
    _ = require('underscore');

// Load environment variables if not already loaded
if (!process.env.AWS_ACCESS_KEY_ID) {
  require('dotenv').load();
  console.log(process.env);
}

var SOURCE_BUCKET = process.env.SOURCE_BUCKET;
var UPLOAD_BUCKET = process.env.UPLOAD_BUCKET;
var UPLOAD_ACL = process.env.UPLOAD_ACL || 'public-read';
var SKIP_FILE_SIZE = +process.env.MAX_FILE_SIZE || -1;

// Imagemin options object for all image types
var imageminOptions = {
  optimizationLevel: (+process.env.PNG_OPTIM_LEVEL || 7),
  progressive: (process.env.JPG_OPTIM_PROGRESSIVE == 'true'),
  interlaced: (process.env.GIF_OPTIM_INTERLACED == 'true')
};

// How many keys to retrieve with a single request to the S3 API.
// Larger key sets require paging and multiple calls.
var maxKeys = 100; // number of keys per query

var processedLog = './processed.log'; // file listing all processed keys
var skippedLog = './skipped.log';     // file listing all skipped keys
var markerFile = './.marker';         // file containing the current key marker

// Array of S3 keys to process
var keys = [];

// State flags
var isLoadingData = false;
var isComplete = false;

// Optimize a single image, given its S3 key
function processOne(key, callback) {
  console.log('Processing', key);

  async.waterfall([
    function check(next) {
      s3.headObject({ Bucket: SOURCE_BUCKET, Key: key }, function(err, data) {
        if (err) return next(err);

        if (data.Metadata && data.Metadata.optimized) {
          console.log('Image is already optimized. Skipping.');
          return next('skip');
        }

        if (!isImageFile(key)) {
          console.log('File is not an image type. Skipping.');
          return next('skip');
        }

        if (!data.ContentLength) {
          console.log('Image is empty. Skipping.');
          return next('skip');
        }

        // console.log('File size is ' + data.ContentLength + ' bytes');
        if (SKIP_FILE_SIZE !== -1 && data.ContentLength > SKIP_FILE_SIZE) {
          console.log('Image is larger than configured threshold. Skipping.');
          return next('skip');
        }

        next(null, data);
      });
    },

    function download(meta, next) {
      s3.getObject({ Bucket: SOURCE_BUCKET, Key: key }, function(err, data) {
        if (err) return next(err);
        next(null, meta, data);
      });
    },

    function process(meta, obj, next) {
      new Imagemin()
        .src(obj.Body)
        .use(Imagemin.jpegtran(imageminOptions))
        .use(Imagemin.gifsicle(imageminOptions))
        .use(Imagemin.optipng(imageminOptions))
        .use(Imagemin.svgo({ plugins: imageminOptions.svgoPlugins || [] }))
        .run(function(err, files) {
          if (err) return next(err);
          console.log('Optimized! Final file size reduced from ' + obj.Body.length + ' to ' + files[0].contents.length + ' bytes');
          next(null, meta, obj, files[0]);
        });
    },

    function upload(meta, obj, file, next) {
      meta.Metadata.optimized = 'y';

      s3.putObject({
        ACL: UPLOAD_ACL,
        Bucket: UPLOAD_BUCKET || SOURCE_BUCKET,
        Key: key,
        Body: file.contents,
        ContentType: obj.ContentType,
        Metadata: meta.Metadata
      }, function(err) {
        if (err) return next(err);

        console.log('File uploaded', key);
        onImageOptimized(key);
        next();
      });
    }
  ], function(err) {
    if (err === 'skip') {
      fs.appendFileSync(skippedLog, key + '\n'); // add to skipped files log
      updateMarkerFile(key);
      err = null;
    }
    callback(err);
  });
}

function loadLastMarker() {
  if (!fs.existsSync(markerFile))
    return null;
  return fs.readFileSync(markerFile).toString();
}
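
// How a full-bucket scan proceeds:
//   1. processNext() asks listKeyPage() for up to `maxKeys` keys, starting
//      from the marker saved in ./.marker (if any).
//   2. Each key is optimized (or skipped) one at a time via processOne().
//   3. After every processed or skipped key, the marker file is updated to
//      that key, so an interrupted or timed-out run resumes where it left off.
//   4. When a page is exhausted, the next page is listed from the saved
//      marker; when S3 returns no more keys, onComplete() ends the scan.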

// Recursive function to be called until there are no files left to optimize.
function processNext() {

  // Do nothing if complete
  if (isComplete)
    return false;

  if (keys.length == 0 && isLoadingData)
    return false;

  // If there are no keys left and not loading then load some more
  if (keys.length == 0) {
    if (!isLoadingData) {
      isLoadingData = true;
      console.log('Listing more keys:', loadLastMarker());
      listKeyPage({
        bucket: SOURCE_BUCKET,
        marker: loadLastMarker()
        //prefix: 'myKey/'
      },
      function (error, nextMarker, keyset) {
        if (error) throw error;

        isLoadingData = false;

        // Update list of keys to process
        keys = keys.concat(keyset);

        // NOTE: Don't set the last marker here.
        // Since the S3 key is the marker, we set it after
        // the last image has been optimized.

        if (keys.length > 0) {
          processNext();
          // processTasks();
        } else {
          onComplete();
        }
      });
    }
    return false;
  }

  // Process the next key in the queue
  var key = keys.shift();
  // numTasks++;
  processOne(key, function() {
    // numTasks--;
    processNext();
    // processTasks();
  });
  return true;
}

function onImageOptimized(key) {
  updateMarkerFile(key);
  fs.appendFileSync(processedLog, key + '\n'); // add to processed files log
}

function updateMarkerFile(key) {
  fs.writeFileSync(markerFile, key); // update the current marker
}

function onComplete() {
  isComplete = true;
  console.log('Optimization complete!');
}

/**
 * List one page of a set of keys from the specified bucket.
 *
 * If providing a prefix, only keys matching the prefix will be returned.
 *
 * If providing a delimiter, then a set of distinct path segments will be
 * returned from the keys to be listed. This is a way of listing "folders"
 * present given the keys that are there.
 *
 * If providing a marker, list a page of keys starting from the marker
 * position. Otherwise return the first page of keys.
 *
 * @param {Object} options
 * @param {String} options.bucket - The bucket name.
 * @param {String} [options.prefix] - If set, only return keys beginning with
 *   the prefix value.
 * @param {String} [options.delimiter] - If set, return a list of distinct
 *   folders based on splitting keys by the delimiter.
 * @param {String} [options.marker] - If set, list only a paged set of keys
 *   starting from the marker.
 * @param {Function} callback - Callback of the form
 *   function (error, nextMarker, keys).
 */
function listKeyPage(options, callback) {
  var params = {
    Bucket   : options.bucket,
    Delimiter: options.delimiter,
    Marker   : options.marker,
    MaxKeys  : maxKeys,
    Prefix   : options.prefix
  };

  s3.listObjects(params, function (error, response) {
    if (error) {
      return callback(error);
    } else if (response.err) {
      return callback(new Error(response.err));
    }

    // Convert the results into an array of key strings, or
    // common prefixes if we're using a delimiter.
    var keys;
    if (options.delimiter) {
      // Note that if you set MaxKeys to 1 you can see some interesting
      // behavior in which the first response has no response.CommonPrefixes
      // values, and so we have to skip over that and move on to the
      // next page.
      keys = _.map(response.CommonPrefixes, function (item) {
        return item.Prefix;
      });
    } else {
      keys = _.map(response.Contents, function (item) {
        return item.Key;
      });
    }

    // Check to see if there are yet more keys to be obtained, and if so
    // return the marker for use in the next request.
    var nextMarker;
    if (response.IsTruncated) {
      if (options.delimiter) {
        // If specifying a delimiter, the response.NextMarker field exists.
        nextMarker = response.NextMarker;
      } else {
        // For normal listing, there is no response.NextMarker
        // and we must use the last key instead.
        nextMarker = keys[keys.length - 1];
      }
    }

    callback(null, nextMarker, keys);
  });
}

// Infer the image type from the key's file extension.
function isImageFile(key) {
  var extMatch = key.match(/\.([^.]*)$/);
  if (!extMatch) {
    console.error('Unable to infer image type for key ' + key);
    return false;
  }
  var ext = extMatch[1].toLowerCase();
  if (ext != "jpg" && ext != "jpeg" && ext != "gif" && ext != "png" && ext != "svg") {
    // console.log('skipping non-image ' + key);
    return false;
  }
  return true;
}

// Lambda entry point (AWS_HANDLER=index.optimizer in .env). The incoming S3
// event is not inspected; each invocation simply resumes the bucket scan from
// the saved marker.
module.exports.optimizer = function(event, context) {
  processNext();
};
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "s3-image-optimizer",
  "version": "0.1.0",
  "description": "Optimise images on S3",
  "main": "index.js",
  "repository": {
    "type": "git",
    "url": "git://github.com/sourcey/s3-image-optimizer.git"
  },
  "license": "MIT",
  "author": "Kam Low (http://sourcey.com/)",
  "scripts": {
    "deploy": "node-lambda deploy"
  },
  "dependencies": {
    "async": "^1.5.0",
    "dotenv": "^1.2.0",
    "imagemin": "^3.2.0",
    "imagemin-gifsicle": "^4.0.0",
    "imagemin-jpegtran": "^4.0.0",
    "imagemin-optipng": "^4.3.0",
    "imagemin-svgo": "^4.0.0",
    "underscore": "^1.8.3"
  },
  "devDependencies": {
    "node-lambda": "^0.7.1"
  }
}
--------------------------------------------------------------------------------