├── .env
├── .gitignore
├── LICENSE
├── README.md
├── index.js
├── optimizer.js
└── package.json

/.env:
--------------------------------------------------------------------------------
AWS_ACCESS_KEY_ID=changeme
AWS_SECRET_ACCESS_KEY=changeme
AWS_ROLE_ARN=changeme

AWS_REGION=us-west-2
AWS_ENVIRONMENT=production
AWS_FUNCTION_NAME=optimizer
AWS_HANDLER=index.optimizer

AWS_MEMORY_SIZE=1536
AWS_TIMEOUT=300

AWS_RUNTIME=nodejs4.3

SOURCE_BUCKET=bluethumb-art-uploads
UPLOAD_BUCKET=bluethumb-art-uploads-optim
UPLOAD_ACL=public-read
PNG_OPTIM_LEVEL=7
GIF_OPTIM_INTERLACED=true
JPG_OPTIM_PROGRESSIVE=true
MAX_FILE_SIZE=-1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules
*.log
.marker
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 GoSquared

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Optim

Automagically optimize your images on S3 with the magic of AWS Lambda.

Optim is a super-simple [Lambda][l] function that listens to an S3 bucket for uploads and runs everything it can through [imagemin][imagemin].


## Setup

* Clone this repo

* Run `npm install`

* Fill in `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in `.env` with a set of credentials that can create Lambda functions (alternatively, have these already set in your environment)

* Create an IAM role for Optim to use. It needs the following permissions on all the S3 buckets you want to use (allowing these operations on ARN `*` is easiest to start with):
  * `getObject`
  * `putObject`
  * `putObjectAcl`

* Find the ARN for this role. It looks something like `arn:aws:iam::1234567890:role/rolename`.

* Fill in `AWS_ROLE_ARN` in `.env`

* Run `npm run deploy`

* Hurrah, your Lambda function is now deployed! It'll be created as `<AWS_FUNCTION_NAME>-<AWS_ENVIRONMENT>` (`optimizer-production` with the values in this repo's `.env`)

* You can now hook this function up to any S3 bucket you like in the management console. The easiest way is to follow [AWS's guide][s3-evt-setup], or script it as shown below.
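If you'd rather script that last step, here is a minimal sketch of wiring the bucket notification up with the AWS SDK for JavaScript. It is not part of this repo: the file name `setup-notification.js`, the `OPTIM_FUNCTION_ARN` variable and the example ARN are placeholders to substitute with your own values, and it assumes `aws-sdk` is installed locally.

```js
// setup-notification.js — hypothetical helper, not part of this repo.
// Subscribes the deployed Lambda function to ObjectCreated events on the
// source bucket, instead of clicking through the management console.
require('dotenv').load(); // pick up SOURCE_BUCKET etc. from .env

var aws = require('aws-sdk');

var region      = process.env.AWS_REGION || 'us-west-2';
var bucket      = process.env.SOURCE_BUCKET;
// Placeholder: ARN of the deployed function,
// e.g. arn:aws:lambda:us-west-2:1234567890:function:optimizer-production
var functionArn = process.env.OPTIM_FUNCTION_ARN;

var s3     = new aws.S3({ region: region });
var lambda = new aws.Lambda({ region: region });

// 1. Allow S3 to invoke the function.
lambda.addPermission({
  FunctionName: functionArn,
  StatementId: 'optim-s3-invoke',
  Action: 'lambda:InvokeFunction',
  Principal: 's3.amazonaws.com',
  SourceArn: 'arn:aws:s3:::' + bucket
}, function (err) {
  // Ignore the error if the permission statement already exists.
  if (err && err.code !== 'ResourceConflictException') throw err;

  // 2. Point the bucket's ObjectCreated notifications at the function.
  s3.putBucketNotificationConfiguration({
    Bucket: bucket,
    NotificationConfiguration: {
      LambdaFunctionConfigurations: [{
        Id: 'optim',
        Events: ['s3:ObjectCreated:*'],
        LambdaFunctionArn: functionArn
      }]
    }
  }, function (err) {
    if (err) throw err;
    console.log('Bucket', bucket, 'now notifies', functionArn);
  });
});
```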
## Configuration

Configuration lives in `.env` and falls into two groups: settings used to set up and deploy the Lambda function, and settings that control how Optim behaves at runtime.

Deployment settings:

* `AWS_ACCESS_KEY_ID`: the AWS access key used to deploy the Lambda function
* `AWS_SECRET_ACCESS_KEY`: the corresponding secret access key
* `AWS_ROLE_ARN`: the role the Lambda function will execute under
* `AWS_REGION`: which region to deploy to
* `AWS_FUNCTION_NAME` and `AWS_ENVIRONMENT`: control the name of the Lambda function that gets created
* `AWS_MEMORY_SIZE`: the amount of memory given to your Lambda. It also determines how much CPU share the function gets, and since optimizing images is fairly intensive, it's probably best to keep this high
* `AWS_TIMEOUT`: runtime timeout for the Lambda in seconds, up to 5 minutes. Again, image optimization is fairly intensive, so you'll probably want to leave this at the maximum of 300

Runtime settings:

* `SOURCE_BUCKET`: the bucket uploads are read from
* `UPLOAD_BUCKET`: the bucket optimized images are written to. If unset, optimized images are written back to `SOURCE_BUCKET`
* `UPLOAD_ACL`: optimized images will be uploaded with this permission level. Should be one of `private`, `public-read`, `public-read-write`, `aws-exec-read`, `authenticated-read`, `bucket-owner-read` or `bucket-owner-full-control`. Default is `public-read`
* `MAX_FILE_SIZE`: files over this size in bytes will be skipped (big PNGs will probably just hit the timeout anyway). Set to `-1` for no limit
* `PNG_OPTIM_LEVEL`: optimization level to use for PNGs, between 0 and 7. A lower level means faster optimization, a higher level means better results
* `JPG_OPTIM_PROGRESSIVE`: set to `true` to write progressive JPEGs
* `GIF_OPTIM_INTERLACED`: set to `true` to write interlaced GIFs


[l]: https://aws.amazon.com/lambda/
[imagemin]: https://github.com/imagemin/imagemin
[s3-evt-setup]: http://docs.aws.amazon.com/AmazonS3/latest/UG/SettingBucketNotifications.html
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
module.exports = require('./optimizer');
--------------------------------------------------------------------------------
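The repo ships no test script, but a hypothetical local smoke test (a file you might call `test-local.js`, not part of the repo) can exercise the optimizer without deploying. It assumes `aws-sdk` is installed locally, that `.env` points at real buckets, and that the handler is exported as a `function(event, context)` as in `optimizer.js` below.

```js
// test-local.js — hypothetical helper, not part of this repo.
// Loads .env, then invokes the exported handler directly so the bucket scan
// runs with your local credentials instead of inside Lambda.
require('dotenv').load();

var optimizer = require('./index');

// The handler ignores the event payload and resumes the scan of
// SOURCE_BUCKET from the last saved ./.marker position.
optimizer.optimizer({}, {});
```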
/optimizer.js:
--------------------------------------------------------------------------------
var aws = require('aws-sdk'),
    s3 = new aws.S3(), //{ apiVersion: '2006-03-01' }
    Imagemin = require('imagemin'),
    async = require('async'),
    fs = require('fs'),
    env = require('dotenv'),
    _ = require('underscore');

// Load environment variables if not already loaded
if (!process.env.AWS_ACCESS_KEY_ID) {
  require('dotenv').load();
  console.log(process.env);
}

var SOURCE_BUCKET = process.env.SOURCE_BUCKET;
var UPLOAD_BUCKET = process.env.UPLOAD_BUCKET;
var UPLOAD_ACL = process.env.UPLOAD_ACL || 'public-read';
var SKIP_FILE_SIZE = +process.env.MAX_FILE_SIZE || -1;

// Imagemin options object for all image types
var imageminOptions = {
  optimizationLevel: (+process.env.PNG_OPTIM_LEVEL || 7),
  progressive: (process.env.JPG_OPTIM_PROGRESSIVE == 'true'),
  interlaced: (process.env.GIF_OPTIM_INTERLACED == 'true')
};

// How many keys to retrieve with a single request to the S3 API.
// Larger key sets require paging and multiple calls.
var maxKeys = 100; // number of keys per query

var processedLog = './processed.log'; // file listing all processed keys
var skippedLog = './skipped.log';     // file listing all skipped keys
var markerFile = './.marker';         // file containing the current key marker

// Array of S3 keys to process
var keys = [];

// State flags
var isLoadingData = false;
var isComplete = false;

// Optimize a single image, given its S3 key
function processOne(key, callback) {
  console.log('Processing', key);

  async.waterfall([
    function check(next) {
      s3.headObject({ Bucket: SOURCE_BUCKET, Key: key }, function(err, data) {
        if (err) return next(err);

        if (data.Metadata && data.Metadata.optimized) {
          console.log('Image is already optimized. Skipping.');
          return next('skip');
        }

        if (!isImageFile(key)) {
          console.log('File is not an image type. Skipping.');
          return next('skip');
        }

        if (!data.ContentLength) {
          console.log('Image is empty. Skipping.');
          return next('skip');
        }

        // console.log('File size is ' + data.ContentLength + ' bytes');
        if (SKIP_FILE_SIZE !== -1 && data.ContentLength > SKIP_FILE_SIZE) {
          console.log('Image is larger than configured threshold. Skipping.');
          return next('skip');
        }

        next(null, data);
      });
    },

    function download(meta, next) {
      s3.getObject({ Bucket: SOURCE_BUCKET, Key: key }, function(err, data) {
        if (err) return next(err);
        next(null, meta, data);
      });
    },

    function process(meta, obj, next) {
      new Imagemin()
        .src(obj.Body)
        .use(Imagemin.jpegtran(imageminOptions))
        .use(Imagemin.gifsicle(imageminOptions))
        .use(Imagemin.optipng(imageminOptions))
        .use(Imagemin.svgo({ plugins: imageminOptions.svgoPlugins || [] }))
        .run(function(err, files) {
          if (err) return next(err);
          console.log('Optimized! Final file size reduced from ' + obj.Body.length + ' to ' + files[0].contents.length + ' bytes');
          next(null, meta, obj, files[0]);
        });
    },

    function upload(meta, obj, file, next) {
      meta.Metadata.optimized = 'y';

      s3.putObject({
        ACL: UPLOAD_ACL,
        Bucket: UPLOAD_BUCKET || SOURCE_BUCKET,
        Key: key,
        Body: file.contents,
        ContentType: obj.ContentType,
        Metadata: meta.Metadata
      }, function(err) {
        if (err) return next(err);

        console.log('File uploaded', key);
        onImageOptimized(key);
        next();
      });
    }
  ], function(err) {
    if (err === 'skip') {
      fs.appendFileSync(skippedLog, key + '\n'); // add to skipped files log
      updateMarkerFile(key);
      err = null;
    }
    callback(err);
  });
}

function loadLastMarker() {
  if (!fs.existsSync(markerFile))
    return null;
  return fs.readFileSync(markerFile).toString();
}
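
// How a full-bucket scan proceeds:
//   1. processNext() asks listKeyPage() for up to `maxKeys` keys, starting
//      from the marker saved in ./.marker (if any).
//   2. Each key is optimized (or skipped) one at a time via processOne().
//   3. After every processed or skipped key, the marker file is updated to
//      that key, so an interrupted or timed-out run resumes where it left off.
//   4. When a page is exhausted, the next page is listed from the saved
//      marker; when S3 returns no more keys, onComplete() ends the scan.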

// Recursive function to be called until there are no files left to optimize.
function processNext() {

  // Do nothing if complete
  if (isComplete)
    return false;

  if (keys.length == 0 && isLoadingData)
    return false;

  // If there are no keys left and not loading then load some more
  if (keys.length == 0) {
    if (!isLoadingData) {
      isLoadingData = true;
      console.log('Listing more keys:', loadLastMarker());
      listKeyPage({
        bucket: SOURCE_BUCKET,
        marker: loadLastMarker()
        //prefix: 'myKey/'
      },
      function (error, nextMarker, keyset) {
        if (error) throw error;

        isLoadingData = false;

        // Update list of keys to process
        keys = keys.concat(keyset);

        // NOTE: Don't set the last marker here.
        // Since the S3 key is the marker, we set it after
        // the last image has been optimized.

        if (keys.length > 0) {
          processNext();
          // processTasks();
        } else {
          onComplete();
        }
      });
    }
    return false;
  }

  // Process the next key in the queue
  var key = keys.shift();
  // numTasks++;
  processOne(key, function() {
    // numTasks--;
    processNext();
    // processTasks();
  });
  return true;
}

function onImageOptimized(key) {
  updateMarkerFile(key);
  fs.appendFileSync(processedLog, key + '\n'); // add to processed files log
}

function updateMarkerFile(key) {
  fs.writeFileSync(markerFile, key); // update the current marker
}

function onComplete() {
  isComplete = true;
  console.log('Optimization complete!');
}

/**
 * List one page of a set of keys from the specified bucket.
 *
 * If providing a prefix, only keys matching the prefix will be returned.
 *
 * If providing a delimiter, then a set of distinct path segments will be
 * returned from the keys to be listed. This is a way of listing "folders"
 * present given the keys that are there.
 *
 * If providing a marker, list a page of keys starting from the marker
 * position. Otherwise return the first page of keys.
 *
 * @param {Object} options
 * @param {String} options.bucket - The bucket name.
 * @param {String} [options.prefix] - If set, only return keys beginning with
 *   the prefix value.
 * @param {String} [options.delimiter] - If set, return a list of distinct
 *   folders based on splitting keys by the delimiter.
 * @param {String} [options.marker] - If set, list only a paged set of keys
 *   starting from the marker.
 * @param {Function} callback - Callback of the form
 *   function (error, nextMarker, keys).
 */
function listKeyPage(options, callback) {
  var params = {
    Bucket   : options.bucket,
    Delimiter: options.delimiter,
    Marker   : options.marker,
    MaxKeys  : maxKeys,
    Prefix   : options.prefix
  };

  s3.listObjects(params, function (error, response) {
    if (error) {
      return callback(error);
    } else if (response.err) {
      return callback(new Error(response.err));
    }

    // Convert the results into an array of key strings, or
    // common prefixes if we're using a delimiter.
    var keys;
    if (options.delimiter) {
      // Note that if you set MaxKeys to 1 you can see some interesting
      // behavior in which the first response has no response.CommonPrefixes
      // values, and so we have to skip over that and move on to the
      // next page.
      keys = _.map(response.CommonPrefixes, function (item) {
        return item.Prefix;
      });
    } else {
      keys = _.map(response.Contents, function (item) {
        return item.Key;
      });
    }

    // Check to see if there are yet more keys to be obtained, and if so
    // return the marker for use in the next request.
    var nextMarker;
    if (response.IsTruncated) {
      if (options.delimiter) {
        // If specifying a delimiter, the response.NextMarker field exists.
        nextMarker = response.NextMarker;
      } else {
        // For normal listing, there is no response.NextMarker
        // and we must use the last key instead.
        nextMarker = keys[keys.length - 1];
      }
    }

    callback(null, nextMarker, keys);
  });
}

// Infer the image type from the key's file extension.
function isImageFile(key) {
  var extMatch = key.match(/\.([^.]*)$/);
  if (!extMatch) {
    console.error('Unable to infer image type for key ' + key);
    return false;
  }
  var ext = extMatch[1].toLowerCase();
  if (ext != "jpg" && ext != "jpeg" && ext != "gif" && ext != "png" && ext != "svg") {
    // console.log('skipping non-image ' + key);
    return false;
  }
  return true;
}

// Lambda entry point (AWS_HANDLER=index.optimizer in .env). The incoming S3
// event is not inspected; each invocation simply resumes the bucket scan from
// the saved marker.
module.exports.optimizer = function(event, context) {
  processNext();
};
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "s3-image-optimizer",
  "version": "0.1.0",
  "description": "Optimise images on S3",
  "main": "index.js",
  "repository": {
    "type": "git",
    "url": "git://github.com/sourcey/s3-image-optimizer.git"
  },
  "license": "MIT",
  "author": "Kam Low (http://sourcey.com/)",
  "scripts": {
    "deploy": "node-lambda deploy"
  },
  "dependencies": {
    "async": "^1.5.0",
    "dotenv": "^1.2.0",
    "imagemin": "^3.2.0",
    "imagemin-gifsicle": "^4.0.0",
    "imagemin-jpegtran": "^4.0.0",
    "imagemin-optipng": "^4.3.0",
    "imagemin-svgo": "^4.0.0",
    "underscore": "^1.8.3"
  },
  "devDependencies": {
    "node-lambda": "^0.7.1"
  }
}
--------------------------------------------------------------------------------