├── .eslintrc.json ├── .gitignore ├── History.md ├── LICENSE ├── README.md ├── index.js ├── lib ├── Request.js ├── S3.js └── S3Lambda.js ├── package.json └── test └── index.js /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "airbnb", 3 | "parserOptions": { 4 | "sourceType": "script" 5 | }, 6 | "rules": { 7 | "lines-around-comment": ["warn", { 8 | "beforeLineComment": true, 9 | "afterLineComment": false, 10 | "beforeBlockComment": true, 11 | "afterBlockComment": true 12 | }], 13 | "padded-blocks": ["off"], 14 | "comma-dangle": ["error", "never"], 15 | "no-plusplus": ["off"], 16 | "no-param-reassign": ["off"], 17 | "no-undef": ["off"], 18 | "strict": ["error", "global"], 19 | "semi": ["error", "never"] 20 | }, 21 | "plugins": [ 22 | "import" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | test/buckets 3 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | 2 | 5.1.8 / 2017-05-17 3 | ================== 4 | 5 | * add disclaimer to README 6 | 7 | 5.1.7 / 2017-03-31 8 | ================== 9 | 10 | * Add Match Option in Context 11 | * Add Support for signatureVersion v4. Can be set in Config Object. 12 | * ignore empty objects (including folder prefixes) when listing keys 13 | 14 | 5.1.6 / 2017-03-28 15 | ================== 16 | 17 | * added better defensive code around transformer 18 | 19 | 5.1.5 / 2017-03-28 20 | ================== 21 | 22 | * added rename parameter to output method 23 | 24 | 5.1.4 / 2017-03-22 25 | ================== 26 | 27 | * Add exclude modifier 28 | 29 | 5.1.3 / 2017-03-21 30 | ================== 31 | 32 | * Pass key to transformer function 33 | 34 | 5.1.2 / 2017-03-15 35 | ================== 36 | 37 | * fixed concurrency in reduce (used to be forced to 1) 38 | * cleaned code 39 | * improved documentation 40 | 41 | 5.1.1 / 2017-03-13 42 | ================== 43 | 44 | * fix edgecase where no keys are returned 45 | 46 | 5.1.0 / 2017-03-09 47 | ================== 48 | 49 | * add inplace option for destructive actions and make it necessary 50 | * forEach should not permanently overwrite the context's concurrency 51 | * Use tape's built-in assertions for comparisons in tests 52 | * Pass transformer and encoding from context to s3 53 | 54 | 5.0.2 / 2017-02-12 55 | ================== 56 | * Updated options object for context 57 | 58 | 5.0.0 / 2017-02-09 59 | ================== 60 | 61 | * S3renity is deprecated, moved to s3-lambda 62 | 63 | 4.0.0 / 2017-01-23 64 | ================== 65 | 66 | * Added precommit hooks to lint and validate 67 | * fix all linting errors. removed join function 68 | * removed join 69 | * reorganized the way s3 objects are handled 70 | * implemented 'endPrefix', removed superfluous 'list' function 71 | * fixed encoding settings (inherit from s3 class). fixed some file names/comments. 72 | * bug fixes 73 | 74 | 3.0.4 / 2016-08-12 75 | ================== 76 | 77 | * added async modifier function 78 | 79 | 3.0.3 / 2016-07-21 80 | ================== 81 | 82 | * changed a showProgess property to a verbose property 83 | 84 | 3.0.2 / 2016-07-20 85 | ================== 86 | 87 | * Merge pull request #30 from littlstar/make-config-backwards-compatible 88 | * updated config so that you can use a real AWS config, wile making the old keys backwards compatible 89 | 90 | 3.0.1 / 2016-07-13 91 | ================== 92 | 93 | * renamed s3renity file (oops) 94 | 95 | 3.0.0 / 2016-07-13 96 | ================== 97 | 98 | * Merge pull request #29 from littlstar/fix-deferred-references 99 | * code refactoring, creates new request now (again) per batch 100 | * split out s3 wrapper to its own module 101 | * switched the rest of the deferred references to promises 102 | * big refactor 103 | 104 | 2.2.11 / 2016-06-20 105 | =================== 106 | 107 | * removed unnecessary try-catch blocks and custom error (better to use stack) 108 | 109 | 2.2.10 / 2016-06-20 110 | =================== 111 | 112 | * small update 113 | 114 | 2.2.9 / 2016-06-20 115 | ================== 116 | 117 | * fix bug with reverse 118 | 119 | 2.2.8 / 2016-06-19 120 | ================== 121 | 122 | * added reverse function to batch request 123 | 124 | 2.2.7 / 2016-06-19 125 | ================== 126 | 127 | * better error handling 128 | 129 | 2.2.6 / 2016-06-19 130 | ================== 131 | 132 | * removed unnecessary try-catch blocks 133 | * added limit function to batch request 134 | * Update README.md 135 | 136 | 2.2.5 / 2016-05-16 137 | ================== 138 | 139 | * Merge pull request #25 from littlstar/reverse-option 140 | * added option to reverse the order of the objects traversed in context 141 | 142 | 2.2.4 / 2016-05-12 143 | ================== 144 | 145 | * fixed incorrectly calling progress.tick() for show_progress 146 | * cleaned some promise code up 147 | 148 | 2.2.3 / 2016-05-05 149 | ================== 150 | 151 | * fixed error handling bug and tightened up some code 152 | 153 | 2.2.2 / 2016-04-28 154 | ================== 155 | 156 | * update readme whitespace and wording 157 | 158 | 2.2.1 / 2016-04-27 159 | ================== 160 | 161 | * Merge pull request #22 from littlstar/fix-edgecase-bug 162 | * changed default timeout from 1 second to 10 seconds 163 | * fix edgecase bug when operating over multiple of 1000 files 164 | 165 | 2.2.0 / 2016-04-26 166 | ================== 167 | 168 | * properly show batch progress with a progress bar. separate verbose from show_progress options 169 | * removed documentation, moving to README 170 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Little Star Media, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## s3-lambda 2 | 3 | `s3-lambda` enables you to run lambda functions over a context of [S3](https://aws.amazon.com/s3/) objects. It has a stateless architecture with concurrency control, allowing you to process a large number of files very quickly. This is useful for quickly prototyping complex data jobs without an infrastructure like Hadoop or Spark. 4 | 5 | At Littlstar, we use `s3-lambda` for all sorts of data pipelining and analytics. 6 | 7 | **Disclaimer** This module does *not* interact with the AWS Lambda service; the name `s3-lambda` is referring to lambda functions in computer science, and all s3 file processing happens locally. 8 | 9 | ## Install 10 | ```bash 11 | npm install s3-lambda --save 12 | ``` 13 | 14 | ## Quick Example 15 | ```javascript 16 | const S3Lambda = require('s3-lambda') 17 | 18 | // example options 19 | const lambda = new S3Lambda({ 20 | accessKeyId: 'aws-access-key', // Optional. (falls back on local AWS credentials) 21 | secretAccessKey: 'aws-secret-key', // Optional. (falls back on local AWS credentials) 22 | showProgress: true, // Optional. Show progress bar in stdout 23 | verbose: true, // Optional. Show all S3 operations in stdout (GET, PUT, DELETE) 24 | signatureVersion: 'v4', // Optional. Signature Version used in Authentication. Defaults to "v4" 25 | maxRetries: 10, // Optional. Maximum request retries on an S3 object. Defaults to 10. 26 | timeout: 10000 // Optional. Amount of time for request to timeout. Defaults to 10000 (10s) 27 | }) 28 | 29 | const context = { 30 | bucket: 'my-bucket', 31 | prefix: 'path/to/files/' 32 | } 33 | 34 | lambda 35 | .context(context) 36 | .forEach(object => { 37 | // do something with object 38 | }) 39 | .then(_ => console.log('done!')) 40 | .catch(console.error) 41 | ``` 42 | 43 | ## Setting Context 44 | Before initiating a lambda expression, you must tell `s3-lambda` what files to operate over by calling `context`. A context is defined with an options object with the following properties: **bucket**, **prefix**, **marker**, **limit**, and **reverse**. 45 | 46 | ```javascript 47 | lambda.context({ 48 | bucket: 'my-bucket', // The S3 bucket to use 49 | prefix: 'prefix/', // The prefix of the files to use - s3-lambda will operate over every file with this prefix. 50 | marker: 'prefix/file1', // Optional. Start at the first file with this prefix. If it is a full file path, starts with next file. Defaults to null. 51 | endPrefix: 'prefix/file3', // Optional. Process files up to (not including) this prefix. Defaults to null. 52 | match: /2017/i, // Optional. Process files matching this regex / string. Defaults to null. 53 | limit: 1000, // Optional. Limit the # of files operated over. Default is Infinity. 54 | reverse: false // Optional. If true, operate over all files in reverse. Defaults to false. 55 | }) 56 | ``` 57 | You can also provide an array of context options, which will tell `ls-lambda` to operate over all the files in each. 58 | ```javascript 59 | const ctx1 = { 60 | bucket: 'my-bucket', 61 | prefix: 'path/to/files/', 62 | marker: 'path/to/logs/2017' 63 | } 64 | const ctx2 = { 65 | bucket: 'my-other-bucket', 66 | prefix: 'path/to/other/logs/', 67 | limit: 100 68 | } 69 | 70 | lambda.context([ctx1, ctx2]) 71 | ``` 72 | 73 | ## Modifiers 74 | After setting context, you can chain several other functions that modify the operation. Each returns a `Request` object, so they can be chained. All of these are optional. 75 | ### .concurrency(c) 76 | {Number} Set the request concurrency level (default is `Infinity`). 77 | 78 | ### .exclude(e) 79 | {Function} Sets the exclude function to use before getting objects from S3. This function will be called with the key and should return `true` if the object should be excluded. 80 | **Example:** exclude png files 81 | ```javascript 82 | lambda 83 | .context(context) 84 | .exclude(key => /.png$/.test(key)) 85 | .each(...) 86 | ``` 87 | 88 | ### .transform(f) 89 | {Function} Sets the transformation function to use when getting objects. This transformer will be called with the raw object that is returned by the [`S3#getObject()`](http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property) method in the AWS SDK and the key, and should return the transformed object. When a transformer function is provided, objects are not automatically converted to strings, and the `encoding` parameter is ignored. 90 | **Example:** unzipping compressed S3 files before each operation 91 | ```javascript 92 | const zlib = require('zlib') 93 | 94 | lambda 95 | .context(context) 96 | .transform((object) => { 97 | return zlib.gunzipSync(object.Body).toString('utf8') 98 | }) 99 | .each(...) 100 | ``` 101 | ### .encode(e) 102 | {String} Sets the string encoding to use when getting objects. This setting is ignored if a transformer function is used. 103 | ### limit(l) 104 | {Number} Limit the number of files operated over. 105 | ### reverse(r) 106 | {Boolean} Reverse the order of files operated over. 107 | ### async() 108 | Lets the resolver know that your function is async (returns a Promise). 109 | 110 | ## Lambda Functions 111 | Perform synchronous or asynchronous functions over each file in the set context. 112 | - each 113 | - forEach 114 | - map 115 | - reduce 116 | - filter 117 | 118 | ### each 119 | each(fn[, isasync]) 120 | 121 | Performs `fn` on each S3 object in parallel. You can set the concurrency level (defaults to `Infinity`). 122 | If `isasync` is true, `fn` should return a Promise. 123 | ```javascript 124 | lambda 125 | .context(bucket, prefix) 126 | .concurrency(5) // operates on 5 objects at a time 127 | .each(object => console.log(object)) 128 | .then(_ => console.log('done!')) 129 | .catch(console.error) 130 | ``` 131 | 132 | ### forEach 133 | forEach(fn[, isasync]) 134 | 135 | Same as `each`, but operates sequentially, one file at a time. Setting concurrency for this function is superfluous. 136 | ```javascript 137 | lambda 138 | .context(bucket, prefix) 139 | .forEach(object => { /* do something with object */ }) 140 | .then(_ => console.log('done!')) 141 | .catch(console.error) 142 | ``` 143 | ### map 144 | map(fn[, isasync]) 145 | 146 | Maps `fn` over each file in an S3 directory, replacing each file with what is returned 147 | from the mapper function. If `isasync` is true, `fn` should return a Promise. 148 | 149 | This is a **destructive** action, meaning what you return from `fn` will change the S3 object itself. For your protection, you must specify `inplace()` to map over the existing files. Alternatively, you can use `output()` to output the results of the mapper function elsewhere (as demonstrated below). You can pass a third argument (a function) to rename the output key (bucket + prefix). 150 | ```javascript 151 | const addSmiley = object => object + ':)' 152 | 153 | lambda 154 | .context(bucket, prefix) 155 | .inplace() 156 | .map(addSmiley) 157 | .then(console.log('done!')) 158 | .catch(console.error) 159 | ``` 160 | Make this *non-destructive* by specifying an `output` directory. 161 | ```javascript 162 | const outputBucket = 'my-bucket' 163 | const outputPrefix = 'path/to/output/' 164 | 165 | lambda 166 | .context(bucket, prefix) 167 | .output(outputBucket, outputPrefix, (key) => key.replace('-', '/')) 168 | .map(addSmiley) 169 | .then(console.log('done!')) 170 | .catch(console.error) 171 | ``` 172 | ### reduce 173 | reduce(func[, isasync]) 174 | 175 | Reduces the objects in the working context to a single value. 176 | ```javascript 177 | // concatonates all the files 178 | const reducer = (previousValue, currentValue, key) => { 179 | return previousValue + currentValue 180 | } 181 | 182 | lambda 183 | .context(bucket, prefix) 184 | .reduce(reducer) 185 | .then(result => { /* do something with result */ }) 186 | .catch(console.error) 187 | ``` 188 | ### filter 189 | filter(func[, isasync]) 190 | 191 | **Destructive**. Filters (deletes) files in S3. `func` should return `true` to keep the object, and `false` to delete it. If `isasync` is true, `func` returns a Promise. 192 | 193 | This is a **destructive** action, meaning if `fn` is `false`, the object will be deleted from S3. For your protection, you must specify `inplace()` to filter the existing files. Alternatively, you can use `output()` to output the results of the filter function elsewhere (as demonstrated below). As with map, you can pass a function to output to rename the output key. 194 | 195 | ```javascript 196 | // filters empty files 197 | const fn = object => object.length > 0 198 | 199 | lambda 200 | .context(bucket, prefix) 201 | .inplace() 202 | .filter(fn) 203 | .then(_ => console.log('done!')) 204 | .catch(console.error) 205 | ``` 206 | Make this *non-destructive* by specifying an `output` directory. 207 | ```javascript 208 | lambda 209 | .context(bucket, prefix) 210 | .output(outputBucket, outputPrefix, (key) => key.replace('-', '/')) 211 | .filter(filter) 212 | .then(console.log('done!')) 213 | .catch(console.error() 214 | ``` 215 | ## S3 Functions 216 | Promise-based wrapper around common S3 methods. 217 | - list 218 | - keys 219 | - get 220 | - put 221 | - copy 222 | - delete 223 | 224 | ### list 225 | list(bucket, prefix[, marker]) 226 | 227 | List all keys in `s3://bucket/prefix`. If you use a marker, `s3-lambda` will start listing alphabetically from there. 228 | ```javascript 229 | lambda 230 | .list(bucket, prefix) 231 | .then(list => console.log(list)) 232 | .catch(console.error) 233 | ``` 234 | ### keys 235 | keys(bucket, prefix[, marker]) 236 | 237 | Returns an array of keys for the given `bucket` and `prefix`. 238 | ```javascript 239 | lambda 240 | .keys(bucket, prefix) 241 | .then(keys => console.log(keys)) 242 | .catch(console.error) 243 | ``` 244 | ### get 245 | get(bucket, key[, encoding[, transformer]]) 246 | 247 | Gets an object in S3, calling `toString(encoding` on objects. 248 | ```javascript 249 | lambda 250 | .get(bucket, key) 251 | .then(object => { /* do something with object */ }) 252 | .catch(console.error) 253 | ``` 254 | 255 | Optionally you can supply your own transformer function to use when retrieving objects. This transformer will be called with the raw object that is returned by the [`S3#getObject()`](http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property) method in the AWS SDK, and should return the transformed object. When a transformer function is provided, objects are not automatically converted to strings, and the `encoding` parameter is ignored. 256 | 257 | ```javascript 258 | const zlib = require('zlib') 259 | 260 | const transformer = object => { 261 | return zlib.gunzipSync(object.Body).toString('utf8') 262 | } 263 | 264 | lambda 265 | .get(bucket, key, null, transformer) 266 | .then(object => { /* do something with object */ }) 267 | .catch(console.error) 268 | ``` 269 | 270 | ### put 271 | put(bucket, key, object[, encoding]) 272 | 273 | Puts an object in S3. Default encoding is `utf8`. 274 | ```javascript 275 | lambda 276 | .put(bucket, key, 'hello world!') 277 | .then(console.log('done!')).catch(console.error) 278 | ``` 279 | ### copy 280 | copy(bucket, key, targetBucket, targetKey) 281 | 282 | Copies an object in S3 from `s3://sourceBucket/sourceKey` to `s3://targetBucket/targetKey`. 283 | ```javascript 284 | lambda 285 | .copy(sourceBucket, sourceKey, targetBucket, targetKey) 286 | .then(console.log('done!')).catch(console.error) 287 | ``` 288 | ### delete 289 | delete(bucket, key) 290 | 291 | Deletes an object in S3 (`s3://bucket/key`). 292 | ```javascript 293 | lambda 294 | .delete(bucket, key) 295 | .then(console.log('done!')).catch(console.error) 296 | ``` 297 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const S3Lambda = require('./lib/S3Lambda') 4 | 5 | module.exports = S3Lambda 6 | -------------------------------------------------------------------------------- /lib/Request.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set and run a batch request 3 | */ 4 | 5 | 'use strict' 6 | 7 | /** 8 | * dependencies 9 | */ 10 | 11 | const ProgressBar = require('progress') 12 | const Batch = require('batch') 13 | 14 | class Request { 15 | 16 | /** 17 | * @constructor 18 | * 19 | * @param {Promise} getObjects A promise that resolves to the target objects 20 | * @param {S3} The S3 instance used to interact with remote files 21 | */ 22 | 23 | constructor(getObjects, s3) { 24 | this.showProgress = s3.showProgress 25 | this.getObjects = getObjects 26 | this.s3 = s3 27 | this.opts = { 28 | concurrency: Infinity, 29 | transformer: null, 30 | encoding: s3.encoding, 31 | reverse: null, 32 | exclude: null, 33 | async: false, 34 | limit: null 35 | } 36 | this.target = null 37 | } 38 | 39 | /** 40 | * Renames the output key when used in map or filter. 41 | * 42 | * @param {String} key The string to rename 43 | * @return {String} key The resulting string 44 | */ 45 | 46 | rename(key) { 47 | const newFileName = this.target.rename(key.slice(this.target.prefix.length)) 48 | return `${this.target.prefix}${newFileName}` 49 | } 50 | 51 | /** 52 | * Enable modifications to the initial context. 53 | * 54 | * @return {Promise} 55 | */ 56 | 57 | resolveSources() { 58 | return new Promise((success, fail) => { 59 | this.getObjects.then((objects) => { 60 | objects = this.opts.reverse ? objects.reverse() : objects 61 | objects = this.opts.limit ? objects.slice(0, this.opts.limit) : objects 62 | objects = this.opts.exclude ? objects.filter(obj => !this.opts.exclude(obj.key)) : objects 63 | success(objects) 64 | }).catch(fail) 65 | }) 66 | } 67 | 68 | /** 69 | * Creates a progress bar for the request. 70 | * 71 | * @param {Batch} batch The Batch instance used for the request. 72 | * @param {Number} numSources The number of files being processed. 73 | */ 74 | 75 | handleProgress(batch, numSources) { 76 | if (this.showProgress) { 77 | const progress = new ProgressBar('[:bar] :percent', { 78 | total: numSources, 79 | width: 40 80 | }) 81 | batch.on('progress', () => progress.tick()) 82 | } 83 | } 84 | 85 | /** 86 | * Sets an exclude function to be used before getting objects from s3. 87 | * 88 | * @param {Function} e The function to use to exclude objects. The exclude 89 | * functions takes an s3 key as a parameter and should return true if the 90 | * object should be excluded. 91 | * @return {Request} The instance on which this method was called. 92 | */ 93 | 94 | exclude(e) { 95 | this.opts.exclude = e 96 | return this 97 | } 98 | 99 | /** 100 | * Sets the encoding to use when getting s3 objects with 101 | * object.Body.toString(encoding). If not set, utf8 is used. 102 | * 103 | * @param {String} encoding The encoding. 104 | * @return {Request} The instance on which this method was called. 105 | */ 106 | 107 | encode(encoding) { 108 | this.opts.encoding = encoding 109 | return this 110 | } 111 | 112 | /** 113 | * Sets a transformation function to be used when getting objects from s3. 114 | * Using transform takes precedence over encode. 115 | * 116 | * @param {Function} transformer The function to use to transform the 117 | * object. The transformation function takes an s3 object and a key as 118 | * parameters and should return the file's contents as a string. 119 | * @return {Request} The instance on which this method was called. 120 | */ 121 | 122 | transform(transformer) { 123 | this.opts.transformer = transformer 124 | return this 125 | } 126 | 127 | /** 128 | * Set the concurrency for requests. Default is Infinity (as many as 129 | * the computer can handle). Has no effect with reduce. 130 | * 131 | * @param {Integer} concurrency The concurrency level to use in the request. 132 | * @return {Request} The instance on which this method was called. 133 | */ 134 | 135 | concurrency(concurrency) { 136 | this.opts.concurrency = concurrency 137 | return this 138 | } 139 | 140 | /** 141 | * Limits the number of sources being operated on. 142 | * 143 | * @param {Integer} limit 144 | * @return {Request} The instance on which this method was called. 145 | */ 146 | 147 | limit(limit) { 148 | this.opts.limit = limit 149 | return this 150 | } 151 | 152 | /** 153 | * Reverse the sources being operated on. 154 | * 155 | * @return {Request} The instance on which this method was called. 156 | */ 157 | 158 | reverse() { 159 | this.opts.reverse = true 160 | return this 161 | } 162 | 163 | /** 164 | * Enables destructive actions (map, filter) to occur inplace. 165 | * 166 | * @return {Request} The instance on which this method was called. 167 | */ 168 | 169 | inplace() { 170 | this.destructive = true 171 | return this 172 | } 173 | 174 | /** 175 | * Sets the output directory for map or filter. If a target is set, map and 176 | * filter write to that location instead of changing the original objects 177 | * themselves. 178 | * 179 | * @param {String} bucket The target bucket. 180 | * @param {String} prefix The target prefix (folder) where the output will go. 181 | * @param {Function} rename Optional. A function to rename the output file 182 | * names (after the prefix). Takes the file name as an argument, and should 183 | * return the new file name. Defaults to `null`. 184 | * @return {Request} The instance on which this method was called. 185 | */ 186 | 187 | output(bucket, prefix, rename) { 188 | rename = rename || null 189 | this.target = { 190 | bucket, 191 | prefix, 192 | rename 193 | } 194 | return this 195 | } 196 | 197 | /** 198 | * Run a function over s3 objects in series. This is just a wrapper around each 199 | * with concurrency 1. 200 | * 201 | * @param {Function} func The function to perform over the working context. 202 | * @param {Boolean} [isasync=false] Set to true if `func` is async (returns a 203 | * Promise). 204 | * @return {Promise} Resolves after processing has completed, 205 | * returning an object that contains the bucket, prefix, and key of the last 206 | * S3 object that was processed. 207 | */ 208 | 209 | forEach(func, isasync) { 210 | return this.each(func, isasync, 1) 211 | } 212 | 213 | /** 214 | * Run a function over s3 objects in parallel. 215 | * 216 | * @param {Function} func The function to perform over the working context. 217 | * @param {Boolean} [isAsync=false] Set to true if `func` is async (returns a 218 | * Promise). 219 | * @return {Promise} Resolves after processing has completed, 220 | * returning an object that contains the bucket, prefix, and key of the last 221 | * S3 object that was processed. 222 | */ 223 | 224 | each(func, isAsync, concurrency) { 225 | 226 | isAsync = isAsync || this.opts.async 227 | const batch = new Batch().concurrency(concurrency || this.opts.concurrency) 228 | 229 | return new Promise((success, fail) => { 230 | this.resolveSources().then((sources) => { 231 | 232 | this.handleProgress(batch, sources.length) 233 | const lastKey = sources[sources.length - 1] 234 | 235 | // Loop over sources, apply function to each 236 | sources.forEach((source) => { 237 | const bucket = source.bucket 238 | const key = source.key 239 | const encoding = this.opts.encoding 240 | const transformer = this.opts.transformer 241 | batch.push((done) => { 242 | this.s3.get(bucket, key, encoding, transformer).then((body) => { 243 | if (isAsync) { 244 | func(body, key).then(done).catch(done) 245 | } else { 246 | func(body, key) 247 | done() 248 | } 249 | }).catch(done) 250 | }) 251 | }) 252 | 253 | // Resolve error or last key processed. 254 | batch.end((err) => { 255 | if (err) { 256 | fail(err) 257 | } else { 258 | success(lastKey) 259 | } 260 | }) 261 | }).catch(fail) 262 | }) 263 | } 264 | 265 | /** 266 | * Maps a function over the objects in the working context in parallel, replacing each 267 | * object with the return value. If an output is specified, the objects will not be 268 | * overwritten, but rather copied to the target location. 269 | * 270 | * @param {Function} func The function to map over each object in the working 271 | * context. func takes a string as a parameter and should return a 272 | * string that will replace the given s3 object. 273 | * @param {Boolean} [isAsync=false] If set to true, this indicates that func 274 | * is async and returns a promise. 275 | * @return {Promise} Resolves after processing has completed, 276 | * returning an object that contains the bucket, prefix, and key of the last 277 | * S3 object that was processed. 278 | */ 279 | 280 | map(func, isAsync) { 281 | if (this.target == null && this.destructive !== true) { 282 | throw new Error('must use target() or inplace() for destructive operations (map, filter)') 283 | } 284 | 285 | isAsync = isAsync || this.opts.async 286 | 287 | // Used to output from the map function (S3Lambda.context.output.map) 288 | const mapOutput = (bucket, key, prefix, body, done) => { 289 | if (body == null) { 290 | throw new Error('mapper function must return a value') 291 | } 292 | if (this.target == null) { 293 | this.s3.put(bucket, key, body, this.opts.encoding).then(() => { 294 | done() 295 | }).catch(done) 296 | } else { 297 | 298 | const outputBucket = this.target.bucket 299 | let outputKey = key.replace(prefix, this.target.prefix) 300 | 301 | // Rename output key (if necessary) 302 | outputKey = this.target.rename ? this.rename(outputKey) : outputKey 303 | 304 | this.s3.put(outputBucket, outputKey, body, this.opts.encoding).then(() => { 305 | done() 306 | }).catch((e) => { 307 | done(e) 308 | }) 309 | } 310 | } 311 | 312 | const batch = new Batch() 313 | batch.concurrency(this.opts.concurrency) 314 | 315 | return new Promise((success, fail) => { 316 | this.resolveSources().then((sources) => { 317 | 318 | this.handleProgress(batch, sources.length) 319 | const lastKey = sources[sources.length - 1] 320 | 321 | // Loop over sources, apply mapper function to each 322 | sources.forEach((source) => { 323 | const bucket = source.bucket 324 | const key = source.key 325 | const encoding = this.opts.encoding 326 | const transformer = this.opts.transformer 327 | batch.push((done) => { 328 | this.s3.get(bucket, key, encoding, transformer).then((val) => { 329 | if (isAsync) { 330 | func(val, source.key).then((newval) => { 331 | mapOutput(bucket, key, source.prefix, newval, done) 332 | }).catch(done) 333 | } else { 334 | const newval = func(val, source.key) 335 | mapOutput(bucket, key, source.prefix, newval, done) 336 | } 337 | }).catch(done) 338 | }) 339 | }) 340 | 341 | // Resolve error or last key processed 342 | batch.end((err) => { 343 | if (err) { 344 | fail(err) 345 | } 346 | success(lastKey) 347 | }) 348 | }).catch(fail) 349 | }) 350 | } 351 | 352 | /** 353 | * Reduce the objects in the working context to a single value. 354 | * 355 | * @param {Function} func Function to execute on each value in the array, taking 356 | * three arguments: 357 | * `accumulator`: The accumulated value previously returned from the last 358 | * invocation of func 359 | * `currentValue`: The current entry being processed 360 | * `key`: The key of the current object being processed 361 | * func either returns the updated value, or a promise that resolves to the 362 | * updated value. 363 | * @param {String} initialValue Optional. Initial value to use as the first 364 | * `previousValue` in `func`. Defaults to `null`. 365 | * @param {Boolean} isAsync Optional, defaults to false. If set to true, this 366 | * indicates that func returns a promise. 367 | * @return {Promise} Resolves after processing has completed, 368 | * returning the result of the reducer. 369 | */ 370 | 371 | reduce(func, initialValue, isAsync) { 372 | 373 | isAsync = isAsync || this.opts.async 374 | initialValue = initialValue || null 375 | const batch = new Batch() 376 | batch.concurrency(this.opts.concurrency) 377 | 378 | return new Promise((success, fail) => { 379 | this.resolveSources().then((sources) => { 380 | 381 | this.handleProgress(batch, sources.length) 382 | let accumulator = initialValue 383 | 384 | // Loop over sources, update `accumulator` 385 | sources.forEach((source) => { 386 | const bucket = source.bucket 387 | const key = source.key 388 | const encoding = this.opts.encoding 389 | const transformer = this.opts.transformer 390 | batch.push((done) => { 391 | this.s3.get(bucket, key, encoding, transformer).then((body) => { 392 | if (isAsync) { 393 | func(accumulator, body, key).then((newval) => { 394 | accumulator = newval 395 | done() 396 | }).catch(done) 397 | } else { 398 | accumulator = func(accumulator, body, key) 399 | done() 400 | } 401 | }).catch(done) 402 | }) 403 | }) 404 | 405 | // Resolve error or reducer result `accumulator` 406 | batch.end((err) => { 407 | if (err) { 408 | fail(err) 409 | } else { 410 | success(accumulator) 411 | } 412 | }) 413 | }).catch(fail) 414 | }) 415 | } 416 | 417 | /** 418 | * Filter the objects in the working context. 419 | * 420 | * @param {Function} func The function to filter objects by, returning true for 421 | * objects that should not be filtered and false for those that should. If 422 | * isAsync is set to true, func returns a promise that resolves to true or 423 | * false. 424 | * @param {Boolean} isAsync Optional, defaults to false. If set to true, this 425 | * indicates that func returns a promise. 426 | * @return {Promise} Resolves after processing has completed, 427 | * returning an object that contains the bucket, prefix, and key of the last 428 | * S3 object that was processed. 429 | */ 430 | 431 | filter(func, isAsync) { 432 | if (this.target == null && this.destructive !== true) { 433 | throw new Error('must use target() or inplace() for destructive operations (map, filter)') 434 | } 435 | 436 | isAsync = isAsync || this.opts.async 437 | const batch = new Batch() 438 | 439 | // Keep a file when filtering 440 | const keep = source => new Promise((success, fail) => { 441 | if (this.target == null) { 442 | 443 | // Since we are keeping the file and there is no output, there is 444 | // nothing else to do 445 | success() 446 | } else { 447 | const bucket = source.bucket 448 | const key = source.key 449 | const targetBucket = this.target.bucket 450 | let targetKey = key.replace(source.prefix, this.target.prefix) 451 | 452 | // Rename output key (if necessary) 453 | targetKey = this.target.rename ? this.rename(targetKey) : targetKey 454 | 455 | this.s3.copy(bucket, key, targetBucket, targetKey) 456 | .then(() => success()) 457 | .catch(fail) 458 | } 459 | }) 460 | 461 | // Remove a file when filtering 462 | const remove = source => new Promise((success, fail) => { 463 | if (this.target == null) { 464 | 465 | // For inplace filtering, we remove the actual file 466 | this.s3.delete(source.bucket, source.key) 467 | .then(() => success()) 468 | .catch(fail) 469 | } else { 470 | 471 | // If output is specified, there is nothing else to do, since we are 472 | // simply not copying the file anywhere 473 | success() 474 | } 475 | }) 476 | 477 | // Ensure the filter function returns a boolean 478 | const check = (result) => { 479 | if (typeof result !== 'boolean') { 480 | throw new TypeError('filter function must return a boolean') 481 | } 482 | } 483 | 484 | return new Promise((success, fail) => { 485 | this.resolveSources().then((sources) => { 486 | 487 | this.handleProgress(batch, sources.length) 488 | const lastKey = sources[sources.length - 1] 489 | 490 | // Loop over every key and run the filter function on each object. keep 491 | // track of files to keep and remove. 492 | sources.forEach((source) => { 493 | const bucket = source.bucket 494 | const key = source.key 495 | const encoding = this.opts.encoding 496 | const transformer = this.opts.transformer 497 | batch.push((done) => { 498 | this.s3.get(bucket, key, encoding, transformer).then((body) => { 499 | if (isAsync) { 500 | func(body, source).then((result) => { 501 | check(result) 502 | if (result) { 503 | keep(source).then(() => done()).catch(done) 504 | } else { 505 | remove(source).then(() => done()).catch(done) 506 | } 507 | }).catch(done) 508 | } else { 509 | let result = null 510 | result = func(body, source) 511 | check(result) 512 | if (result) { 513 | keep(source).then(() => done()).catch(done) 514 | } else { 515 | remove(source).then(() => done()).catch(done) 516 | } 517 | } 518 | }).catch(done) 519 | }) 520 | }) 521 | 522 | // Resolve error or last source processed 523 | batch.end((err) => { 524 | if (err) { 525 | fail(err) 526 | } else { 527 | success(lastKey) 528 | } 529 | }) 530 | }).catch(fail) 531 | }) 532 | } 533 | } 534 | 535 | /** 536 | * Exports 537 | */ 538 | 539 | module.exports = Request 540 | -------------------------------------------------------------------------------- /lib/S3.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Promise wrapper around aws s3 sdk 3 | */ 4 | 5 | 'use strict' 6 | 7 | /** 8 | * Dependencies 9 | */ 10 | 11 | const s3Mock = require('mock-aws-s3') 12 | const aws = require('aws-sdk') 13 | 14 | class S3 { 15 | 16 | /** 17 | * @constructor 18 | * 19 | * @param {Object} config - Options to initialize with. If accessKeyId and 20 | * secretAccessKey are left out, the aws sdk will attempt to use the 21 | * system's default credentials. 22 | * @param {String} [config.accessKeyId=null] AWS Access Key 23 | * @param {String} [config.secretAccessKey=null] AWS Secret Key 24 | * @param {Integer} [config.maxRetries=30] Max retries allowed for aws api requets 25 | * @param {Integer} [config.timeout=120] Timeout allowed for aws api requests 26 | * @param {Boolean} [config.verbose=false] Whether to use verbose mode when making requets 27 | */ 28 | 29 | constructor(config) { 30 | this.verbose = config.verbose || false 31 | this.encoding = config.encoding || 'utf8' 32 | if (config.localPath) { 33 | 34 | // use local files (using mock aws sdk) 35 | s3Mock.config.basePath = config.localPath 36 | this.s3Instance = new s3Mock.S3() 37 | } else { 38 | 39 | // use the aws sdk. attempt to use aws credentials in config. if they 40 | // are not present, the aws sdk could pick them up in ~/.aws/credentials 41 | if (config.accessKeyId && config.secretAccessKey) { 42 | aws.config.update({ 43 | accessKeyId: config.accessKeyId, 44 | secretAccessKey: config.secretAccessKey 45 | }) 46 | } 47 | 48 | // Create AWS S3 object 49 | this.s3Instance = new aws.S3({ 50 | maxRetries: config.maxRetries || 10, 51 | signatureVersion: config.signatureVersion || 'v4', 52 | httpOptions: { 53 | timeout: config.timeout || 10000 54 | }, 55 | apiVersion: '2006-03-01' 56 | }) 57 | } 58 | } 59 | 60 | /** 61 | * Gets an object in s3. 62 | * 63 | * @param {String} bucket - The bucket to get from 64 | * @param {String} key - The key of the object to get 65 | * @param {Function} [transformer] - If supplied, this function will be 66 | * run on Object.Body before returning. Useful for dealing with compressed 67 | * files or weird formats 68 | * @returns {Promise} The s3 text object. 69 | */ 70 | 71 | get(bucket, key, encoding, transformer) { 72 | 73 | // Default transform is to assume a text file, and call toString() 74 | // with the set encoding 75 | if (!transformer) { 76 | transformer = obj => obj.Body.toString(encoding || this.encoding) 77 | } 78 | 79 | return new Promise((success, fail) => { 80 | this.s3Instance.getObject({ 81 | Bucket: bucket, 82 | Key: key 83 | }, (err, object) => { 84 | if (err) { 85 | fail(err) 86 | } else { 87 | try { 88 | success(transformer(object, key)) 89 | if (this.verbose) { 90 | console.info(`GET OBJECT s3://${bucket}/${key}`) 91 | } 92 | } catch (e) { 93 | fail(e) 94 | } 95 | } 96 | }) 97 | }) 98 | } 99 | 100 | /** 101 | * Puts a text object in S3. 102 | * 103 | * @param {String} bucket - The s3 bucket to use 104 | * @param {String} key - The key path where the object will be placed 105 | * @param {String} body - The object body 106 | * @return {Promise} Promise that resolves when the object is written to s3 107 | */ 108 | 109 | put(bucket, key, body) { 110 | return new Promise((success, fail) => { 111 | this.s3Instance.putObject({ 112 | ContentEncoding: this.encoding, 113 | Bucket: bucket, 114 | Body: body, 115 | Key: key 116 | }, (err, res) => { 117 | if (err) { 118 | fail(err) 119 | } else { 120 | if (this.verbose) { 121 | console.info(`PUT OBJECT s3://${bucket}/${key}`) 122 | } 123 | success(res) 124 | } 125 | }) 126 | }) 127 | } 128 | 129 | /** 130 | * Copies an object in S3. 131 | * 132 | * @public 133 | * @param {String} bucket The source bucket 134 | * @param {String} key The source key 135 | * @param {String} targetBucket The target bucket 136 | * @param {String} targetKey The target key 137 | * @return {Promise} 138 | */ 139 | 140 | copy(bucket, key, targetBucket, targetKey) { 141 | return new Promise((success, fail) => { 142 | this.s3Instance.copyObject({ 143 | Bucket: targetBucket, 144 | Key: targetKey, 145 | CopySource: `${bucket}/${key}` 146 | }, (err) => { 147 | if (err) { 148 | fail(err) 149 | } else { 150 | if (this.verbose) { 151 | console.info(`COPY OBJECT s3://${bucket}/${key} --> s3://${targetBucket}/${targetKey}`) 152 | } 153 | success() 154 | } 155 | }) 156 | }) 157 | } 158 | 159 | /** 160 | * Deletes an object or array of objects in S3. 161 | * 162 | * @public 163 | * @param {String} bucket - The bucket 164 | * @param {String|Array} key - The key to delete 165 | * @returns {Promise} The key (or array of keys) that was deleted. 166 | */ 167 | 168 | delete(bucket, key) { 169 | return new Promise((success, fail) => { 170 | this.s3Instance.deleteObject({ 171 | Bucket: bucket, 172 | Key: key 173 | }, (err) => { 174 | if (err) { 175 | fail(err) 176 | } else { 177 | success() 178 | if (this.verbose) { 179 | console.info(`DELETE OBJECT s3://${bucket}/${key}`) 180 | } 181 | } 182 | }) 183 | }) 184 | } 185 | 186 | /** 187 | * Deletes a list of objects in S3. 188 | * 189 | * @private 190 | * @param {String} bucket - The s3 bucket to use 191 | * @param {Array} keys - The keys of the objects to delete 192 | * @returns {Promise} 193 | */ 194 | 195 | deleteObjects(bucket, keys) { 196 | 197 | // creates input with format: { Key: key } required by s3 198 | const input = keys.map(key => ({ 199 | Key: key 200 | })) 201 | 202 | return new Promise((success, fail) => { 203 | this.s3Instance.deleteObjects({ 204 | Bucket: bucket, 205 | Delete: { 206 | Objects: input 207 | } 208 | }, (err, res) => { 209 | if (err) { 210 | fail(err) 211 | } else { 212 | success(res) 213 | if (this.verbose) { 214 | keys.forEach((key) => { 215 | console.info(`DELETE OBJECT s3://${bucket}/${key}`) 216 | }) 217 | } 218 | } 219 | }) 220 | }) 221 | } 222 | 223 | /** 224 | * Lists all the keys in the given S3 folder. 225 | * 226 | * @param {String} bucket - The bucket 227 | * @param {String} prefix - The prefix for the folder to list keys for 228 | * @param {String} [endPrefix] Process all files up to this key prefix 229 | * @param {String} [marker] - The key to start listing from, alphabetically 230 | * @returns {Promise} An array containing all the keys in s3://bucket/prefix 231 | */ 232 | 233 | keys(bucket, prefix, endPrefix, marker) { 234 | 235 | endPrefix = endPrefix || '' 236 | marker = marker || '' 237 | 238 | return new Promise((success, fail) => { 239 | this.listRecursive(bucket, prefix, endPrefix, marker, [], (err, allKeys) => { 240 | if (err) { 241 | fail(err) 242 | } else { 243 | allKeys = allKeys.filter(key => key.length > 0) 244 | success(allKeys) 245 | } 246 | }) 247 | }) 248 | } 249 | 250 | /** 251 | * Recursively list all S3 objects, circumventing AWS's 1000 limit 252 | * 253 | * @param {String} bucket - The bucket 254 | * @param {String} prefix - The prefix for the folder to list keys for 255 | * @param {String} [endPrefix] Process all files up to this key prefix 256 | * @param {String} [marker] - The key to start listing from, alphabetically 257 | * @param {Array} allKeys - The aggregator used in recursion 258 | * @param {Function} done - Callback used when recursion is finished. Takes 259 | * two parameters: first is an error, if any (set to null if no error), and 260 | * the second is an array of keys 261 | */ 262 | 263 | listRecursive(bucket, prefix, endPrefix, marker, allKeys, done) { 264 | allKeys = allKeys || [] 265 | 266 | this.s3Instance.listObjects({ 267 | Bucket: bucket, 268 | Prefix: prefix, 269 | Marker: marker 270 | }, (err, keys) => { 271 | if (err) { 272 | done(err) 273 | } else { 274 | if (this.verbose) { 275 | console.info(`LIST OBJECTS s3://${bucket}/${marker === '' ? prefix : marker}`) 276 | } 277 | 278 | // No keys found 279 | if (keys.Contents.length === 0) { 280 | done(null, []) 281 | } else { 282 | 283 | // Update key values, removing the prefix 284 | let keyValues = keys.Contents 285 | .filter(object => { 286 | const empty = object.Size === 0 287 | const isDir = object.Key.lastIndexOf('/') === object.Key.length - 1 288 | return !(empty && isDir) 289 | }) 290 | .map(key => key.Key) 291 | 292 | // If the stop prefix is reached, ignore the rest of the keys 293 | let stopPrefixReached = false 294 | 295 | if (endPrefix.length > 0) { 296 | for (let i = 0; i < keyValues.length; i++) { 297 | if (keyValues[i].indexOf(endPrefix) > -1) { 298 | keyValues = keyValues.slice(0, i) 299 | stopPrefixReached = true 300 | break 301 | } 302 | } 303 | } 304 | 305 | allKeys = allKeys.concat(keyValues) 306 | 307 | // `keys.IsTruncated` indicates whether there are more keys to list 308 | // if so, we continue with `marker` 309 | if (keys.IsTruncated && !stopPrefixReached) { 310 | marker = keys.Contents[keys.Contents.length - 1].Key 311 | this.listRecursive(bucket, prefix, endPrefix, marker, allKeys, done) 312 | } else { 313 | done(null, allKeys) 314 | } 315 | } 316 | } 317 | }) 318 | } 319 | } 320 | 321 | /** 322 | * Exports 323 | */ 324 | 325 | module.exports = S3 326 | -------------------------------------------------------------------------------- /lib/S3Lambda.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Create new contexts for batch requests, or access the s3 wrapper. 3 | */ 4 | 5 | 'use strict' 6 | 7 | const S3 = require('./S3') 8 | const Request = require('./Request') 9 | const Batch = require('batch') 10 | 11 | /** 12 | * S3Lambda allows you to run batch requests, as well as interact with s3 13 | * objects directly through a promise-based api. 14 | */ 15 | 16 | class S3Lambda extends S3 { 17 | 18 | /** 19 | * @constructor 20 | * @param {Object} config - Options to initialize S3Lambda with. 21 | * @param {String} [config.encoding='utf8'] Encoding of the objects 22 | * @param {Boolean} [config.showProgress=false] Show progress bar for S3 operations 23 | */ 24 | 25 | constructor(config) { 26 | config = config || {} 27 | super(config) 28 | this.showProgress = config.showProgress || false 29 | this.verbose = config.verbose || false 30 | } 31 | 32 | /** 33 | * Creates a new batch request 34 | * 35 | * @param {Object|Array} context An object representing an S3 context. 36 | * Alternatively, you can supply an array of context objects. 37 | * @param {String} context.bucket The S3 bucket 38 | * @param {String} context.prefix The prefix key to use to find objects 39 | * @param {String} [context.endPrefix] Optional. The prefix to stop at 40 | * @param {String} [context.marker] Optional. The marker to use for listing 41 | * @param {Boolean} [context.reverse] Optional. Reverse the order of the 42 | * files in the context 43 | * @param {Number} [context.limit] Optional. Limit the number of files in the 44 | * context 45 | */ 46 | 47 | context(context) { 48 | 49 | let contexts = null 50 | 51 | if (Array.isArray(context)) { 52 | contexts = context 53 | } else if (typeof context === 'object') { 54 | contexts = [context] 55 | } else { 56 | throw Error('`context` expects an options object, or an array of options objects.') 57 | } 58 | if (this.verbose) { 59 | console.info('finding objects') 60 | } 61 | 62 | return new Request(this.findObjects(contexts), this) 63 | } 64 | 65 | /** 66 | * Find all objects based on the context provided 67 | * 68 | * @param {Array} contexts[context] An array of objects representing s3 contexts to find keys 69 | * @param {Object} context An object representing an S3 context 70 | * @param {String} context.bucket The S3 bucket 71 | * @param {String} context.prefix The prefix key to use to find objects 72 | * @param {String} context.match A string or regex for the key to match 73 | * @param {String} [context.endPrefix] Optional. The prefix to stop at (alphabetically) 74 | * @param {String} [context.marker] Optional. The marker to use to start listing keys at 75 | */ 76 | 77 | findObjects(contexts) { 78 | const batch = new Batch() 79 | 80 | return new Promise((success, fail) => { 81 | 82 | contexts.forEach((context) => { 83 | batch.push((done) => { 84 | 85 | const bucket = context.bucket 86 | const prefix = context.prefix 87 | const endPrefix = context.endPrefix 88 | const match = context.match 89 | const marker = context.marker 90 | const reverse = context.reverse 91 | const limit = context.limit 92 | 93 | this.keys(bucket, prefix, endPrefix, marker).then((keys) => { 94 | 95 | // Format keys 96 | let sources = keys.map(key => ({ 97 | bucket: context.bucket, 98 | prefix: context.prefix, 99 | key 100 | })) 101 | if (match) { 102 | sources = sources.filter(object => object.key.match(match)) 103 | } 104 | if (reverse) { 105 | sources = sources.reverse() 106 | } 107 | if (limit) { 108 | sources = sources.slice(0, limit) 109 | } 110 | done(null, sources) 111 | }).catch((e) => { 112 | done(e) 113 | }) 114 | }) 115 | }) 116 | 117 | batch.end((err, sources) => { 118 | if (err) { 119 | fail(err) 120 | } else { 121 | 122 | // Flatten the array (of array) of sources and impose limit 123 | sources = sources.reduce((prev, cur) => prev.concat(cur), []) 124 | success(sources) 125 | } 126 | }) 127 | }) 128 | } 129 | } 130 | 131 | /** 132 | * Exports 133 | */ 134 | 135 | module.exports = S3Lambda 136 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "s3-lambda", 3 | "version": "5.1.8", 4 | "description": "Lambda functions over S3 objects with concurrency control (forEach, map, reduce, filter)", 5 | "main": "index.js", 6 | "author": "Wells Johnston", 7 | "license": "MIT", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/littlstar/s3-lambda" 11 | }, 12 | "scripts": { 13 | "test": "node test", 14 | "lint": "node_modules/.bin/eslint . --fix --ignore-path test/index.js", 15 | "validate": "node_modules/.bin/eslint . --ignore-path test/index.js" 16 | }, 17 | "dependencies": { 18 | "aws-sdk": "^2.2.31", 19 | "batch": "^0.5.3", 20 | "mock-aws-s3": "^2.1.0", 21 | "progress": "^1.1.8" 22 | }, 23 | "devDependencies": { 24 | "eslint": "^3.13.1", 25 | "eslint-config-airbnb": "^14.0.0", 26 | "eslint-config-airbnb-base": "^11.0.1", 27 | "eslint-plugin-import": "^2.2.0", 28 | "eslint-plugin-jsx-a11y": "^3.0.2", 29 | "eslint-plugin-react": "^6.9.0", 30 | "mkdirp": "^0.5.1", 31 | "rimraf": "^2.5.4", 32 | "tape": "^4.4.0" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | // Dependencies 4 | const S3Lambda = require(`${__dirname}/..`) 5 | const mkdirp = require('mkdirp').sync 6 | const rimraf = require('rimraf').sync 7 | const test = require('tape') 8 | const path = require('path') 9 | const fs = require('fs') 10 | 11 | // Path variables 12 | const folder = 'buckets' 13 | const bucket = 'S3Lambda' 14 | const prefix = 'files' 15 | const outputPrefix = 'output-files' 16 | const files = ['file1', 'file2', 'file3', 'file4'] 17 | const localPath = path.resolve(__dirname, folder) 18 | const bucketPath = path.resolve(__dirname, folder, bucket) 19 | const prefixPath = path.resolve(__dirname, folder, bucket, prefix) 20 | const outputPrefixPath = path.resolve(__dirname, folder, bucket, outputPrefix) 21 | const filePaths = files.map(f => `${prefixPath}/${f}`) 22 | 23 | // s3-lambda object 24 | const lambda = new S3Lambda({ 25 | localPath, 26 | showProgress: false, 27 | verbose: false 28 | }) 29 | 30 | resetSandbox() 31 | 32 | function resetSandbox() { 33 | rimraf(path.resolve(__dirname, 'buckets')) 34 | mkdirp(prefixPath) 35 | files.forEach((file) => { 36 | const filePath = path.resolve(__dirname, folder, bucketPath, prefixPath, file) 37 | fs.writeFileSync(filePath, file) 38 | }) 39 | } 40 | 41 | /** 42 | * Returns the contents of a file 43 | */ 44 | 45 | function readFile(path) { 46 | return fs.readFileSync(path).toString().trim() 47 | } 48 | 49 | /** 50 | * Returns an array of the contents of each file in a directory 51 | */ 52 | 53 | function readFiles(files) { 54 | return files.map(readFile) 55 | } 56 | 57 | /** 58 | * Returns true if all the files in an array exist 59 | */ 60 | 61 | function filesExist(paths) { 62 | return paths.map(fileExists).every(f => f) 63 | } 64 | 65 | /** 66 | * Returns true if file exists 67 | */ 68 | 69 | function fileExists(path) { 70 | return fs.existsSync(path) 71 | } 72 | 73 | /** 74 | * List files in a directory 75 | */ 76 | 77 | function readDir(dir) { 78 | return fs.readdirSync(dir) 79 | } 80 | 81 | /** 82 | * Test key listing function 83 | * TODO test with endPrefix and marker 84 | */ 85 | 86 | test('S3Lambda.keys', (t) => { 87 | t.plan(1) 88 | const answer = files.map(f => `${prefix}/${f}`) 89 | lambda 90 | .keys(bucket, prefix) 91 | .then((keys) => { 92 | t.deepEqual(keys, answer, 'keys length matches') 93 | }) 94 | .catch(e => console.error(e.stack)) 95 | }) 96 | 97 | /** 98 | * Test S3 methods get, put, and delete 99 | */ 100 | 101 | test('S3Lambda.put, S3Lambda.get, S3Lambda.delete', (t) => { 102 | 103 | resetSandbox() 104 | t.plan(3) 105 | 106 | const file = files[0] 107 | const key = `${prefix}/${file}` 108 | const body = 'hello world' 109 | const name = 'test' 110 | 111 | lambda 112 | .put(bucket, key, body) 113 | .then(() => { 114 | const fileContents = readFile(`${prefixPath}/${file}`) 115 | t.equal(fileContents, body, 'put object') 116 | lambda.get(bucket, key).then((obj) => { 117 | t.equal(obj, body, 'get object') 118 | lambda.delete(bucket, key).then(() => { 119 | t.notOk(fs.existsSync(`${key}`), 'delete object') 120 | }).catch(console.error) 121 | }) 122 | .catch(console.error) 123 | }) 124 | .catch(console.error) 125 | }) 126 | 127 | test('S3Lambda.delete (batch)', (t) => { 128 | 129 | t.plan(1) 130 | 131 | const files = ['file2', 'file3', 'file4'] 132 | const keys = files.map(file => `${prefix}/${file}`) 133 | 134 | lambda.deleteObjects(bucket, keys).then(() => { 135 | t.notOk(filesExist(keys), 'delete multiple objects') 136 | }).catch(console.error) 137 | }) 138 | 139 | test('S3Lambda.context.forEach (sync)', (t) => { 140 | 141 | resetSandbox() 142 | t.plan(1) 143 | 144 | const objects = [] 145 | const answer = [{ object: 'file1', key: 'files/file1' }, 146 | { object: 'file2', key: 'files/file2' }, 147 | { object: 'file3', key: 'files/file3' }, 148 | { object: 'file4', key: 'files/file4' }] 149 | 150 | const context = { 151 | bucket: bucket, 152 | prefix: prefix 153 | } 154 | 155 | lambda 156 | .context(context).forEach((obj, key) => { 157 | objects.push({ 158 | object: obj, 159 | key 160 | }) 161 | }) 162 | .then(() => { 163 | t.deepEqual(objects, answer, 'forEach sync') 164 | }) 165 | .catch(e => console.error(e.stack)) 166 | }) 167 | 168 | test('S3Lambda.context.forEach (async)', (t) => { 169 | 170 | resetSandbox() 171 | t.plan(10) 172 | 173 | const objects = [] 174 | const answer = [ 175 | { object: 'file1', key: 'files/file1' }, 176 | { object: 'file2', key: 'files/file2' }, 177 | { object: 'file3', key: 'files/file3' }, 178 | { object: 'file4', key: 'files/file4' } 179 | ] 180 | 181 | const opts = { 182 | bucket: bucket, 183 | prefix: prefix 184 | } 185 | 186 | let concurrentOperations = 0 187 | 188 | const context = lambda 189 | .context(opts) 190 | .concurrency(4) 191 | 192 | context.forEach((obj, key) => new Promise((success) => { 193 | t.equal(concurrentOperations, 0, 'forEach concurrency <= 1') 194 | concurrentOperations++ 195 | setTimeout(() => { 196 | t.equal(concurrentOperations, 1, 'forEach concurrency <= 1') 197 | objects.push({ 198 | object: obj, 199 | key 200 | }) 201 | concurrentOperations-- 202 | success() 203 | }, 10) 204 | }), true).then(() => { 205 | t.equal(context.opts.concurrency, 4) 206 | t.deepEqual(objects, answer, 'forEach async') 207 | }) 208 | }) 209 | 210 | test('S3Lambda.context.exclude and S3Lambda.context.forEach (async)', (t) => { 211 | 212 | resetSandbox() 213 | t.plan(1) 214 | 215 | const objects = [] 216 | const answer = [ 217 | { object: 'file1', key: 'files/file1' }, 218 | { object: 'file3', key: 'files/file3' }, 219 | { object: 'file4', key: 'files/file4' } 220 | ] 221 | 222 | const context = { 223 | bucket: bucket, 224 | prefix: prefix 225 | } 226 | 227 | lambda 228 | .context(context) 229 | .exclude(key => /file2/.test(key)) 230 | .forEach((obj, key) => new Promise((success, fail) => { 231 | objects.push({ 232 | object: obj, 233 | key 234 | }) 235 | success() 236 | }), true).then(() => { 237 | t.deepEqual(objects, answer, 'forEach async with exclude') 238 | }) 239 | }) 240 | 241 | test('S3Lambda.context.transform and S3Lambda.context.forEach (async)', (t) => { 242 | 243 | resetSandbox() 244 | t.plan(1) 245 | 246 | const objects = [] 247 | const answer = [ 248 | { object: 'FILE1 files/file1', key: 'files/file1' }, 249 | { object: 'FILE2 files/file2', key: 'files/file2' }, 250 | { object: 'FILE3 files/file3', key: 'files/file3' }, 251 | { object: 'FILE4 files/file4', key: 'files/file4' } 252 | ] 253 | 254 | const context = { 255 | bucket: bucket, 256 | prefix: prefix 257 | } 258 | 259 | lambda 260 | .context(context) 261 | .transform((obj, key) => obj.Body.toString('utf8').toUpperCase() + ' ' + key) 262 | .forEach((obj, key) => new Promise((success, fail) => { 263 | objects.push({ 264 | object: obj, 265 | key 266 | }) 267 | success() 268 | }), true).then(() => { 269 | t.deepEqual(objects, answer, 'forEach async with transform') 270 | }) 271 | }) 272 | 273 | test('S3Lambda.context.map (sync)', (t) => { 274 | 275 | resetSandbox() 276 | t.plan(1) 277 | 278 | const answer = [ 279 | 'files/file1file1', 280 | 'files/file2file2', 281 | 'files/file3file3', 282 | 'files/file4file4' 283 | ] 284 | 285 | const context = { 286 | bucket: bucket, 287 | prefix: prefix 288 | } 289 | 290 | lambda 291 | .context(context) 292 | .inplace() 293 | .map((obj, key) => 294 | 295 | // update each object with the key prefixed 296 | key + obj).then(() => { 297 | t.deepEqual(answer, readFiles(filePaths), 'map sync') 298 | }).catch(console.error) 299 | }) 300 | 301 | test('S3Lambda.context.map (async)', (t) => { 302 | 303 | resetSandbox() 304 | t.plan(1) 305 | 306 | // Tests rename function 307 | const outputPaths = files.map(f => `${outputPrefixPath}/${f}ab`) 308 | 309 | const answer = [ 310 | 'files/file1file1', 311 | 'files/file2file2', 312 | 'files/file3file3', 313 | 'files/file4file4' 314 | ] 315 | 316 | const context = { 317 | bucket: bucket, 318 | prefix: prefix 319 | } 320 | 321 | lambda 322 | .context(context) 323 | .inplace() 324 | .map((obj, key) => new Promise((success, fail) => { 325 | success(key + obj) 326 | }), true).then(() => { 327 | t.deepEqual(answer, readFiles(filePaths), 'map async over 3 objects') 328 | }).catch(console.error) 329 | }) 330 | 331 | test('S3Lambda.context.output.map (sync)', (t) => { 332 | 333 | resetSandbox() 334 | t.plan(1) 335 | 336 | const outputPaths = files.map(f => `${outputPrefixPath}/${f}ab`) 337 | 338 | const answer = [ 339 | 'files/file1file1', 340 | 'files/file2file2', 341 | 'files/file3file3', 342 | 'files/file4file4' 343 | ] 344 | 345 | const context = { 346 | bucket: bucket, 347 | prefix: prefix 348 | } 349 | 350 | lambda 351 | .context(context) 352 | .output(bucket, outputPrefix, (key) => key.concat('ab')) 353 | .map((obj, key) => key + obj).then(() => { 354 | t.deepEqual(answer, readFiles(outputPaths), 'map sync over') 355 | }).catch(console.error) 356 | }) 357 | 358 | test('S3Lambda.context.output.map (async)', (t) => { 359 | 360 | resetSandbox() 361 | t.plan(1) 362 | 363 | const outputPaths = files.map(f => `${outputPrefixPath}/${f}ab`) 364 | 365 | const answer = [ 366 | 'files/file1file1', 367 | 'files/file2file2', 368 | 'files/file3file3', 369 | 'files/file4file4' 370 | ] 371 | 372 | const context = { 373 | bucket: bucket, 374 | prefix: prefix 375 | } 376 | 377 | lambda 378 | .context(context) 379 | .output(bucket, outputPrefix, (key) => key.concat('ab')) 380 | .map((obj, key) => new Promise((success, fail) => { 381 | success(key + obj) 382 | }), true).then(() => { 383 | t.deepEqual(answer, readFiles(outputPaths), 'map async') 384 | }).catch(console.error) 385 | }) 386 | 387 | test('S3Lambda.context.reduce (sync)', (t) => { 388 | 389 | resetSandbox() 390 | t.plan(1) 391 | 392 | const answer = 20 393 | 394 | const context = { 395 | bucket: bucket, 396 | prefix: prefix 397 | } 398 | 399 | // Add the length of the contents of each file, starting at 0 400 | lambda 401 | .context(context) 402 | .reduce((acc, cur, key) => acc + cur.length , 0) 403 | .then((result) => { 404 | t.equal(result, answer, 'reduce sync') 405 | }).catch(e => console.error(e.stack)) 406 | }) 407 | 408 | test('S3Lambda.context.reduce (async)', (t) => { 409 | 410 | resetSandbox() 411 | t.plan(1) 412 | 413 | const answer = 20 414 | 415 | const context = { 416 | bucket: bucket, 417 | prefix: prefix 418 | } 419 | 420 | // Add the length of the contents of each file, starting at 0 421 | lambda 422 | .context(context) 423 | .reduce((prev, cur, key) => new Promise((success, fail) => { 424 | success(prev + cur.length) 425 | }), 0, true) 426 | .then((result) => { 427 | t.equal(result, answer, 'reduce async') 428 | }).catch(e => console.error(e.stack)) 429 | }) 430 | 431 | test('S3Lambda.context.filter (sync)', (t) => { 432 | 433 | resetSandbox() 434 | t.plan(1) 435 | 436 | const answer = ['file1'] 437 | const context = { 438 | bucket: bucket, 439 | prefix: prefix 440 | } 441 | 442 | lambda 443 | .context(context) 444 | .inplace() 445 | .filter(obj => obj == 'file1') 446 | .then(() => { 447 | t.deepEqual(answer, readDir(prefixPath), 'filter inplace (sync)') 448 | }) 449 | .catch(e => console.error(e)) 450 | }) 451 | 452 | test('S3Lambda.context.filter (async)', (t) => { 453 | 454 | resetSandbox() 455 | t.plan(1) 456 | 457 | // Tests rename output 458 | const answer = ['file1'] 459 | 460 | const context = { 461 | bucket: bucket, 462 | prefix: prefix 463 | } 464 | 465 | lambda 466 | .context(context) 467 | .inplace() 468 | .filter(obj => new Promise((success, fail) => { 469 | success(obj == 'file1') 470 | }), true) 471 | .then(() => { 472 | t.deepEqual(fs.readdirSync(prefixPath), answer, 'filter 3 inplace (async)') 473 | }) 474 | .catch(e => console.error(e.stack)) 475 | }) 476 | 477 | test('S3Lambda.context.output.filter (sync)', (t) => { 478 | 479 | resetSandbox() 480 | t.plan(1) 481 | 482 | const answer = ['file1ab'] 483 | 484 | const context = { 485 | bucket: bucket, 486 | prefix: prefix 487 | } 488 | 489 | lambda 490 | .context(context) 491 | .output(bucket, outputPrefix, (key) => key.concat('ab')) 492 | .filter(obj => obj == 'file1') 493 | .then(() => { 494 | t.deepEqual(readDir(outputPrefixPath), answer, 'filter to output (sync)') 495 | }) 496 | .catch(e => console.error(e.stack)) 497 | }) 498 | 499 | test('S3Lambda.context.output.filter (async)', (t) => { 500 | 501 | resetSandbox() 502 | t.plan(1) 503 | 504 | const answer = ['file1ab'] 505 | 506 | const context = { 507 | bucket: bucket, 508 | prefix: prefix 509 | } 510 | 511 | lambda.context(context) 512 | .output(bucket, outputPrefix, (key) => key.concat('ab')) 513 | .filter(obj => new Promise((success, fail) => { 514 | success(obj == 'file1') 515 | }), true) 516 | .then(() => { 517 | t.deepEqual(readDir(outputPrefixPath), answer, 'filter to output (async)') 518 | }) 519 | .catch(e => console.error(e.stack)) 520 | }) 521 | 522 | test('end', (t) => { 523 | rimraf(path.resolve(__dirname, 'buckets')) 524 | t.end() 525 | }) 526 | --------------------------------------------------------------------------------