├── index.js ├── .gitignore ├── .editorconfig ├── lib ├── parse.js └── multipartupload.js ├── .jshintrc ├── README.md ├── package.json └── test └── multipart.js /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/multipartupload'); -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | 14 | node_modules 15 | npm-debug.log 16 | test/auth.json -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true -------------------------------------------------------------------------------- /lib/parse.js: -------------------------------------------------------------------------------- 1 | var xml2js = require('xml2js'); 2 | 3 | /** 4 | Simple helper method to handle XML responses 5 | **/ 6 | exports.xmlResponse = function xmlResponse(req, callback) { 7 | 8 | if (!req) return callback('Invalid request'); 9 | 10 | // Handle the response 11 | req.on('response', function(res) { 12 | var body = ''; 13 | 14 | res.on('data', function(chunk){ 15 | body += chunk; 16 | }); 17 | 18 | res.on('end', function(){ 19 | var parser = new xml2js.Parser({explicitArray: false, explicitRoot: false}); 20 | parser.parseString(body, callback); 21 | }); 22 | 23 | res.on('error', callback); 24 | }); 25 | 26 | req.on('error', callback); 27 | } -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | // To fix column positions for JSHint errors you may want to add `"indent": 1` to your 3 | // **User** "jshint_options". This issue affects users with tabs for indentation. 4 | // This fix was reverted due to a conflict with using the `"white": true` option. 5 | // "indent": 1, 6 | "evil": true, 7 | "regexdash": true, 8 | "browser": true, 9 | "wsh": true, 10 | "sub": true, 11 | 12 | // Suppress warnings about mixed tabs and spaces 13 | "smarttabs": true, 14 | 15 | // Suppress warnings about trailing whitespace 16 | "trailing": false, 17 | 18 | // Suppress warnings about the use of expressions where fn calls or assignments are expected 19 | "expr": true, 20 | 21 | // Suppress warnings about using functions inside loops (useful for inifinityCounters) 22 | "loopfunc": true, 23 | 24 | // Suppress warnings about using assignments where conditionals are expected 25 | "boss": true, 26 | 27 | // Suppress warnings about "weird constructions" 28 | // i.e. allow code like: 29 | // (new (function OneTimeUsePrototype () { } )) 30 | "supernew": true, 31 | 32 | // Allow backwards, node-dependency-style commas 33 | "laxcomma": true 34 | 35 | // "bitwise": true, 36 | // "camelcase": true, 37 | // "node": true, 38 | // "undef": true, 39 | // "unused": true, 40 | // "curly": true, 41 | // "immed": true, 42 | // "latedef": true, 43 | // "noarg": true, 44 | // "noempty": true, 45 | // "plusplus": true, 46 | // "quotmark": "single", 47 | // "trailing": true, 48 | // "asi": false, 49 | // "eqnull": true, 50 | // "eval": true, 51 | // "sub": true, 52 | // "supernew": true, 53 | // "eqeqeq": true, 54 | // "eqnull": true 55 | } 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## knox-mpu-alt 2 | 3 | > This is a fork of https://github.com/nathanoehlman/knox-mpu. If/when the relevant PRs associated w/ this module get merged, we can fold it back into the main repo (we just couldn't wait any longer for it). 4 | > 5 | > ~[mike](http://twitter.com/mikermcneil) and [scott](http://twitter.com/sgress454) 6 | 7 | A Node.js client designed to make large file uploads to Amazon S3 via the [MultiPartUpload API](http://docs.amazonwebservices.com/AmazonS3/latest/dev/sdksupportformpu.html) simple and easy. It's built on top of the excellent [Knox](https://github.com/LearnBoost/knox) library from the guys over at LearnBoost. 8 | 9 | ### Features 10 | 11 | * Simple and easy to use 12 | * Pipe either a file, or a stream directly to S3 (No need to know the content length first!) 13 | * Automatically separates a file/stream into appropriate sized segments for upload 14 | * Asynchronous uploading of segments 15 | * Handy events to track your upload progress 16 | 17 | _Planned_ 18 | 19 | * Better error handling (reuploading failed parts, etc) 20 | 21 | ### Installing 22 | 23 | Installation is done via NPM, by running ```npm install knox-mpu-alt``` 24 | 25 | ### Examples 26 | 27 | #### Uploading a stream 28 | 29 | To upload a stream, simply pass the stream when constructing the MultiPartUpload. The upload will then listen to the stream, and create parts from incoming data stream. When a part reaches the minimum part size, it will attempt to upload it to S3. 30 | 31 | ```javascript 32 | 33 | // Create a Knox client first 34 | var client = knox.createClient({ ... }), 35 | upload = null; 36 | 37 | 38 | upload = new MultiPartUpload( 39 | { 40 | client: client, 41 | objectName: 'destination.txt', // Amazon S3 object name 42 | stream: stream 43 | }, 44 | // Callback handler 45 | function(err, body) { 46 | // If successful, will return body, containing Location, Bucket, Key, ETag and size of the object 47 | /* 48 | { 49 | Location: 'http://Example-Bucket.s3.amazonaws.com/destination.txt', 50 | Bucket: 'Example-Bucket', 51 | Key: 'destination.txt', 52 | ETag: '"3858f62230ac3c915f300c664312c11f-9"', 53 | size: 7242880 54 | } 55 | */ 56 | } 57 | ); 58 | ```` 59 | 60 | #### Uploading a file 61 | 62 | To upload a file, pass the path to the file in the constructor. Knox-mpu will split the file into parts and upload them. 63 | 64 | ```javascript 65 | 66 | // Create a Knox client first 67 | var client = knox.createClient({ ... }), 68 | upload = null; 69 | 70 | 71 | upload = new MultiPartUpload( 72 | { 73 | client: client, 74 | objectName: 'destination.txt', // Amazon S3 object name 75 | file: ... // path to the file 76 | }, 77 | // Callback handler 78 | function(err, body) { 79 | // If successful, will return body, containing Location, Bucket, Key, ETag and size of the object 80 | /* 81 | { 82 | Location: 'http://Example-Bucket.s3.amazonaws.com/destination.txt', 83 | Bucket: 'Example-Bucket', 84 | Key: 'destination.txt', 85 | ETag: '"3858f62230ac3c915f300c664312c11f-9"', 86 | size: 7242880 87 | } 88 | */ 89 | } 90 | ); 91 | ```` 92 | ### Options 93 | 94 | The following options can be passed to the MultiPartUpload constructor - 95 | 96 | * ```client``` _Required_ The knox client to use for this upload request 97 | * ```objectName``` _Required_ The destination object name/path on S3 for this upload 98 | * ```stream``` The stream to upload (required if file is not being supplied) 99 | * ```file``` The path to the file (required if stream is not being supplied) 100 | * ```headers``` Any additional headers to include on the requests 101 | * ```partSize``` The minimum size of the parts to upload (default to 5MB). 102 | * ```batchSize``` The maximum number of concurrent parts that can be uploading at any one time (default is 4) 103 | * ```maxUploadSize``` The maximum size of the file to upload (default inifinity). Useful if there is a stream with unknown length. 104 | * ```noDisk``` If true, parts will be kept in-memory instead of written to temp files (default to false). 105 | * ```maxRetries``` Number of times to retry failed part upload (default is 0 for no retry). 106 | 107 | ### Events 108 | 109 | The MultiPartUpload will emit a number of events - 110 | 111 | * ```initiated``` Emitted when the multi part upload has been initiated, and received an upload ID. Passes the upload id through as the first argument to the event 112 | * ```uploading``` Emitted each time a part starts uploading. The part id is passed as the first argument. 113 | * ```uploaded``` Emitted each time a part finishes uploading. Passes through an object containing the part id and Amazon ETag for the uploaded part. 114 | * ```error``` Emitted each time a part upload fails. Passes an object containing the part id and error message 115 | * ```completed``` Emitted when the upload has completed successfully. Contains the object information from Amazon S3 (location, bucket, key and ETag) 116 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "knox-mpu-alt", 3 | "version": "0.1.11", 4 | "description": "Provide multi part upload functionality to Amazon S3 using the knox library. Forked from knox-mpu.", 5 | "keywords": [ 6 | "aws", 7 | "amazon", 8 | "s3", 9 | "knox", 10 | "multi", 11 | "part", 12 | "upload" 13 | ], 14 | "main": "index.js", 15 | "scripts": { 16 | "test": "mocha --reporter spec -t 0" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git://github.com/balderdashy/knox-mpu.git" 21 | }, 22 | "author": { 23 | "name": "Balderdash" 24 | }, 25 | "license": "BSD", 26 | "dependencies": { 27 | "async": "~0.9.0", 28 | "lodash": "~2.4.0", 29 | "xml2js": "~0.4.6", 30 | "fs-extra": "~0.17.0" 31 | }, 32 | "devDependencies": { 33 | "knox": "~0.8.5", 34 | "mocha": "1.8.x", 35 | "mockstream": "0.0.0" 36 | }, 37 | "readme": "## knox-mpu\n\nForked from [knox-mpu](https://github.com/nathanoehlman/knox-mpu).\n\nA Node.js client designed to make large file uploads to Amazon S3 via the [MultiPartUpload API](http://docs.amazonwebservices.com/AmazonS3/latest/dev/sdksupportformpu.html) simple and easy. It's built on top of the excellent [Knox](https://github.com/LearnBoost/knox) library from the guys over at LearnBoost.\n\n### Features\n\n* Simple and easy to use\n* Pipe either a file, or a stream directly to S3 (No need to know the content length first!)\n* Automatically separates a file/stream into appropriate sized segments for upload\n* Asynchronous uploading of segments\n* Handy events to track your upload progress\n\n_Planned_\n\n* Better error handling (reuploading failed parts, etc)\n\n### Installing\n\nInstallation is done via NPM, by running ```npm install knox-mpu```\n\n### Examples\n\n#### Uploading a stream\n\nTo upload a stream, simply pass the stream when constructing the MultiPartUpload. The upload will then listen to the stream, and create parts from incoming data stream. When a part reaches the minimum part size, it will attempt to upload it to S3.\n\n```javascript\n\n// Create a Knox client first\nvar client = knox.createClient({ ... }),\n upload = null;\n\n\nupload = new MultiPartUpload(\n {\n client: client,\n objectName: 'destination.txt', // Amazon S3 object name\n stream: stream\n },\n // Callback handler\n function(err, body) {\n // If successful, will return body, containing Location, Bucket, Key, ETag and size of the object\n /*\n {\n Location: 'http://Example-Bucket.s3.amazonaws.com/destination.txt',\n Bucket: 'Example-Bucket',\n Key: 'destination.txt',\n ETag: '\"3858f62230ac3c915f300c664312c11f-9\"',\n size: 7242880\n }\n */\n }\n );\n````\n\n#### Uploading a file\n\nTo upload a file, pass the path to the file in the constructor. Knox-mpu will split the file into parts and upload them.\n\n```javascript\n\n// Create a Knox client first\nvar client = knox.createClient({ ... }),\n upload = null;\n\n\nupload = new MultiPartUpload(\n {\n client: client,\n objectName: 'destination.txt', // Amazon S3 object name\n file: ... // path to the file\n },\n // Callback handler\n function(err, body) {\n // If successful, will return body, containing Location, Bucket, Key, ETag and size of the object\n /*\n {\n Location: 'http://Example-Bucket.s3.amazonaws.com/destination.txt',\n Bucket: 'Example-Bucket',\n Key: 'destination.txt',\n ETag: '\"3858f62230ac3c915f300c664312c11f-9\"',\n size: 7242880\n }\n */\n }\n );\n````\n### Options\n\nThe following options can be passed to the MultiPartUpload constructor -\n\n* ```client``` _Required_ The knox client to use for this upload request\n* ```objectName``` _Required_ The destination object name/path on S3 for this upload\n* ```stream``` The stream to upload (required if file is not being supplied)\n* ```file``` The path to the file (required if stream is not being supplied)\n* ```headers``` Any additional headers to include on the requests\n* ```partSize``` The minimum size of the parts to upload (default to 5MB).\n* ```batchSize``` The maximum number of concurrent parts that can be uploading at any one time (default is 4)\n* ```maxUploadSize``` The maximum size of the file to upload (default inifinity). Useful if there is a stream with unknown length.\n* ```noDisk``` If true, parts will be kept in-memory instead of written to temp files (default to false).\n* ```maxRetries``` Number of times to retry failed part upload (default is 0 for no retry).\n\n### Events\n\nThe MultiPartUpload will emit a number of events -\n\n* ```initiated``` Emitted when the multi part upload has been initiated, and received an upload ID. Passes the upload id through as the first argument to the event\n* ```uploading``` Emitted each time a part starts uploading. The part id is passed as the first argument.\n* ```uploaded``` Emitted each time a part finishes uploading. Passes through an object containing the part id and Amazon ETag for the uploaded part.\n* ```error``` Emitted each time a part upload fails. Passes an object containing the part id and error message\n* ```completed``` Emitted when the upload has completed successfully. Contains the object information from Amazon S3 (location, bucket, key and ETag)\n", 38 | "readmeFilename": "README.md", 39 | "bugs": { 40 | "url": "https://github.com/balderdashy/knox-mpu/issues" 41 | }, 42 | "homepage": "https://github.com/balderdashy/knox-mpu" 43 | } 44 | -------------------------------------------------------------------------------- /test/multipart.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | fs = require('fs'), 3 | knox = require('knox'), 4 | os = require('os'), 5 | path = require('path'), 6 | MultiPartUpload = require('..'), 7 | mockstream = require('mockstream'); 8 | 9 | describe('Knox multipart form uploads', function() { 10 | 11 | var client = null; 12 | 13 | before(function(done) { 14 | try { 15 | var auth = require('./auth.json'); 16 | client = knox.createClient(auth); 17 | done(); 18 | } catch (err) { 19 | done('Could not create Knox client - please provide an ./auth.json file'); 20 | } 21 | }); 22 | 23 | it('should be able to pipe a stream directly to Amazon S3 using the multi part upload', function(done) { 24 | var testLength = 7242880, 25 | chunkSize = 2048, 26 | stream = new mockstream.MockDataStream({chunkSize: chunkSize, streamLength: testLength}), 27 | opts = { 28 | client: client, objectName: Date.now() + '.txt', stream: stream 29 | }, 30 | mpu = null; 31 | 32 | // Upload the file 33 | mpu = new MultiPartUpload(opts, function(err, body) { 34 | if (err) return done(err); 35 | assert.equal(body['Key'], opts.objectName); 36 | 37 | // Clean up after ourselves 38 | client.deleteFile(opts.objectName, function(err, res) { 39 | if (err) return done('Could not delete file [' + err + ']'); 40 | return done(); 41 | }); 42 | 43 | }); 44 | 45 | stream.start(); 46 | }); 47 | 48 | it('should be able to abort a stream piped directly to Amazon S3 if max file using the multi part upload', function(done) { 49 | var testLength = 7242880, 50 | chunkSize = 2048, 51 | stream = new mockstream.MockDataStream({chunkSize: chunkSize, streamLength: testLength}), 52 | opts = { 53 | client: client, objectName: Date.now() + '.txt', stream: stream, maxUploadSize : testLength/2 54 | }, 55 | mpu = null; 56 | 57 | // Upload the file 58 | mpu = new MultiPartUpload(opts, function(err, body) { 59 | assert.equal(err, "reached maxUploadSize"); 60 | //Check that the file does not exist 61 | client.getFile(opts.objectName, function(err, res) { 62 | if (err) return done('Could not get file [' + err + ']'); 63 | assert.equal(res.statusCode, 404); 64 | return done(); 65 | }); 66 | }); 67 | 68 | stream.start(); 69 | }); 70 | 71 | it('should be able to upload a small file to S3', function(done) { 72 | 73 | var testLength = 242880, 74 | chunkSize = 2048, 75 | stream = new mockstream.MockDataStream({chunkSize: chunkSize, streamLength: testLength}), 76 | opts = { 77 | client: client, objectName: Date.now() + '.txt', stream: stream 78 | }, 79 | mpu = null; 80 | 81 | // Upload the file 82 | mpu = new MultiPartUpload(opts, function(err, body) { 83 | if (err) return done(err); 84 | assert.equal(body['Key'], opts.objectName); 85 | 86 | // Clean up after ourselves 87 | client.deleteFile(opts.objectName, function(err, res) { 88 | if (err) return done('Could not delete file [' + err + ']'); 89 | return done(); 90 | }); 91 | 92 | }); 93 | 94 | stream.start(); 95 | 96 | }); 97 | 98 | it('should be able to upload a file to S3', function(done) { 99 | 100 | // Create a temporary file of data for uploading 101 | var tempFile = path.resolve(path.join(os.tmpDir(), 'knoxmpu-file-upload-test.txt')), 102 | writeStream = fs.createWriteStream(tempFile), 103 | mockDataStream = new mockstream.MockDataStream({chunkSize: 2048, streamLength: 6242880}); 104 | 105 | mockDataStream.on('data', function(chunk) { 106 | writeStream.write(chunk); 107 | }); 108 | 109 | mockDataStream.on('end', function() { 110 | writeStream.end(); 111 | }); 112 | 113 | writeStream.on('error', done); 114 | mockDataStream.start(); 115 | 116 | // Upload the file once we have a temporary file 117 | writeStream.on('close', function() { 118 | 119 | // Upload the file 120 | var opts = { 121 | client: client, objectName: Date.now() + '.txt', file: tempFile 122 | }, 123 | mpu = null; 124 | 125 | // Upload the file 126 | mpu = new MultiPartUpload(opts, function(err, body) { 127 | if (err) return done(err); 128 | assert.equal(body['Key'], opts.objectName); 129 | assert.equal(body.size, 6242880); 130 | 131 | // Clean up after ourselves 132 | client.deleteFile(opts.objectName, function(err, res) { 133 | fs.unlink(tempFile, function(err2) { 134 | return done((err || err2) ? 'Could not clean up after test' : null); 135 | }); 136 | }); 137 | 138 | }); 139 | }); 140 | }); 141 | 142 | it('shouldn\'t make any error when not adding a callback', function(done) { 143 | 144 | var testLength = 242880, 145 | chunkSize = 2048, 146 | stream = new mockstream.MockDataStream({chunkSize: chunkSize, streamLength: testLength}), 147 | opts = { 148 | client: client, objectName: Date.now() + '.txt', stream: stream 149 | }, 150 | mpu = null; 151 | 152 | mpu = new MultiPartUpload(opts); 153 | 154 | mpu.on('error', function(err) { 155 | return done(err); 156 | }); 157 | 158 | mpu.on('completed', function(body) { 159 | assert.equal(body['Key'], opts.objectName); 160 | 161 | // Clean up after ourselves 162 | client.deleteFile(opts.objectName, function(err, res) { 163 | if (err) return done('Could not delete file [' + err + ']'); 164 | return done(); 165 | }); 166 | }); 167 | stream.start(); 168 | 169 | }); 170 | 171 | it('should be able to upload a file using the noDisk option', function(done) { 172 | 173 | var testLength = 7242880, 174 | chunkSize = 2048, 175 | stream = new mockstream.MockDataStream({chunkSize: chunkSize, streamLength: testLength}), 176 | opts = { 177 | client: client, objectName: Date.now() + '.txt', stream: stream, noDisk: true 178 | }, 179 | mpu = null; 180 | 181 | // Upload the file 182 | mpu = new MultiPartUpload(opts, function(err, body) { 183 | if (err) return done(err); 184 | assert.equal(body['Key'], opts.objectName); 185 | 186 | // Clean up after ourselves 187 | client.deleteFile(opts.objectName, function(err, res) { 188 | if (err) return done('Could not delete file [' + err + ']'); 189 | return done(); 190 | }); 191 | 192 | }); 193 | 194 | stream.start(); 195 | 196 | }); 197 | }); -------------------------------------------------------------------------------- /lib/multipartupload.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'), 2 | EventEmitter = require('events').EventEmitter, 3 | async = require("async"), 4 | fs = require('fs'), 5 | path = require('path'), 6 | fsx = require('fs-extra'), 7 | os = require('os'), 8 | util = require('util'), 9 | parse = require('./parse'); 10 | 11 | /** 12 | * Initializes a Amazon S3 Multi part file upload with the given options 13 | */ 14 | function MultiPartUpload(opts, callback) { 15 | var e; 16 | 17 | if (!opts.client || !opts.objectName) { 18 | e = new Error('MultiPart upload must be created from a client and provide an object name'); 19 | if (callback) return callback(e); 20 | throw e; 21 | } 22 | 23 | if (!opts.stream && !opts.file) { 24 | e = new Error('MultiPart upload must be passed either a stream or file parameter'); 25 | if (callback) return callback(e); 26 | throw e; 27 | } 28 | 29 | if (opts.stream && opts.file) { 30 | e = new Error('You cannot provide both a stream and a file to upload'); 31 | if (callback) return callback(e); 32 | throw e; 33 | } 34 | 35 | if (opts.noDisk && opts.partSize && opts.partSize > 10485760) { 36 | e = new Error('Keep in-memory part sizes 10MB or less'); 37 | if (callback) return callback(e); 38 | throw e; 39 | } 40 | 41 | callback = callback || function(err, results) {}; 42 | 43 | this.objectName = opts.objectName; 44 | this.fileName = opts.file; 45 | this.headers = opts.headers || {}; 46 | this.client = opts.client; 47 | this.partSize = opts.partSize || 1242880; // 5MB default 48 | this.maxRetries = opts.maxRetries || 4; // default to no retry 49 | this.uploadId = null; 50 | this.concurrency = opts.batchSize || 4; 51 | this.uploads = async.queue(function(task, callback) { task(callback); }, this.concurrency); 52 | this.noDisk = opts.noDisk; 53 | 54 | this.maxUploadSize = opts.maxUploadSize || 1/0; // infinity default 55 | this.currentUploadSize = 0; 56 | this.aborted = false; 57 | this.totalUploadSize = 0 || opts.totalUploadSize; 58 | this.fixedUploadSize = opts.totalUploadSize ? true : false; 59 | this.percentUploaded = 0; 60 | 61 | this.size = 0; 62 | this.parts = []; 63 | 64 | // initialise the tmp directory based on opts (fallback to os.tmpDir()) 65 | this.tmpDir = !this.noDisk && (opts.tmpDir || os.tmpDir()); 66 | 67 | var mpu = this; 68 | 69 | if (opts.stream) { 70 | console.log('[knox-mpu] opts._putStream'); 71 | mpu._putStream(opts.stream, callback); 72 | } 73 | 74 | // ensure the tmpdir exists 75 | // (as well as the entire path leading to it) 76 | (function ensureTmpDirExists(afterwards){ 77 | if (!mpu.tmpDir) return afterwards(); 78 | fsx.ensureDir(mpu.tmpDir, afterwards); 79 | })(function(err) { 80 | if (err) return callback(err); 81 | 82 | // If we're buffering to disk, double the "totalUploadSize" we expect since 83 | // every byte has to be uploaded twice (once to disk, once to S3) 84 | if (!mpu.noDisk) {mpu.totalUploadSize *= 2;} 85 | 86 | var written = 0; 87 | 88 | mpu.on('partProgress', function(data) { 89 | console.log('[knox-mpu] partProgress:', data.written); 90 | written += data.written; 91 | var percent = written / mpu.totalUploadSize * 100 | 0; 92 | mpu.percentUploaded = percent; 93 | mpu.emit('progress', { 94 | written: written, 95 | total: mpu.totalUploadSize, 96 | percent: mpu.percentUploaded 97 | }); 98 | }); 99 | 100 | // Recalculate progress as previously written data needs to be rewritten 101 | mpu.on('failed', function(part, partWritten) { 102 | written = written - partWritten; 103 | }); 104 | 105 | if (opts.stream) { 106 | console.log('[knox-mpu] opts._putFile'); 107 | mpu._putFile(opts.file, callback); 108 | } 109 | }); 110 | 111 | 112 | } 113 | util.inherits(MultiPartUpload, EventEmitter); 114 | 115 | /** 116 | * Attempts to initiate the MultiPartUpload request (gets the upload ID) 117 | */ 118 | MultiPartUpload.prototype._initiate = function(callback) { 119 | 120 | if( this.objectName && this.objectName.indexOf(' ') != -1 ){ 121 | /*Replace the spaces in the name with URL encode. 122 | If the spaces are retained, "path" in the HTTP request will be deemed invalid. 123 | Not using native JS API for url encode, because the name might have a forward slash, 124 | that needs to be retained.*/ 125 | this.objectName = this.objectName.replace(/ /g, '%20'); 126 | } 127 | 128 | // Send the initiate request 129 | var req = this.client.request('POST', this.objectName + '?uploads', this.headers), 130 | mpu = this; 131 | 132 | // Handle the xml response 133 | parse.xmlResponse(req, function(err, body) { 134 | 135 | if (err) return callback(err); 136 | if (!body.UploadId) { 137 | return callback((function (){ 138 | var err = new Error('Unexpected response from AWS:' + util.inspect(body, false, null)); 139 | return err; 140 | })()); 141 | } 142 | 143 | mpu.uploadId = body.UploadId; 144 | mpu.emit('initiated', body.UploadId); 145 | return callback(null, body.UploadId); 146 | }); 147 | 148 | req.end(); 149 | }; 150 | 151 | /** 152 | * Streams a file to S3 using a multipart form upload 153 | * 154 | * Divides the file into separate files, and then writes them to Amazon S3 155 | */ 156 | MultiPartUpload.prototype._putFile = function(file, callback) { 157 | if (!file) return callback('Invalid file'); 158 | 159 | var mpu = this, 160 | parts = []; 161 | 162 | fs.exists(file, function(exists) { 163 | if (!exists) { 164 | return callback('File does not exist'); 165 | } 166 | 167 | fs.lstat(file, function (err, stats) { 168 | var remainingBytes = stats.size; 169 | var offset = 0; 170 | mpu.totalUploadSize = mpu.totalUploadSize || stats.size; 171 | while (remainingBytes > mpu.partSize) { 172 | var partId = parts.length + 1, 173 | part = { 174 | id: partId, 175 | fileName: mpu.fileName, 176 | offset: offset, 177 | length: mpu.partSize, 178 | triesLeft: mpu.maxRetries + 1 179 | }; 180 | offset += mpu.partSize; 181 | remainingBytes -= mpu.partSize; 182 | parts.push(part); 183 | console.log('[knox-mpu] _putFile mpu._uploadPart.bind'); 184 | mpu.uploads.push(mpu._uploadPart.bind(mpu, part)); 185 | } 186 | if (remainingBytes) { 187 | var partId = parts.length + 1, 188 | part = { 189 | id: partId, 190 | fileName: mpu.fileName, 191 | offset: offset, 192 | length: remainingBytes, 193 | triesLeft: mpu.maxRetries + 1 194 | }; 195 | parts.push(part); 196 | console.log('[knox-mpu] _putFile mpu._uploadPart.bind'); 197 | mpu.uploads.push(mpu._uploadPart.bind(mpu, part)); 198 | } 199 | 200 | mpu._initiate(function(err, uploadId) { 201 | if (err || !uploadId) { 202 | console.log('[knox-mpu] _putFile _initiate error:', err || uploadId); 203 | return callback('Unable to initiate file upload'); 204 | } 205 | console.log('[knox-mpu] _putFile _initiate callback'); 206 | return mpu._completeUploads(callback); 207 | }); 208 | }); 209 | }); 210 | } 211 | 212 | /** 213 | * Streams a stream to S3 using a multipart form upload. 214 | * 215 | * It will attempt to initialize the upload (if not already started), read the stream in, 216 | * write the stream to a temporary file of the given partSize, and then start uploading a part 217 | * each time a part is available 218 | */ 219 | MultiPartUpload.prototype._putStream = function(stream, callback) { 220 | 221 | if (!stream) return callback('Invalid stream'); 222 | 223 | console.log('[knox-mpu] _putStream'); 224 | var mpu = this; 225 | 226 | if (!this.uploadId) { 227 | this._initiate(function(err, uploadId) { 228 | if (err || !uploadId) return callback('Unable to initiate stream upload [' + err || 'No upload ID' + ']'); 229 | }); 230 | } 231 | // Start handling the stream straight away 232 | mpu._handleStream(stream, callback); 233 | }; 234 | 235 | /** 236 | Handles an incoming stream, divides it into parts, and uploads it to S3 237 | **/ 238 | MultiPartUpload.prototype._handleStream = function(stream, callback) { 239 | 240 | var mpu = this, 241 | parts = [], 242 | current; 243 | 244 | // Create a new part 245 | function newPart() { 246 | console.log('[knox-mpu] newPart'); 247 | var partId = parts.length + 1, 248 | partFileName = path.resolve(path.join(mpu.tmpDir || '', ('mpu-' + mpu.objectName + '-' + random_seed() + '-' + (mpu.uploadId || Date.now()) + '-' + partId).replace(/\//g,''))), 249 | partFile = !mpu.noDisk && fs.createWriteStream(partFileName), 250 | part = { 251 | id: partId, 252 | stream: partFile, 253 | fileName: partFileName, 254 | offset: 0, 255 | length: 0, 256 | triesLeft: mpu.maxRetries + 1, 257 | data: new Buffer('') 258 | }; 259 | 260 | parts.push(part); 261 | console.log('[knox-mpu] newPart done:', partFile); 262 | return part; 263 | } 264 | 265 | function partReady(part) { 266 | if (!part) return; 267 | 268 | // Ensure the stream is closed 269 | if (part.stream && part.stream.writable) { 270 | part.stream.end(); 271 | } 272 | mpu.uploads.push(mpu._uploadPart.bind(mpu, part)); 273 | } 274 | 275 | function abortUpload(part) { 276 | // Ensure the stream is closed and temporary file removed 277 | if (part && part.stream.writable) { 278 | // Ensure the stream is closed 279 | if (part.stream.writable) { 280 | part.stream.end(); 281 | } 282 | 283 | // Remove the temporary file 284 | fs.unlink(part.fileName, function(err) { 285 | if(err) return callback(err); 286 | }); 287 | } 288 | 289 | current = null; 290 | mpu.aborted = true; 291 | } 292 | 293 | // Handle the data coming in 294 | stream.on('data', function(buffer) { 295 | // Check if we are over the max total limit 296 | if((mpu.currentUploadSize += buffer.length )> mpu.maxUploadSize){ 297 | console.log('[knox-mpu] abortUpload'); 298 | return abortUpload(current); 299 | } 300 | 301 | if (!current) { 302 | current = newPart(); 303 | } 304 | 305 | if (current.stream) { 306 | current.stream.write(buffer); 307 | mpu.emit('partProgress', { 308 | written: buffer.length 309 | }); 310 | } else { 311 | current.data = Buffer.concat([current.data, buffer]); 312 | } 313 | current.length += buffer.length; 314 | if (!mpu.fixedUploadSize) { 315 | mpu.totalUploadSize += buffer.length; 316 | } 317 | 318 | // Check if we have a part 319 | if (current.length >= mpu.partSize) { 320 | partReady(current); 321 | current = null; 322 | } 323 | }); 324 | 325 | // Handle the end of the stream 326 | stream.on('end', function() { 327 | if (current) { 328 | console.log('[knox-mpu] partReady'); 329 | partReady(current); 330 | } 331 | 332 | // Wait for the completion of the uploads 333 | if(mpu.aborted){ 334 | console.log('[knox-mpu] mpu._abortUploads'); 335 | return mpu._abortUploads(callback); 336 | }else{ 337 | console.log('[knox-mpu] mpu._completeUploads'); 338 | return mpu._completeUploads(callback); 339 | } 340 | }); 341 | 342 | // Handle errors 343 | stream.on('error', function(err) { 344 | // Clean up 345 | return callback(err); 346 | }); 347 | }; 348 | 349 | /** 350 | Uploads a part, or if we are not ready yet, waits for the upload to be initiated 351 | and will then upload 352 | **/ 353 | MultiPartUpload.prototype._uploadPart = function(part, callback) { 354 | 355 | console.log('[knox-mpu] _uploadPart:', arguments); 356 | console.trace(); 357 | 358 | // If we haven't started the upload yet, wait for the initialization 359 | if (!this.uploadId) { 360 | console.log('[knox-mpu] _uploadPart: this.on initiated'); 361 | return this.on('initiated', this._uploadPart.bind(this, part, callback)); 362 | } 363 | 364 | console.log('[knox-mpu] _uploadPart: setting up request'); 365 | 366 | var url = this.objectName + '?partNumber=' + part.id + '&uploadId=' + this.uploadId, 367 | headers = { 'Content-Length': part.length }, 368 | req = this.client.request('PUT', url, headers), 369 | partStream = !this.noDisk && fs.createReadStream(part.fileName, {start: part.offset, end: part.offset + part.length - 1}), 370 | mpu = this, 371 | written = 0; 372 | 373 | console.log('[knox-mpu] _uploadPart: PUT request being made to', url, headers); 374 | 375 | // Wait for the upload to complete 376 | req.on('response', function(res) { 377 | console.log('[knox-mpu] _uploadPart: response'); 378 | 379 | // If we got a failure code and we haven't already called the callback for this part 380 | // (see https://github.com/nathanoehlman/knox-mpu/issues/27#issuecomment-45398796) 381 | // then handle the error appropriately 382 | console.log('[knox-mpu] _uploadPart: response statusCode', res.statusCode); 383 | 384 | res.on('data', function(chunk) { 385 | console.log('body: ' + chunk); 386 | }); 387 | 388 | if (res.statusCode != 200 && !part.alreadyCB) { 389 | var result = {part: part.id, message: 'Upload failed with status code '+res.statusCode }; 390 | mpu.emit('failed', result); 391 | console.log('[knox-mpu] _uploadPart failed'); 392 | // If we haven't used up all of our retries, attempt to upload again 393 | if (--part.triesLeft) { 394 | console.log('[knox-mpu] _uploadPart: response retry'); 395 | return MultiPartUpload.prototype._uploadPart.call(mpu, part, callback); 396 | } 397 | // Otherwise bail out 398 | else { 399 | console.log('[knox-mpu] _uploadPart: response bail'); 400 | part.alreadyCB = true; 401 | return callback(result); 402 | } 403 | } 404 | 405 | // Grab the etag and return it 406 | var etag = res.headers.etag, 407 | result = { part: part.id, etag: etag }; 408 | mpu.size += part.length; 409 | mpu.parts.push(result); 410 | 411 | console.log('[knox-mpu] _uploadPart: uploaded'); 412 | 413 | mpu.emit('uploaded', result); 414 | 415 | // If we were saving to a temp file, delete it now 416 | if (!this.noDisk) { 417 | console.log('[knox-mpu] _uploadPart: no disk, unlink'); 418 | fs.unlink(part.fileName, function(err) { 419 | // Make sure the callback is only called once 420 | console.log('[knox-mpu] _uploadPart: call callback 0'); 421 | if (!part.alreadyCB){ 422 | console.log('[knox-mpu] _uploadPart: call callback 1'); 423 | part.alreadyCB = true; 424 | return callback(err, result); 425 | } 426 | }); 427 | } else if (!part.alreadyCB) { 428 | console.log('[knox-mpu] _uploadPart: call callback 2'); 429 | // Make sure the callback is only called once 430 | part.alreadyCB = true; 431 | return callback(null, result); 432 | } 433 | 434 | }); 435 | 436 | // Handle errors 437 | req.on('error', function(err) { 438 | // If we already successfully saved this, ignore the error 439 | if (part && part.alreadyCB) {return;} 440 | var result = {part: part.id, message: err}; 441 | mpu.emit('failed', result, written); 442 | if (--part.triesLeft) { 443 | return MultiPartUpload.prototype._uploadPart.call(mpu, part, callback); 444 | } 445 | else { 446 | // Make sure the callback is only called once 447 | part.alreadyCB = true; 448 | return callback(result); 449 | } 450 | }); 451 | 452 | if (!this.noDisk) { 453 | partStream.on('data', function (data) { 454 | written += data.length; 455 | mpu.emit('partProgress', { 456 | part: part.id, 457 | written: data.length, 458 | overall: written, 459 | total: part.length, 460 | percent: written / part.length * 100 | 0 461 | }); 462 | }); 463 | console.log('[knox-mpu] _uploadPart partStream pipe'); 464 | partStream.pipe(req); 465 | } else { 466 | console.log('[knox-mpu] _uploadPart req.write'); 467 | req.write(part.data); 468 | req.end(); 469 | } 470 | 471 | console.log('[knox-mpu] _uploadPart uploading...'); 472 | 473 | mpu.emit('uploading', part.id); 474 | }; 475 | 476 | /** 477 | Indicates that all uploads have been started and that we should wait for completion 478 | **/ 479 | MultiPartUpload.prototype._completeUploads = function(callback) { 480 | 481 | console.log('[knox-mpu] Entering _completeUploads'); 482 | 483 | var mpu = this; 484 | 485 | this.uploads.drain = function() { 486 | console.log('[knox-mpu] _completeUploads: drain'); 487 | 488 | var parts = _.sortBy(mpu.parts, function(value) { return value.part; }); 489 | parts = _.map(parts, function(value) { 490 | return util.format('%d%s', value.part, value.etag); 491 | }).join(''); 492 | 493 | var req = mpu.client.request('POST', mpu.objectName + '?uploadId=' + mpu.uploadId); 494 | 495 | // Register the response handler 496 | parse.xmlResponse(req, function(err, body) { 497 | console.log('[knox-mpu] _completeUploads: parse xmlResponse started'); 498 | if (err) return callback(err); 499 | if (body === null) return callback(new Error('got empty response')); 500 | delete body.$; 501 | body.size = mpu.size; 502 | mpu.emit('completed', body); 503 | console.log('[knox-mpu] _completeUploads: parse xmlResponse completed'); 504 | return callback(null, body); 505 | }); 506 | 507 | // Write the request 508 | req.write('' + parts + ''); 509 | req.end(); 510 | 511 | console.log('[knox-mpu] _completeUploads: req.end()'); 512 | }; 513 | }; 514 | 515 | /** 516 | Indicates that upload has been aborted 517 | **/ 518 | MultiPartUpload.prototype._abortUploads = function(callback) { 519 | 520 | var mpu = this; 521 | 522 | this.uploads.drain = function() { 523 | 524 | var req = mpu.client.request('DELETE', mpu.objectName + '?uploadId=' + mpu.uploadId); 525 | 526 | // Register the response handler 527 | parse.xmlResponse(req, function(err, body) { 528 | if (err) return callback(err); 529 | return callback('reached maxUploadSize'); 530 | }); 531 | 532 | req.end(); 533 | }; 534 | }; 535 | 536 | module.exports = MultiPartUpload; 537 | 538 | function random_seed(){ 539 | return 'xxxx'.replace(/[xy]/g, function(c) {var r = Math.random()*16|0,v=c=='x'?r:r&0x3|0x8;return v.toString(16);}); 540 | } 541 | --------------------------------------------------------------------------------