├── .gitignore ├── package.json ├── LICENSE ├── multipart.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Directory for instrumented libs generated by jscoverage/JSCover 12 | lib-cov 13 | 14 | # Coverage directory used by tools like istanbul 15 | coverage 16 | 17 | # nyc test coverage 18 | .nyc_output 19 | 20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 21 | .grunt 22 | 23 | # node-waf configuration 24 | .lock-wscript 25 | 26 | # Compiled binary addons (http://nodejs.org/api/addons.html) 27 | build/Release 28 | 29 | # Dependency directories 30 | node_modules 31 | jspm_packages 32 | 33 | # Optional npm cache directory 34 | .npm 35 | 36 | # Optional REPL history 37 | .node_repl_history 38 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parse-multipart", 3 | "version": "1.0.0004", 4 | "description": "A javascript/nodejs multipart/form-data parser which operates on raw data.", 5 | "main": "multipart.js", 6 | "author": "Cristian Salazar (christiansalazarh@gmail.com)", 7 | "license": "MIT", 8 | "scripts": { 9 | "test": "echo \"Error: no test specified\" && exit 1" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "git+https://github.com/freesoftwarefactory/parse-multipart.git" 14 | }, 15 | "keywords": [ 16 | "multipart/form-data", 17 | "form", 18 | "fileuploader" 19 | ], 20 | "bugs": { 21 | "url": "https://github.com/freesoftwarefactory/parse-multipart/issues" 22 | }, 23 | "homepage": "https://github.com/freesoftwarefactory/parse-multipart#readme" 24 | } 25 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 The Free Software Factory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// 22 | -------------------------------------------------------------------------------- /multipart.js: --------------------------------------------------------------------------------
/**
 * Multipart Parser (Finite State Machine)
 *
 * usage:
 *
 *   var multipart = require('./multipart.js');
 *   var body = multipart.DemoData();                                  // raw body
 *   var body = Buffer.from(event['body-json'].toString(), 'base64');  // AWS case
 *
 *   var boundary = multipart.getBoundary(event.params.header['content-type']);
 *   var parts = multipart.Parse(body, boundary);
 *
 *   // each uploaded file is:
 *   // { filename: 'A.txt', type: 'text/plain', name: 'uploads[]',
 *   //   data: <Buffer 41 41 41 41 42 42 42 42> }
 *   // each plain form field (no filename) is:
 *   // { name: 'title', data: <Buffer ...> }
 *
 * author: Cristian Salazar (christiansalazarh@gmail.com) www.chileshift.cl
 * Twitter: @AmazonAwsChile
 */
'use strict';

// States of the parser. The body is consumed one byte at a time and the
// state says which section of the payload the current byte belongs to.
var STATE_PREAMBLE = 0;        // before the first "--boundary" line
var STATE_HEADERS = 1;         // header lines of a part, up to the empty line
var STATE_DATA = 2;            // payload of a part, up to the next delimiter
var STATE_AFTER_BOUNDARY = 3;  // rest of a delimiter line ("" or the closing "--")

/**
 * Removes the surrounding double quotes of a header parameter value.
 *
 * @param {string} token - parameter value as written in the header.
 * @returns {string} the value without quotes; unquoted tokens are returned as-is.
 */
function unquote(token) {
	var isQuoted = token.length >= 2 &&
		token.charAt(0) === '"' &&
		token.charAt(token.length - 1) === '"';
	if (!isQuoted) {
		return token;
	}
	try {
		// A quoted value is decoded as a JSON string so escapes such as \" are resolved.
		return JSON.parse(token);
	} catch (err) {
		// Not valid JSON (for example a Windows path with bare backslashes):
		// fall back to the raw text between the quotes instead of failing the parse.
		return token.slice(1, -1);
	}
}

/**
 * Extracts the `key=value` parameters of a Content-Disposition header value.
 * Quoted values may contain ';' and '='. Keys are lower-cased and the first
 * occurrence of a key wins.
 *
 * @param {string} disposition - e.g. ' form-data; name="uploads[]"; filename="A.txt"'.
 * @returns {Object} prototype-less map from parameter name to decoded value.
 */
function parseDispositionParams(disposition) {
	var params = Object.create(null);
	// Created on every call because a /g regex keeps state in lastIndex.
	var pattern = /;\s*([^=;\s]+)\s*=\s*("(?:\\.|[^"\\])*"|[^;]*)/g;
	var match = pattern.exec(disposition);
	while (match !== null) {
		var key = match[1].toLowerCase();
		if (!(key in params)) {
			params[key] = unquote(match[2].trim());
		}
		match = pattern.exec(disposition);
	}
	return params;
}

/**
 * Copies the bytes [start, end) of the body into a new Buffer.
 *
 * @param {Buffer|Uint8Array|Array<number>} source - the whole body.
 * @param {number} start - first index to copy.
 * @param {number} end - index after the last byte to copy.
 * @returns {Buffer} an independent copy of the selected bytes.
 */
function copyBytes(source, start, end) {
	var view = typeof source.subarray === 'function'
		? source.subarray(start, end)
		: Array.prototype.slice.call(source, start, end);
	return Buffer.from(view);
}

/**
 * Builds the public description of one part.
 *
 * @param {string} disposition - value of the part's Content-Disposition header ('' if absent).
 * @param {?string} contentType - value of the part's Content-Type header, null if absent.
 * @param {Buffer} data - payload of the part.
 * @returns {Object} { filename, type, name, data } for files, { name, data } for plain fields.
 */
function buildPart(disposition, contentType, data) {
	var params = parseDispositionParams(disposition);
	var part = {};
	if (params.filename !== undefined) {
		part.filename = params.filename;
	}
	if (params.filename !== undefined || contentType !== null) {
		part.type = contentType === null ? '' : contentType;
	}
	if (params.name !== undefined) {
		part.name = params.name;
	}
	part.data = data;
	return part;
}

/**
 * Parses a raw multipart/form-data body.
 *
 * Lines are recognised by CRLF only. A part's headers are read until the first
 * empty line; everything after it, up to the CRLF that precedes the next
 * "--boundary", is the part's data. Parsing stops at the closing "--boundary--".
 *
 * @param {Buffer|Uint8Array|Array<number>} multipartBodyBuffer - raw body as bytes.
 *        Strings are not supported: the parser compares elements with byte values.
 * @param {string} boundary - boundary taken from the Content-Type header, without the leading "--".
 * @returns {Array<Object>} the parts in order of appearance (see buildPart);
 *          an empty array when the body or the boundary is missing.
 */
function Parse(multipartBodyBuffer, boundary) {
	var allParts = [];
	if (!multipartBodyBuffer || !boundary) {
		return allParts;
	}

	var delimiter = '--' + boundary;
	var state = STATE_PREAMBLE;
	var line = '';           // text of the current line, without CR / LF
	var skipLine = false;    // the current data line is known not to be a delimiter
	var disposition = '';
	var contentType = null;
	var dataStart = 0;

	for (var i = 0; i < multipartBodyBuffer.length; i++) {
		var oneByte = multipartBodyBuffer[i];
		var prevByte = i > 0 ? multipartBodyBuffer[i - 1] : null;
		var endOfLine = oneByte === 0x0a && prevByte === 0x0d;
		var isLineBreakChar = oneByte === 0x0a || oneByte === 0x0d;

		if (!isLineBreakChar) {
			line += String.fromCharCode(oneByte);
		}

		if (state === STATE_DATA) {
			if (skipLine) {
				line = ''; // mem save: nothing on this line can matter any more
			} else if (line === delimiter) {
				// i is the last byte of the delimiter; the CRLF in front of the
				// delimiter belongs to the framing, not to the data.
				var dataEnd = Math.max(dataStart, i - delimiter.length - 1);
				allParts.push(buildPart(disposition, contentType,
					copyBytes(multipartBodyBuffer, dataStart, dataEnd)));
				disposition = '';
				contentType = null;
				line = '';
				state = STATE_AFTER_BOUNDARY;
			} else if (line.length >= delimiter.length) {
				// Same length (or longer) without matching: this line cannot be a
				// delimiter, so "--boundary" text further along it is plain data.
				skipLine = true;
				line = '';
			}
			if (endOfLine) {
				line = '';
				skipLine = false;
			}
		} else if (endOfLine) {
			if (state === STATE_PREAMBLE) {
				if (line === delimiter) {
					state = STATE_HEADERS;
				}
			} else if (state === STATE_HEADERS) {
				if (line.length === 0) {
					// empty line: the headers are over, data starts at the next byte
					state = STATE_DATA;
					dataStart = i + 1;
					skipLine = false;
				} else {
					var colon = line.indexOf(':');
					var headerName = colon === -1 ? '' : line.slice(0, colon).trim().toLowerCase();
					if (headerName === 'content-disposition') {
						disposition = line.slice(colon + 1);
					} else if (headerName === 'content-type') {
						contentType = line.slice(colon + 1).trim();
					}
				}
			} else {
				// STATE_AFTER_BOUNDARY: "--" right after the boundary closes the body
				if (line.slice(0, 2) === '--') {
					break;
				}
				state = STATE_HEADERS;
			}
			line = '';
		}
	}
	return allParts;
}

/**
 * Reads the boundary from the content-type header sent by the http client.
 * This value may be similar to:
 * 'multipart/form-data; boundary=----WebKitFormBoundaryvm5A9tzU1ONaGP5B'
 *
 * @param {string} header - value of the Content-Type request header.
 * @returns {string} the boundary (surrounding quotes removed), or "" when the
 *          header is missing or has no boundary parameter.
 */
function getBoundary(header) {
	if (header === null || header === undefined) {
		return "";
	}
	var items = String(header).split(';');
	for (var i = 0; i < items.length; i++) {
		var item = items[i].trim();
		var eq = item.indexOf('=');
		if (eq > 0 && item.slice(0, eq).trim().toLowerCase() === 'boundary') {
			// everything after the first '=' is the value, it may contain '=' itself
			return unquote(item.slice(eq + 1).trim());
		}
	}
	return "";
}

/**
 * Builds a sample multipart body with two text files (A.txt and B.txt) and a
 * junk line in front of the first boundary. The payloads mix CRLF, lone CR and
 * lone LF on purpose.
 *
 * @returns {Buffer} the payload as a Buffer, so that it will be treated as binary content.
 */
function DemoData() {
	var body = [
		"trash1\r\n",
		"------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n",
		"Content-Disposition: form-data; name=\"uploads[]\"; filename=\"A.txt\"\r\n",
		"Content-Type: text/plain\r\n",
		"\r\n\r\n",
		"@11X",
		"111Y\r\n",
		"111Z\rCCCC\nCCCC\r\nCCCCC@\r\n\r\n",
		"------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n",
		"Content-Disposition: form-data; name=\"uploads[]\"; filename=\"B.txt\"\r\n",
		"Content-Type: text/plain\r\n",
		"\r\n\r\n",
		"@22X",
		"222Y\r\n",
		"222Z\r222W\n2220\r\n666@\r\n",
		"------WebKitFormBoundaryvef1fLxmoUdYZWXp--\r\n"
	].join('');
	return Buffer.from(body, 'utf-8');
}

// Public API. The guard only matters when the file is evaluated without the
// CommonJS wrapper (ES module, plain script); under require() it is always true.
if (typeof exports !== 'undefined') {
	exports.Parse = Parse;
	exports.getBoundary = getBoundary;
	exports.DemoData = DemoData;
}

// End of multipart.js. The text below is the beginning of README.md; it shared a
// physical line with the code above and is kept verbatim.
/* 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # parse-multipart 2 | 3 | A javascript/nodejs multipart/form-data parser which operates on raw data. 4 | 5 | # note 6 | 7 | this code is not abandoned. i just keep it as its minimum as possible. 8 | 9 | # author 10 | 11 | My name is Cristian Salazar (christiansalazarh@gmail.com) from Santiago Chile (South America). 12 | 13 | I'm an Amazon AWS developper focused in Serverless software development, i call it: the new age of software development. 14 | 15 | *im open to remote job:* 16 | 17 | please visit my profile on: [https://www.linkedin.com/in/cristian-salazar-h](https://www.linkedin.com/in/cristian-salazar-h) 18 | 19 | # Video/Tutorial 20 | 21 | You can Watch my video on which i expose the necesary steps to [implement a Multiform/form-data parser inside a Amazon Aws ApiGateway](https://www.youtube.com/watch?v=BrYJlR0yRnw). 22 | 23 | # Background 24 | 25 | Sometimes you have a server listeing for data arriving to it (example: the Amazon API Gateway in AWS), this is called a "endpoint". Some website or script may send data to this endpoint. It may contain files or text, a encoded video and so on. */
The data is packaged in a well-known format called "multipart/form-data". This data must be parsed: you must know where the data is, how much data is coming to you, and other aspects. This component will help you with this. 26 | 27 | As an example, take the Amazon AWS API Gateway. It operates as a facade between the http/s client (for example, the browser) and your component (your lambda function, an action script, etc.). The "component" is the one written by you, designed to extract the uploaded files or the data contained in them and then perform operations with it (storage, etc.). 28 | 29 | # What "Multipart/form-data" means 30 | 31 | The 'multipart/form-data' is the raw data attached to a POST coming inside an HTTP request. It has a format and marks to let you know when it starts and when it ends; these marks are also used to separate each "part of the data". That means the same POST may have different parts: text and/or files, video, sound, etc. 32 | 33 | First, I need to clarify this point: some people think that "multipart" means "a POST coming several times to an endpoint", each one carrying "a new or the next part" of the whole data. This is the wrong approach and may confuse you when trying to understand this work. Please keep it in mind. The data arrives as a whole package in the same POST, that is: if you send a file, the entire file is contained in the POST; it will have an initial mark and a finalization mark, and the mark is also sent to you in the same POST. In other words, "multipart" means: in the same POST you will find many parts of different data separated by a mark. They may be different files, text, etc. 34 | 35 | The header in the multipart/form-data has fields, which let you know what is coming to you; the next paragraph explains this: 36 | 37 | # Data Fields 38 | 39 | It is important to mention that sometimes the raw data contains some fields (look at the example below these lines, "filename=.." etc). 
So you must deal with it and parse the "field=value;" token. Some people wrote very nice modifications to my code in order to handle these fields with a better approach than mine; you may find the solution here: https://github.com/freesoftwarefactory/parse-multipart/pull/7. 40 | 41 | # Raw data example received in an endpoint: 42 | 43 | A raw payload formatted as "multipart/form-data" will look like this one: 44 | 45 | ``` 46 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 47 | Content-Disposition: form-data; name="uploads[]"; filename="somebinary.dat" 48 | Content-Type: application/octet-stream 49 | 50 | some binary data...maybe the bits of a image.. 51 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 52 | Content-Disposition: form-data; name="uploads[]"; filename="sometext.txt" 53 | Content-Type: text/plain 54 | 55 | hello how are you 56 | ------WebKitFormBoundaryDtbT5UpPj83kllfw-- 57 | ``` 58 | 59 | The lines above represent a raw multipart/form-data payload sent by some HTTP client via form submission, containing two files. We need to extract all the files contained inside it. The multipart format allows you to send more than one file in the same payload; that's why it is called multipart. 60 | 61 | # Usage 62 | 63 | In the next lines you can see an implementation. In this case two key values 64 | need to be present: 65 | 66 | * body, which can be: 67 | 68 | ``` 69 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 70 | Content-Disposition: form-data; name="uploads[]"; filename="sometext.txt" 71 | Content-Type: application/octet-stream 72 | 73 | hello how are you 74 | ------WebKitFormBoundaryDtbT5UpPj83kllfw-- 75 | ``` 76 | 77 | * boundary, the string which serves as a 'separator' between parts; it normally 78 | comes to you via headers. 
In this case, the boundary is: 79 | 80 | ``` 81 | ----WebKitFormBoundaryDtbT5UpPj83kllfw 82 | ``` 83 | 84 | Now, having these two key values, you can implement it: 85 | 86 | ``` 87 | var multipart = require('parse-multipart'); 88 | var body = "..the multipart raw body.."; 89 | var boundary = "----WebKitFormBoundaryDtbT5UpPj83kllfw"; 90 | var parts = multipart.Parse(body,boundary); 91 | 92 | for(var i=0;i<parts.length;i++){ 93 | var part = parts[i]; 94 | // will be: 95 | // { filename: 'A.txt', type: 'text/plain', 96 | // data: <Buffer 41 41 41 41 42 42 42 42> } 97 | } 98 | ``` 99 | 100 | The returned data is an array of parts, each one described by a filename, a type and data; the data is a Buffer (see also Node Buffer). 101 | --------------------------------------------------------------------------------