├── .circleci └── config.yml ├── .gitignore ├── .npmignore ├── README.md ├── package-lock.json ├── package.json ├── src └── multipart.ts ├── test └── multipart.spec.ts └── tsconfig.json /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | test: 4 | docker: 5 | - image: circleci/node:16 6 | working_directory: /tmp/multipart 7 | 8 | steps: 9 | - checkout 10 | - restore_cache: 11 | keys: 12 | - dependencies-{{ checksum "package-lock.json" }} 13 | - run: 14 | name: Install node dependencies 15 | command: npm install 16 | - save_cache: 17 | paths: 18 | - node_modules 19 | key: dependencies-{{ checksum "package-lock.json" }} 20 | - run: 21 | name: Run tests 22 | command: npm run test 23 | 24 | build: 25 | docker: 26 | - image: circleci/node:16 27 | 28 | working_directory: /tmp/multipart 29 | 30 | steps: 31 | - checkout 32 | - restore_cache: 33 | keys: 34 | - dependencies-{{ checksum "package-lock.json" }} 35 | - run: 36 | name: Install node dependencies 37 | command: npm install 38 | - run: 39 | name: Build library 40 | command: npm run build 41 | - save_cache: 42 | paths: 43 | - dist 44 | key: dist-{{ .Branch }}-{{ .Revision }} 45 | - run: 46 | name: Release 47 | command: npx semantic-release 48 | 49 | workflows: 50 | version: 2 51 | build: 52 | jobs: 53 | - test 54 | - build: 55 | requires: 56 | - test 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Directory for instrumented libs generated by jscoverage/JSCover 12 | lib-cov 13 | 14 | # Coverage directory used by tools like istanbul 15 | coverage 16 | 17 | # nyc test coverage 18 | .nyc_output 19 | 20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 21 | .grunt 
22 | 23 | # node-waf configuration 24 | .lock-wscript 25 | 26 | # Compiled binary addons (http://nodejs.org/api/addons.html) 27 | build/Release 28 | 29 | # Dependency directories 30 | node_modules 31 | jspm_packages 32 | 33 | # Optional npm cache directory 34 | .npm 35 | 36 | # Optional REPL history 37 | .node_repl_history 38 | 39 | #build 40 | dist -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | tsconfig.json 2 | tslint.json 3 | .nyc_output 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # parse-multipart-data 2 | 3 | A TypeScript multipart/form-data parser library which operates on raw data. 4 | Forked from [freesoftwarefactory/parse-multipart](https://github.com/freesoftwarefactory/parse-multipart) 5 | 6 | # Background 7 | 8 | Sometimes you only have access to the raw multipart payload and it needs to be 9 | parsed in order to extract the files or data contained in it. As an example: 10 | the Amazon AWS ApiGateway, which operates as a facade between the HTTP 11 | client and your component (the one you wrote to extract the 12 | uploaded files or data). 13 | 14 | A raw payload formatted as multipart/form-data looks like this: 15 | 16 | ```bash 17 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 18 | Content-Disposition: form-data; name="uploads[]"; filename="somebinary.dat" 19 | Content-Type: application/octet-stream 20 | 21 | some binary data...maybe the bits of a image.. 
22 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 23 | Content-Disposition: form-data; name="uploads[]"; filename="sometext.txt" 24 | Content-Type: text/plain 25 | 26 | hello how are you 27 | 28 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 29 | Content-Disposition: form-data; name="input1"; 30 | 31 | value1 32 | ------WebKitFormBoundaryDtbT5UpPj83kllfw-- 33 | ``` 34 | 35 | The lines above represent a raw multipart/form-data payload sent by some 36 | HTTP client via form submission, containing two files and a text input named `input1` with value `value1`. We need to extract everything contained inside it. The multipart format allows you to send more 37 | than one file in the same payload, which is why it is called multipart. 38 | 39 | # Usage 40 | 41 | In the next lines you can see an implementation. Two key values 42 | need to be present: 43 | 44 | * body, which can be: 45 | 46 | ```bash 47 | ------WebKitFormBoundaryDtbT5UpPj83kllfw 48 | Content-Disposition: form-data; name="uploads[]"; filename="sometext.txt" 49 | Content-Type: application/octet-stream 50 | 51 | hello how are you 52 | ------WebKitFormBoundaryDtbT5UpPj83kllfw-- 53 | ``` 54 | 55 | * boundary, the string which serves as a 'separator' between parts; it normally 56 | comes to you via headers. In this case, the boundary is: 57 | 58 | ```bash 59 | ----WebKitFormBoundaryDtbT5UpPj83kllfw 60 | ``` 61 | 62 | Now, having these two key values, you can implement it: 63 | 64 | ```typescript 65 | const multipart = require('parse-multipart-data'); 66 | const body = Buffer.from("..the multipart raw body.."); 67 | const boundary = "----WebKitFormBoundaryDtbT5UpPj83kllfw"; 68 | const parts = multipart.parse(body,boundary); 69 | 70 | for (let i = 0; i < parts.length; i++) { 71 | const part = parts[i]; 72 | // will be: { filename: 'A.txt', type: 'text/plain', data: Buffer } 73 | } 74 | ``` 75 | 76 | The returned value is an array of parts, each with the properties `name`, `filename`, `type` and `data`. 
`data` is type of [Buffer](https://nodejs.org/api/buffer.html). 77 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parse-multipart-data", 3 | "version": "0.0.0-development", 4 | "description": 5 | "A javascript/nodejs multipart/form-data parser which operates on raw data.", 6 | "main": "dist/multipart.js", 7 | "author": "Ignacio Mazzara (nachomazzara@gmail.com)", 8 | "license": "MIT", 9 | "scripts": { 10 | "prebuild": "rm -rf dist", 11 | "build": "tsc --project tsconfig.json", 12 | "test": "nyc mocha --require ts-node/register test/*.spec.ts", 13 | "semantic-release": "semantic-release" 14 | }, 15 | "nyc": { 16 | "all": true, 17 | "include": ["src"], 18 | "extension": [".ts"], 19 | "reporter": ["text-summary"], 20 | "sourceMap": true, 21 | "instrument": true 22 | }, 23 | "repository": { 24 | "type": "git", 25 | "url": "https://github.com/nachomazzara/parse-multipart-data.git" 26 | }, 27 | "keywords": ["multipart/form-data", "form", "fileuploader"], 28 | "bugs": { 29 | "url": "https://github.com/nachomazzara/parse-multipart-data/issues" 30 | }, 31 | "homepage": "https://github.com/nachomazzara/parse-multipart-data#readme", 32 | "dependencies": { 33 | }, 34 | "devDependencies": { 35 | "@types/chai": "^4.3.0", 36 | "@types/mocha": "^9.1.0", 37 | "@types/node": "^17.0.23", 38 | "chai": "^4.3.6", 39 | "mocha": "^9.2.2", 40 | "nyc": "^15.1.0", 41 | "prettier": "^2.6.2", 42 | "semantic-release": "^19.0.2", 43 | "ts-node": "^10.7.0", 44 | "typescript": "^4.6.3" 45 | }, 46 | "prettier": { 47 | "printWidth": 80, 48 | "singleQuote": true, 49 | "semi": false, 50 | "trailingComma": "none" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/multipart.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Multipart Parser (Finite State 
Machine) 3 | * usage: 4 | * const multipart = require('./multipart.js'); 5 | * const { body, boundary } = multipart.DemoData(); // demo body and its boundary 6 | * const body = Buffer.from(event['body-json'].toString(),'base64'); // AWS case 7 | * const boundary = multipart.getBoundary(event.params.header['content-type']); 8 | * const parts = multipart.parse(body,boundary); 9 | * each part is: 10 | * { name: 'uploads[]', filename: 'A.txt', type: 'text/plain', data: Buffer } 11 | * or { name: 'key', data: Buffer } 12 | */ 13 | // Raw material collected by parse() for one part: the two header lines of interest and the body bytes. 14 | type Part = { 15 | contentDispositionHeader: string 16 | contentTypeHeader: string 17 | part: number[] 18 | } 19 | // One parsed part as returned by parse(); filename (taken from the third ';'-separated Content-Disposition item) and type are only set when that third item exists. 20 | type Input = { 21 | filename?: string 22 | name?: string 23 | type: string 24 | data: Buffer 25 | } 26 | // States of the byte-by-byte scanner in parse(). 27 | enum ParsingState { 28 | INIT, 29 | READING_HEADERS, 30 | READING_DATA, 31 | READING_PART_SEPARATOR 32 | } 33 | // Scans multipartBodyBuffer byte by byte and returns one Input for every part that is closed by a '--' + boundary delimiter. 34 | export function parse(multipartBodyBuffer: Buffer, boundary: string): Input[] { 35 | let lastline = '' 36 | let contentDispositionHeader = '' 37 | let contentTypeHeader = '' 38 | let state: ParsingState = ParsingState.INIT 39 | let buffer: number[] = [] 40 | const allParts: Input[] = [] 41 | 42 | let currentPartHeaders: string[] = [] 43 | 44 | for (let i = 0; i < multipartBodyBuffer.length; i++) { 45 | const oneByte: number = multipartBodyBuffer[i] 46 | const prevByte: number | null = i > 0 ? multipartBodyBuffer[i - 1] : null 47 | // 0x0a => \n 48 | // 0x0d => \r 49 | const newLineDetected: boolean = oneByte === 0x0a && prevByte === 0x0d 50 | const newLineChar: boolean = oneByte === 0x0a || oneByte === 0x0d 51 | // CR and LF bytes are never appended to lastline; every other byte is appended as one char code 52 | if (!newLineChar) lastline += String.fromCharCode(oneByte) 53 | if (ParsingState.INIT === state && newLineDetected) { 54 | // searching for the first delimiter line ('--' + boundary); any other line seen before it is discarded 55 | if ('--' + boundary === lastline) { 56 | state = ParsingState.READING_HEADERS // found boundary. start reading headers 57 | } 58 | lastline = '' 59 | } else if (ParsingState.READING_HEADERS === state && newLineDetected) { 60 | // parsing headers. 
Headers are separated by an empty line from the content. Stop reading headers when the line is empty 61 | if (lastline.length) { 62 | currentPartHeaders.push(lastline) 63 | } else { 64 | // found empty line. search for the headers we want (matched case-insensitively) and set the values 65 | for (const h of currentPartHeaders) { 66 | if (h.toLowerCase().startsWith('content-disposition:')) { 67 | contentDispositionHeader = h 68 | } else if (h.toLowerCase().startsWith('content-type:')) { 69 | contentTypeHeader = h 70 | } 71 | } 72 | state = ParsingState.READING_DATA 73 | buffer = [] 74 | } 75 | lastline = '' 76 | } else if (ParsingState.READING_DATA === state) { 77 | // parsing data: bytes are collected in buffer until lastline equals the delimiter 78 | if (lastline.length > boundary.length + 4) { 79 | lastline = '' // mem save: a line this long can no longer equal '--' + boundary 80 | } 81 | if ('--' + boundary === lastline) { 82 | const j = buffer.length - lastline.length 83 | const part = buffer.slice(0, j - 1) 84 | // the current byte (last delimiter char) was not pushed, so slice(0, j - 1) drops the delimiter chars already buffered plus two more bytes (the CRLF, assuming the delimiter directly follows one) 85 | allParts.push( 86 | process({ contentDispositionHeader, contentTypeHeader, part }) 87 | ) 88 | buffer = [] 89 | currentPartHeaders = [] 90 | lastline = '' 91 | state = ParsingState.READING_PART_SEPARATOR 92 | contentDispositionHeader = '' 93 | contentTypeHeader = '' 94 | } else { 95 | buffer.push(oneByte) 96 | } 97 | if (newLineDetected) { 98 | lastline = '' 99 | } 100 | } else if (ParsingState.READING_PART_SEPARATOR === state) { // skip the rest of the delimiter line, then expect headers 101 | if (newLineDetected) { 102 | state = ParsingState.READING_HEADERS 103 | } 104 | } 105 | } 106 | return allParts 107 | } 108 | // Returns the boundary value with one leading and one trailing quote character removed, or '' when no ';'-separated item contains 'boundary'. NOTE(review): split('=')[1] keeps only the text between the first and second '=', so a boundary containing '=' would be cut short - confirm whether that matters. 109 | // read the boundary from the content-type header sent by the http client 110 | // this value may be similar to: 111 | // 'multipart/form-data; boundary=----WebKitFormBoundaryvm5A9tzU1ONaGP5B', 112 | export function getBoundary(header: string): string { 113 | const items = header.split(';') 114 | if (items) { 115 | for (let i = 0; i < items.length; i++) { 116 | const item = new String(items[i]).trim() 117 | if (item.indexOf('boundary') >= 0) { 118 | const k = item.split('=') 119 | return new String(k[1]).trim().replace(/^["']|["']$/g, 
'') 120 | } 121 | } 122 | } 123 | return '' 124 | } 125 | // Builds the sample payload used by the test spec: one junk line, two file parts and one plain field, plus the matching boundary. 126 | export function DemoData(): { body: Buffer; boundary: string } { 127 | let body = 'trash1\r\n' 128 | body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n' 129 | body += 'Content-Type: text/plain\r\n' 130 | body += 131 | 'Content-Disposition: form-data; name="uploads[]"; filename="A.txt"\r\n' 132 | body += '\r\n' 133 | body += '@11X' 134 | body += '111Y\r\n' 135 | body += '111Z\rCCCC\nCCCC\r\nCCCCC@\r\n\r\n' 136 | body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n' 137 | body += 'Content-Type: text/plain\r\n' 138 | body += 139 | 'Content-Disposition: form-data; name="uploads[]"; filename="B.txt"\r\n' 140 | body += '\r\n' 141 | body += '@22X' 142 | body += '222Y\r\n' 143 | body += '222Z\r222W\n2220\r\n666@\r\n' 144 | body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp\r\n' 145 | body += 'Content-Disposition: form-data; name="input1"\r\n' 146 | body += '\r\n' 147 | body += 'value1\r\n' 148 | body += '------WebKitFormBoundaryvef1fLxmoUdYZWXp--\r\n' 149 | return { 150 | body: Buffer.from(body), 151 | boundary: '----WebKitFormBoundaryvef1fLxmoUdYZWXp' 152 | } 153 | } 154 | // Converts the headers and bytes collected for one part into the Input object that parse() returns. 155 | function process(part: Part): Input { 156 | // will transform this object: 157 | // { contentDispositionHeader: 'Content-Disposition: form-data; name="uploads[]"; filename="A.txt"', 158 | // contentTypeHeader: 'Content-Type: text/plain', 159 | // part: [65, 65, 65, 65, 66, 66, 66, 66] } 160 | // into this one: 161 | // { filename: 'A.txt', type: 'text/plain', name: 'uploads[]', data: Buffer } 162 | const obj = function (str: string) { 163 | const k = str.split('=') 164 | const a = k[0].trim() 165 | // JSON.parse removes the surrounding double quotes, so the value must be a valid JSON literal such as "A.txt" 166 | const b = JSON.parse(k[1].trim()) 167 | const o = {} 168 | Object.defineProperty(o, a, { 169 | value: b, 170 | writable: true, 171 | enumerable: true, 172 | configurable: true 173 | }) 174 | return o 175 | } 176 | const header = part.contentDispositionHeader.split(';') 177 | // NOTE(review): positional parsing - header[1] is taken as the name parameter and header[2] as the filename parameter, whatever their actual keys are 178 | const filenameData = header[2] 179 | let input = {} 180 | if (filenameData) { 181 | input = obj(filenameData) 182 | const 
contentType = part.contentTypeHeader.split(':')[1].trim() // NOTE(review): throws when contentTypeHeader is '' (no Content-Type header was found for a part that has a third Content-Disposition item) 183 | Object.defineProperty(input, 'type', { 184 | value: contentType, 185 | writable: true, 186 | enumerable: true, 187 | configurable: true 188 | }) 189 | } 190 | // always process the name field: text after the first '=' of the second ';'-separated item, with every double quote removed 191 | Object.defineProperty(input, 'name', { 192 | value: header[1].split('=')[1].replace(/"/g, ''), 193 | writable: true, 194 | enumerable: true, 195 | configurable: true 196 | }) 197 | 198 | Object.defineProperty(input, 'data', { 199 | value: Buffer.from(part.part), 200 | writable: true, 201 | enumerable: true, 202 | configurable: true 203 | }) 204 | return input as Input 205 | } 206 | -------------------------------------------------------------------------------- /test/multipart.spec.ts: -------------------------------------------------------------------------------- 1 | import * as chai from 'chai' 2 | 3 | import { parse, DemoData, getBoundary } from '../src/multipart' 4 | 5 | const expect = chai.expect 6 | // parts expected from parsing the DemoData() payload, in order 7 | const expected = [ 8 | { 9 | name: 'uploads[]', 10 | filename: 'A.txt', 11 | type: 'text/plain', 12 | data: Buffer.from('@11X111Y\r\n111Z\rCCCC\nCCCC\r\nCCCCC@\r\n') 13 | }, 14 | { 15 | name: 'uploads[]', 16 | filename: 'B.txt', 17 | type: 'text/plain', 18 | data: Buffer.from('@22X222Y\r\n222Z\r222W\n2220\r\n666@') 19 | }, 20 | { name: 'input1', data: Buffer.from('value1') } 21 | ] 22 | describe('Multipart', function() { 23 | it('should parse multipart', function() { 24 | const { body, boundary } = DemoData() 25 | const parts = parse(body, boundary) 26 | 27 | expect(parts.length).to.be.equal(3) 28 | for (let i = 0; i < expected.length; i++) { 29 | const data = expected[i] 30 | const part = parts[i] 31 | 32 | expect(data.filename).to.be.equal(part.filename) 33 | expect(data.name).to.be.equal(part.name) 34 | expect(data.type).to.be.equal(part.type) 35 | expect(data.data.toString()).to.be.equal(part.data.toString()) 36 | } 37 | }) 38 | 39 | it('should get boundary', function() { 40 | const header = 41 | 
'multipart/form-data; boundary=----WebKitFormBoundaryvm5A9tzU1ONaGP5B' 42 | const boundary = getBoundary(header) 43 | 44 | expect(boundary).to.be.equal('----WebKitFormBoundaryvm5A9tzU1ONaGP5B') 45 | }) 46 | // boundary value wrapped in double quotes inside the header 47 | it('should get boundary in double quotes', function() { 48 | const header = 49 | 'multipart/form-data; boundary="----WebKitFormBoundaryvm5A9tzU1ONaGP5B"' 50 | const boundary = getBoundary(header) 51 | 52 | expect(boundary).to.be.equal('----WebKitFormBoundaryvm5A9tzU1ONaGP5B') 53 | }) 54 | // boundary value wrapped in single quotes inside the header 55 | it('should get boundary in single quotes', function() { 56 | const header = 57 | "multipart/form-data; boundary='----WebKitFormBoundaryvm5A9tzU1ONaGP5B'" 58 | const boundary = getBoundary(header) 59 | 60 | expect(boundary).to.be.equal('----WebKitFormBoundaryvm5A9tzU1ONaGP5B') 61 | }) 62 | 63 | it('should not parse multipart if boundary is not correct', function() { 64 | const { body, boundary } = DemoData() 65 | const parts = parse(body, boundary + 'bad') 66 | 67 | expect(parts.length).to.be.equal(0) 68 | }) 69 | 70 | it('should not parse if multipart is not correct', function() { 71 | const { boundary } = DemoData() 72 | const parts = parse(Buffer.from('hellow world'), boundary) 73 | 74 | expect(parts.length).to.be.equal(0) 75 | }) 76 | }) 77 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": ".", 4 | "outDir": "dist", 5 | "module": "commonjs", 6 | "target": "es5", 7 | "lib": ["es2017", "dom"], 8 | "sourceMap": true, 9 | "moduleResolution": "node", 10 | "forceConsistentCasingInFileNames": true, 11 | "noImplicitReturns": true, 12 | "noImplicitThis": true, 13 | "noImplicitAny": false, 14 | "strictNullChecks": true, 15 | "suppressImplicitAnyIndexErrors": true, 16 | "noUnusedParameters": true, 17 | "noUnusedLocals": true, 18 | "plugins": [{ "name": "tslint-language-service" }], 19 | "declaration": 
true 20 | }, 21 | "exclude": [ 22 | "node_modules", 23 | "build", 24 | "scripts", 25 | "acceptance-tests", 26 | "webpack", 27 | "jest", 28 | "src/setupTests.ts" 29 | ], 30 | "include": ["./src"] 31 | } 32 | --------------------------------------------------------------------------------