├── .gitignore ├── License ├── Makefile ├── Readme.md ├── benchmark ├── benchmark.js ├── helper.js └── parsers │ ├── formidable.js │ └── multipart_parser.js ├── index.js ├── lib ├── multipart_parser.js └── part.js ├── package.json ├── rfc ├── 0822-arpa-internet-text-messages.txt ├── 1341-the-multipart-content-type.html ├── 2045-format-of-internet-message-bodies.txt ├── 2046-media-types.txt ├── 2047-message-header-extensions-for-non-ascii-text.txt ├── 2048-registration-procedures.txt ├── 2049-conformance-criteria-and-examples.txt ├── 2387-mime-multipart-content-type.txt └── 2388-returning-values-from-forms-multipart-form-data.txt └── test ├── common.js ├── fast ├── test-fixtures.js └── test-multipart-parser.js └── run.js /.gitignore: -------------------------------------------------------------------------------- 1 | *.un~ 2 | /node_modules 3 | -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Felix Geisendörfer (felix@debuggable.com) and contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | 3 | test: 4 | @./test/run.js 5 | 6 | build: npm test 7 | 8 | npm: 9 | npm install . 10 | 11 | clean: 12 | rm test/tmp/* 13 | 14 | .PHONY: test clean build 15 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # multipart-parser 2 | 3 | A fast and streaming multipart parser. 4 | 5 | ## Is it any good? 6 | 7 | No, this is still being developed. 8 | 9 | ## Is it fast? 10 | 11 | Yes. According to the benchmark suite shipped with this parser, it is on par 12 | with existing implementations, and can easily exceed typical disk/storage 13 | throughputs. 14 | 15 | ``` 16 | $ node --version 17 | v0.4.12 18 | $ node benchmark/benchmark.js -r 100 19 | Options: 20 | Entity Size : 10 mb 21 | Chunk Size : 32 kb 22 | Runs : 100 23 | Iterations per run : 10 24 | 25 | .................................................................................................... 26 | Benchmark took: 33.8 seconds 27 | 28 | formidable: 740.47 mb/sec (95% of 1000 iterations) 29 | multipart_parser: 846.75 mb/sec (95% of 1000 iterations) 30 | ``` 31 | 32 | ``` 33 | $ node --version 34 | v0.5.10-pre 35 | $ node benchmark/benchmark.js -r 100 36 | Options: 37 | Entity Size : 10 mb 38 | Chunk Size : 32 kb 39 | Runs : 100 40 | Iterations per run : 10 41 | 42 | .................................................................................................... 
43 | Benchmark took: 33.4 seconds 44 | 45 | formidable: 775.19 mb/sec (95% of 1000 iterations) 46 | multipart_parser: 934.58 mb/sec (95% of 1000 iterations) 47 | ``` 48 | 49 | ## Is it secure? 50 | 51 | Blah. 52 | 53 | ## Is it compliant? 54 | 55 | Blah. 56 | 57 | ## Is it user friendly? 58 | 59 | Blah. 60 | 61 | ## License 62 | 63 | MIT License. 64 | -------------------------------------------------------------------------------- /benchmark/benchmark.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var util = require('util'); 3 | var uubench = require('uubench'); 4 | var helper = require('./helper'); 5 | var options = require('commander'); 6 | 7 | options 8 | .option('-e, --entitysize [size]', 'The size of the entity [10mb].', '10mb') 9 | .option('-c, --chunksize [size]', 'The write chunksize in kb [32kb]', '32kb') 10 | .option('-r, --runs [runs]', 'How many times to run the benchmarks [10]', 10) 11 | .option('-i, --iterations [iterations]', 'The minimum amount of iterations for each run. 
[10]', 10) 12 | .parse(process.argv); 13 | 14 | options.runs = parseInt(options.runs, 10); 15 | options.iterations = parseInt(options.iterations, 10); 16 | 17 | console.log('Options:'); 18 | console.log(' Entity Size : %s', helper.toHuman(options.entitysize)); 19 | console.log(' Chunk Size : %s', helper.toHuman(options.chunksize)); 20 | console.log(' Runs : %s', options.runs); 21 | console.log(' Iterations per run : %s', options.iterations); 22 | console.log(''); 23 | 24 | var start = Date.now(); 25 | 26 | var results = {}; 27 | var suite = new uubench.Suite({ 28 | iterations: options.iterations, 29 | type: 'fixed', 30 | result: function(name, stats) { 31 | var seconds = (stats.elapsed / 1000); 32 | var mb = helper.toUnit('mb', options.entitysize); 33 | var mbPerSec = helper.round(((stats.iterations * mb) / seconds), 2); 34 | 35 | results[name] = results[name] || {iterations: 0, series: []}; 36 | results[name].iterations += stats.iterations; 37 | results[name].series.push(mbPerSec); 38 | }, 39 | done: function() { 40 | process.stdout.write('.'); 41 | if (--options.runs) return suite.run(); 42 | 43 | var duration = helper.round((Date.now() - start) / 1000, 1); 44 | console.log('\nBenchmark took: %d seconds\n', duration); 45 | 46 | for (var name in results) { 47 | var result = results[name]; 48 | var series = result.series; 49 | series.sort(function(a, b) { 50 | return b - a; 51 | }); 52 | 53 | var percentile = 95; 54 | var speed = helper.round(helper.quantile(series, percentile / 100), 2) 55 | console.log( 56 | '%s: %s mb/sec (%d% of %d iterations)', 57 | name, 58 | speed, 59 | percentile, 60 | result.iterations 61 | ); 62 | } 63 | }, 64 | }); 65 | 66 | var boundary = helper.boundary(); 67 | var buffer = helper.multipartMessage(boundary, options.entitysize); 68 | var chunkSize = helper.toBytes(options.chunksize); 69 | 70 | var parsers = helper.parsers(); 71 | for (var name in parsers) { 72 | (function(name) { 73 | suite.bench(name, function(next) { 74 | var write = 
parsers[name](boundary, next); 75 | 76 | for (var i = 0; i < buffer.length; i += chunkSize) { 77 | var end = (i + chunkSize < buffer.length) 78 | ? i + chunkSize 79 | : buffer.length; 80 | 81 | write(buffer.slice(i, end)); 82 | } 83 | }); 84 | })(name); 85 | } 86 | 87 | suite.run(); 88 | -------------------------------------------------------------------------------- /benchmark/helper.js: -------------------------------------------------------------------------------- 1 | var _ = require('underscore'); 2 | var fs = require('fs'); 3 | var units = { 4 | 'gb' : Math.pow(1024, 3), 5 | 'mb' : Math.pow(1024, 2), 6 | 'kb' : Math.pow(1024, 1), 7 | 'bytes' : Math.pow(1024, 0), 8 | }; 9 | 10 | exports.toBytes = function(str) { 11 | if (typeof str === 'number') return str; 12 | 13 | var bytes = str.replace(/^([\d.]+)(.*)/i, function(m, size, unit) { 14 | size = parseFloat(size, 10); 15 | 16 | switch (unit) { 17 | case 'g' : 18 | case 'gb' : return size * Math.pow(1024, 2); 19 | case 'm' : 20 | case 'mb' : return size * Math.pow(1024, 2); 21 | case 'k' : 22 | case 'kb' : return size * Math.pow(1024, 1); 23 | case 'b' : return size * Math.pow(1024, 0); 24 | default : throw new Error('Unknown size unit: "' + unit + '"'); 25 | } 26 | }); 27 | 28 | return parseInt(bytes, 10); 29 | }; 30 | 31 | exports.toUnit = function(unit, size) { 32 | var bytes = this.toBytes(size); 33 | var limit = units[unit]; 34 | 35 | return (bytes / limit); 36 | }; 37 | 38 | exports.toHuman = function(size) { 39 | size = (typeof size === 'string') 40 | ? 
size = this.toBytes(size) 41 | : size; 42 | 43 | for (var unit in units) { 44 | var limit = units[unit]; 45 | if (size < limit) continue; 46 | 47 | size = (size / limit) 48 | .toFixed(1) 49 | .replace(/\.0$/, ''); 50 | 51 | return size + ' ' + unit; 52 | } 53 | }; 54 | 55 | exports.boundary = function() { 56 | return '-----------------------------168072824752491622650073'; 57 | }; 58 | 59 | exports.multipartMessage = function(boundary, size) { 60 | size = this.toBytes(size); 61 | 62 | var head = 63 | '--'+boundary+'\r\n' 64 | + 'content-disposition: form-data; name="field1"\r\n' 65 | + '\r\n' 66 | , tail = '\r\n--'+boundary+'--\r\n' 67 | , buffer = new Buffer(size); 68 | 69 | buffer.write(head, 'ascii', 0); 70 | buffer.write(tail, 'ascii', buffer.length - tail.length); 71 | return buffer; 72 | }; 73 | 74 | exports.parsers = function() { 75 | var dir = __dirname + '/parsers'; 76 | var parsers = {}; 77 | 78 | fs 79 | .readdirSync(dir) 80 | .filter(function(name) { 81 | return /\.js$/.test(name); 82 | }) 83 | .forEach(function(file) { 84 | var parser = require(dir + '/' + file); 85 | var name = file.replace(/\.js$/, ''); 86 | 87 | parsers[name] = parser; 88 | }); 89 | 90 | return parsers; 91 | }; 92 | 93 | // From: https://gist.github.com/642690 94 | (function(uustats){ 95 | uustats.sdev = function(series) { 96 | return Math.sqrt(uustats.variance(series)); 97 | }; 98 | 99 | uustats.variance = function(series) { 100 | var t = 0, squares = 0, len = series.length; 101 | 102 | for (var i=0; i= len) { return series[len - 1] } 143 | return series[f] * (t - pos) + series[t] * (pos - f); 144 | }; 145 | 146 | uustats.round = function(x, n) { 147 | return Math.round(x*Math.pow(10, n))/Math.pow(10, n); 148 | }; 149 | })(typeof exports !== 'undefined' ? 
exports : window.uustats = {}); 150 | -------------------------------------------------------------------------------- /benchmark/parsers/formidable.js: -------------------------------------------------------------------------------- 1 | var MultipartParser = require('formidable/lib/multipart_parser').MultipartParser; 2 | 3 | module.exports = function(boundary, next) { 4 | var parser = new MultipartParser(); 5 | parser.onEnd = next; 6 | parser.initWithBoundary(boundary); 7 | 8 | return parser.write.bind(parser); 9 | }; 10 | -------------------------------------------------------------------------------- /benchmark/parsers/multipart_parser.js: -------------------------------------------------------------------------------- 1 | var MultipartParser = require('../../index'); 2 | 3 | module.exports = function(boundary, next) { 4 | var parser = MultipartParser.create(boundary); 5 | parser.on('end', next); 6 | return parser.write.bind(parser); 7 | }; 8 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/multipart_parser.js'); 2 | -------------------------------------------------------------------------------- /lib/multipart_parser.js: -------------------------------------------------------------------------------- 1 | var EventEmitter = require('events').EventEmitter; 2 | var util = require('util'); 3 | var Part = require('./part'); 4 | 5 | /* Tokens as defined by rfc 2616. Also lowercases them. 6 | * token = 1*<any CHAR except CTLs or separators> 7 | * separators = "(" | ")" | "<" | ">" | "@" 8 | * | "," | ";" | ":" | "\" | <"> 9 | * | "/" | "[" | "]" | "?" 
| "=" 10 | * | "{" | "}" | SP | HT 11 | * 12 | * From Ryan Dahl's http_parser.c 13 | */ 14 | var TOKENS = [ 15 | /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 16 | 0, 0, 0, 0, 0, 0, 0, 0, 17 | /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 18 | 0, 0, 0, 0, 0, 0, 0, 0, 19 | /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 20 | 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 22 | 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 24 | ' ', '!', '"', '#', '$', '%', '&', '\'', 25 | /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 26 | 0, 0, '*', '+', 0, '-', '.', '/', 27 | /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 28 | '0', '1', '2', '3', '4', '5', '6', '7', 29 | /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ 30 | '8', '9', 0, 0, 0, 0, 0, 0, 31 | /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 32 | 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 33 | /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 34 | 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 35 | /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 36 | 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 37 | /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 38 | 'x', 'y', 'z', 0, 0, 0, '^', '_', 39 | /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 40 | '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 41 | /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 42 | 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 43 | /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 44 | 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 45 | /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 46 | 'x', 'y', 'z', 0, '|', '}', '~', 0 ]; 47 | 48 | var LF = 10; 49 | var CR = 13; 50 | var COLON = 58; 51 | var DASH = 45; 52 | var CRLF = new Buffer('\r\n'); 53 | var DASH_END = new Buffer('--\r\n'); 54 | 55 | module.exports = MultipartParser; 56 | util.inherits(MultipartParser, EventEmitter); 57 | function MultipartParser() { 58 | EventEmitter.call(this); 59 | 60 | this.writable = false; 61 | this._state = 
'NO_BOUNDARY'; 62 | this._boundary = null; 63 | this._preamble = true; 64 | this._counter = 0; 65 | this._marker = 0; 66 | this._offset = 0; 67 | this._part = null; 68 | this._headerBufferLimit = 4 * 1024; 69 | this._headerBufferLength = 0; 70 | this._headerField = ''; 71 | this._headerValue = ''; 72 | this._boundaryChars = {}; 73 | } 74 | 75 | 76 | MultipartParser.create = function(boundary) { 77 | var instance = new this(); 78 | instance.boundary(boundary); 79 | return instance; 80 | }; 81 | 82 | MultipartParser.prototype.boundary = function(boundary) { 83 | // Last 3 bytes are used for lookbehind 84 | this._boundary = new Buffer('\r\n--' + boundary + '???'); 85 | this._state = 'PREAMBLE'; 86 | this.writable = true; 87 | 88 | this._boundaryChars = {}; 89 | for (var i = 0; i < this._boundary.length - 3; i++) { 90 | this._boundaryChars[this._boundary[i]] = true; 91 | } 92 | }; 93 | 94 | 95 | MultipartParser.prototype.write = function(buffer) { 96 | if (!this.writable) { 97 | throw this._error('NotWritable', 'Bad state: ' + this._state); 98 | } 99 | 100 | var i = 0; 101 | var byte = buffer[i]; 102 | 103 | while (true) { 104 | switch (this._state) { 105 | case 'PREAMBLE': 106 | switch (byte) { 107 | case CR: 108 | this._state = 'BOUNDARY'; 109 | this._marker = 0; 110 | break; 111 | case DASH: 112 | // As per RFC-1341 we have to accept this, but complying clients 113 | // should not generate this. 
114 | this._state = 'BOUNDARY'; 115 | this._marker = 2; 116 | break; 117 | } 118 | break; 119 | case 'BOUNDARY': 120 | if (byte !== this._boundary[++this._marker]) { 121 | this._state = 'BOUNDARY_MISMATCH'; 122 | continue; 123 | } 124 | 125 | if (this._marker === this._boundary.length - 4) { 126 | this._state = 'BOUNDARY_END'; 127 | } 128 | break; 129 | case 'BOUNDARY_END': 130 | this._counter = 0; 131 | this._boundary[++this._marker] = byte; 132 | 133 | switch (byte) { 134 | case CR: 135 | this._state = 'BOUNDARY_LINE_END'; 136 | break; 137 | case DASH: 138 | if (this._preamble) { 139 | this._state = 'PREAMBLE'; 140 | break; 141 | } 142 | 143 | this._state = 'BOUNDARY_DASH_END'; 144 | break; 145 | default: 146 | this._state = 'BOUNDARY_MISMATCH'; 147 | continue; 148 | } 149 | break; 150 | case 'BOUNDARY_LINE_END': 151 | switch (byte) { 152 | case LF: 153 | if (this._part) this._part.end(); 154 | 155 | this._preamble = false; 156 | this._state = 'HEADER_FIELD'; 157 | this._counter = 0; 158 | this._marker = 0; 159 | this._part = new Part(); 160 | break 161 | default: 162 | this._state = 'BOUNDARY_MISMATCH'; 163 | continue; 164 | } 165 | break; 166 | case 'BOUNDARY_DASH_END': 167 | if (byte !== DASH_END[++this._counter]) { 168 | this._state = 'BOUNDARY_MISMATCH'; 169 | continue; 170 | } 171 | 172 | if (this._counter === DASH_END.length - 1) { 173 | this._part.end(); 174 | this.emit('end'); 175 | this._state = 'EPILOGUE'; 176 | break; 177 | } 178 | 179 | this._boundary[++this._marker] = byte; 180 | break; 181 | case 'BOUNDARY_MISMATCH': 182 | if (this._preamble) { 183 | this._state = 'PREAMBLE'; 184 | continue; 185 | } 186 | 187 | this._part.write(this._boundary, 0, this._marker); 188 | this._state = 'PART_BODY'; 189 | continue; 190 | case 'HEADER_FIELD': 191 | switch (byte) { 192 | case COLON: 193 | this._state = 'HEADER_VALUE'; 194 | break 195 | case CR: 196 | if (this._headerField) { 197 | this._emitError('InvalidHeaderFieldToken', byte); 198 | return; 199 | } 200 
| 201 | this._state = 'HEADERS_END'; 202 | break 203 | default: 204 | var character = TOKENS[byte]; 205 | if (!character) { 206 | this._emitError('InvalidHeaderFieldToken', byte); 207 | return; 208 | } 209 | 210 | if (++this._headerBufferLength > this._headerBufferLimit) { 211 | this._state = 'HEADER_BUFFER_OVERFLOW'; 212 | continue; 213 | } 214 | 215 | this._headerField += character; 216 | break; 217 | } 218 | break; 219 | case 'HEADERS_END': 220 | switch (byte) { 221 | case LF: 222 | this._marker = i; 223 | this._state = 'PART_BODY'; 224 | this.emit('part', this._part); 225 | break; 226 | default: 227 | this._emitError('InvalidHeaderFieldToken', byte); 228 | return; 229 | break; 230 | } 231 | break; 232 | break; 233 | case 'HEADER_VALUE': 234 | if (byte === CRLF[this._counter++]) { 235 | if (this._counter === CRLF.length) { 236 | this._part.addHeader(this._headerField, this._headerValue.trim()); 237 | this._headerField = ''; 238 | this._headerValue = ''; 239 | this._counter = 0; 240 | this._state = 'HEADER_FIELD'; 241 | } 242 | break; 243 | } 244 | 245 | this._counter = 0; 246 | 247 | if (++this._headerBufferLength > this._headerBufferLimit) { 248 | this._state = 'HEADER_BUFFER_OVERFLOW'; 249 | continue; 250 | } 251 | 252 | this._headerValue += String.fromCharCode(byte); 253 | break; 254 | case 'PART_BODY': 255 | this._marker = i; 256 | 257 | var boundaryLength = this._boundary.length - 1; 258 | var bufferEnd = buffer.length; 259 | var boundaryChars = this._boundaryChars; 260 | 261 | do { 262 | i += boundaryLength; 263 | } while (i < bufferEnd && !(buffer[i] in boundaryChars)) 264 | i -= boundaryLength; 265 | 266 | this._offset += i - this._marker; 267 | 268 | while(true) { 269 | if (byte === CR) { 270 | this._part.write(buffer, this._marker, i); 271 | this._marker = 0; 272 | this._state = 'BOUNDARY'; 273 | break; 274 | } 275 | 276 | this._offset++; 277 | 278 | if ((byte = buffer[++i]) === undefined) { 279 | this._part.write(buffer, this._marker, i); 280 | break; 
281 | } 282 | } 283 | break; 284 | case 'EPILOGUE': 285 | return; 286 | case 'HEADER_BUFFER_OVERFLOW': 287 | this._emitError( 288 | 'HeaderBufferOverflow', 289 | 'Max buffer size: ' + this._headerBufferLimit + 'bytes' 290 | ); 291 | return; 292 | case 'NO_BOUNDARY': 293 | this._emitError('NoBoundary', 'No boundary configured for parser.'); 294 | return; 295 | default: 296 | this._emitError('InvalidParserState', 'Unknown state: ' + this._state); 297 | return; 298 | } 299 | 300 | if ((byte = buffer[++i]) === undefined) break; 301 | this._offset++; 302 | } 303 | }; 304 | 305 | MultipartParser.prototype._error = function(type, reason) { 306 | if (typeof reason === 'number') { 307 | var byte = reason; 308 | var character = String.fromCharCode(byte); 309 | 310 | reason = 311 | 'Got byte: ' + byte + ' / ' + JSON.stringify(character) + ' ' + 312 | 'at offset: ' + this._offset; 313 | } 314 | 315 | return new Error('MultipartParser.' + type + ': ' + reason); 316 | }; 317 | 318 | MultipartParser.prototype._emitError = function(type, reason) { 319 | var err = this._error(type, reason); 320 | this.writable = false; 321 | this.emit('error', err); 322 | }; 323 | 324 | MultipartParser.prototype.end = function() { 325 | this.emit('end'); 326 | }; 327 | -------------------------------------------------------------------------------- /lib/part.js: -------------------------------------------------------------------------------- 1 | var Stream = require('stream').Stream; 2 | var util = require('util'); 3 | 4 | module.exports = Part; 5 | util.inherits(Part, Stream); 6 | function Part() { 7 | Stream.call(this); 8 | 9 | this.headers = {}; 10 | this.readable = true; 11 | } 12 | 13 | Part.prototype.addHeader = function(field, value) { 14 | this.headers[field] = value; 15 | }; 16 | 17 | Part.prototype.write = function(buffer, start, end) { 18 | this.emit('data', buffer.slice(start, end)); 19 | }; 20 | 21 | Part.prototype.end = function() { 22 | this.emit('end'); 23 | }; 24 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Felix Geisendörfer (http://debuggable.com/)", 3 | "name": "multipart-parser", 4 | "description": "A fast and streaming multipart parser.", 5 | "version": "0.0.0", 6 | "homepage": "https://github.com/felixge/node-multipart-parser", 7 | "repository": { 8 | "type": "git", 9 | "url": "git://github.com/felixge/node-multipart-parser.git" 10 | }, 11 | "main": "./index.js", 12 | "engines": { 13 | "node": "*" 14 | }, 15 | "dependencies": {}, 16 | "devDependencies": { 17 | "fast-or-slow": "0.0.5", 18 | "far": "0.0.7", 19 | "commander": "0.2.0", 20 | "uubench": "0.0.1" 21 | }, 22 | "scripts": { 23 | "test": "make test" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /rfc/1341-the-multipart-content-type.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | RFC1341(MIME) : 7 The Multipart content type 4 | 5 | 6 | 7 |

7.2 The Multipart Content-Type

In the case of multiple part messages, 9 | in which one or more different 10 | sets of data are combined in 11 | a single body, a "multipart" Content-Type 12 | field must appear in the entity's 13 | header. The body must then contain 14 | one or more "body parts," each 15 | preceded by an encapsulation boundary, 16 | and the last one followed by a 17 | closing boundary. Each part starts 18 | with an encapsulation boundary, 19 | and then contains a body part 20 | consisting of header area, a 21 | blank line, and a body area. Thus 22 | a body part is similar to an RFC 23 | 822 message in syntax, but different 24 | in meaning.

25 | A body part is NOT to be interpreted 26 | as actually being an RFC 822 27 | message. To begin with, NO header 28 | fields are actually required in 29 | body parts. A body part that starts 30 | with a blank line, therefore, 31 | is allowed and is a body part for 32 | which all default values are to be 33 | assumed. In such a case, the 34 | absence of a Content-Type header 35 | field implies that the encapsulation 36 | is plain US-ASCII text. The only 38 | header fields that have defined 39 | meaning for body parts are those 40 | the names of which begin with "Content-". 41 | All other header fields are 42 | generally to be ignored in body 43 | parts. Although they should generally 44 | be retained in mail processing, 45 | they may be discarded by gateways 46 | if necessary. Such other fields 47 | are permitted to appear in body parts 48 | but should not be depended on. 49 | "X-" fields may be created for 50 | experimental or private purposes, 51 | with the recognition that the information 52 | they contain may be lost at some 53 | gateways.

54 | The distinction between an RFC 822 55 | message and a body part is subtle, 56 | but important. A gateway between 57 | Internet and X.400 mail, for example, 58 | must be able to tell the difference 59 | between a body part that contains 60 | an image and a body part that contains 61 | an encapsulated message, the body 62 | of which is an image. In order 63 | to represent the latter, the body 64 | part must have "Content-Type: message", 65 | and its body (after the blank 66 | line) must be the encapsulated message, 67 | with its own "Content-Type: image" 68 | header field. The use of similar 69 | syntax facilitates the conversion 70 | of messages to body parts, and 71 | vice versa, but the distinction between 72 | the two must be understood by 73 | implementors. (For the special case 74 | in which all parts actually are 75 | messages, a "digest" subtype is 76 | also defined.)

77 | As stated previously, each body 78 | part is preceded by an encapsulation 79 | boundary. The encapsulation boundary 80 | MUST NOT appear inside any of the 81 | encapsulated parts. Thus, it is 82 | crucial that the composing agent 83 | be able to choose and specify 84 | the unique boundary that will separate 85 | the parts.

86 | All present and future subtypes of 87 | the "multipart" type must use an 88 | identical syntax. Subtypes may 89 | differ in their semantics, and 90 | may impose additional restrictions 91 | on syntax, but must conform to 92 | the required syntax for the multipart 93 | type. This requirement ensures 94 | that all conformant user agents 95 | will at least be able to recognize 96 | and separate the parts of any multipart 97 | entity, even of an unrecognized 98 | subtype.

99 | As stated in the definition of the 100 | Content-Transfer-Encoding field, 101 | no encoding other than "7bit", "8bit", 102 | or "binary" is permitted for entities 103 | of type "multipart". The multipart 104 | delimiters and header fields 105 | are always 7-bit ASCII in any case, 106 | and data within the body parts can 107 | be encoded on a part-by-part 108 | basis, with Content-Transfer-Encoding 109 | fields for each appropriate body 110 | part.

111 | Mail gateways, relays, and other 112 | mail handling agents are commonly 113 | known to alter the top-level header 114 | of an RFC 822 message. In particular, 115 | they frequently add, remove, or 116 | reorder header fields. Such 117 | alterations are explicitly forbidden 118 | for the body part headers embedded 119 | in the bodies of messages of 120 | type "multipart." 121 |

7.2.1 Multipart: The common 123 | syntax

All subtypes of "multipart" share 124 | a common syntax, defined in this 125 | section. A simple example of a 126 | multipart message also appears 127 | in this section. An example of a 128 | more complex multipart message 129 | is given in Appendix C.

130 | The Content-Type field for multipart 131 | entities requires one parameter, 132 | "boundary", which is used to 133 | specify the encapsulation boundary. 134 | The encapsulation boundary is 135 | defined as a line consisting 136 | entirely of two hyphen characters 137 | ("-", decimal code 45) followed by 138 | the boundary parameter value 139 | from the Content-Type header field. 140 |

141 | NOTE: The hyphens are for rough 142 | compatibility with the earlier 143 | RFC 934 method of message encapsulation, 144 | and for ease of searching 145 | for the boundaries in some 146 | implementations. However, it should 147 | be noted that multipart messages 148 | are NOT completely compatible 149 | with RFC 934 encapsulations; 150 | in particular, they do not obey 151 | RFC 934 quoting conventions for 152 | embedded lines that begin with 153 | hyphens. This mechanism was 154 | chosen over the RFC 934 mechanism 155 | because the latter causes lines to 156 | grow with each level of quoting. 157 | The combination of this growth with 158 | the fact that SMTP implementations 159 | sometimes wrap long lines made 160 | the RFC 934 mechanism unsuitable 161 | for use in the event that deeply-nested 162 | multipart structuring is ever desired. 163 |

164 | Thus, a typical multipart Content-Type 165 | header field might look like 166 | this: 167 |

168 |      Content-Type: multipart/mixed; 
169 |           boundary=gc0p4Jq0M2Yt08jU534c0p
170 |  
171 | 
This indicates that the entity consists 172 | of several parts, each itself 173 | with a structure that is syntactically 174 | identical to an RFC 822 message, 175 | except that the header area might 176 | be completely empty, and that 177 | the parts are each preceded by 178 | the line 179 |
180 |      --gc0p4Jq0M2Yt08jU534c0p
181 |  
182 | 
Note that the encapsulation boundary 183 | must occur at the beginning 184 | of a line, i.e., following a CRLF, 185 | and that that initial CRLF is considered 186 | to be part of the encapsulation 187 | boundary rather than part of 188 | the preceding part. The boundary 189 | must be followed immediately either 190 | by another CRLF and the header 191 | fields for the next part, or by two 192 | CRLFs, in which case there are 193 | no header fields for the next part 194 | (and it is therefore assumed to 195 | be of Content-Type text/plain).

196 | NOTE: The CRLF preceding the 197 | encapsulation line is considered 198 | part of the boundary so that it 199 | is possible to have a part that 200 | does not end with a CRLF (line 201 | break). Body parts that must 202 | be considered to end with line breaks, 203 | therefore, should have two CRLFs 204 | preceding the encapsulation line, 205 | the first of which is part of the 206 | preceding body part, and the second 207 | of which is part of the encapsulation 208 | boundary.

209 | The requirement that the encapsulation 210 | boundary begins with a CRLF implies 211 | that the body of a multipart entity 212 | must itself begin with a CRLF before 213 | the first encapsulation line -- 214 | that is, if the "preamble" area 215 | is not used, the entity headers 216 | must be followed by TWO CRLFs. This 217 | is indeed how such entities 218 | should be composed. A tolerant mail 219 | reading program, however, may interpret 220 | a body of type multipart that 221 | begins with an encapsulation line 222 | NOT initiated by a CRLF as also 223 | being an encapsulation boundary, 224 | but a compliant mail sending 225 | program must not generate such 226 | entities.

227 | Encapsulation boundaries must not 228 | appear within the encapsulations, 229 | and must be no longer than 70 characters, 230 | not counting the two leading hyphens. 231 |

232 | The encapsulation boundary following 233 | the last body part is a distinguished 234 | delimiter that indicates that no 235 | further body parts will follow. 236 | Such a delimiter is identical to 237 | the previous delimiters, with 238 | the addition of two more hyphens 239 | at the end of the line: 240 |

241 |      --gc0p4Jq0M2Yt08jU534c0p-- 
242 | 
There appears to be room for additional 243 | information prior to the first 244 | encapsulation boundary and following 245 | the final boundary. These areas 246 | should generally be left blank, 247 | and implementations should ignore 248 | anything that appears before the 249 | first boundary or after the last 250 | one.

251 | NOTE: These "preamble" and "epilogue" 252 | areas are not used because of 253 | the lack of proper typing of these 254 | parts and the lack of clear semantics 255 | for handling these areas at 256 | gateways, particularly X.400 gateways. 257 |

258 | NOTE: Because encapsulation boundaries 259 | must not appear in the body 260 | parts being encapsulated, a user 261 | agent must exercise care to choose 262 | a unique boundary. The boundary 263 | in the example above could have 264 | been the result of an algorithm 265 | designed to produce boundaries with 266 | a very low probability of already 267 | existing in the data to be encapsulated 268 | without having to prescan the 269 | data. Alternate algorithms might 270 | result in more 'readable' boundaries 271 | for a recipient with an old user 272 | agent, but would require more attention 273 | to the possibility that the 274 | boundary might appear in the 275 | encapsulated part. The simplest 276 | boundary possible is something 277 | like "---", with a closing boundary 278 | of "-----".

279 | As a very simple example, the following 280 | multipart message has two parts, 281 | both of them plain text, one 282 | of them explicitly typed and one 283 | of them implicitly typed: 284 |

285 |      From: Nathaniel Borenstein <nsb@bellcore.com> 
286 |      To:  Ned Freed <ned@innosoft.com> 
287 |      Subject: Sample message 
288 |      MIME-Version: 1.0 
289 |      Content-type: multipart/mixed; boundary="simple 
290 |      boundary" 
291 | 
292 |      This is the preamble.  It is to be ignored, though it 
293 |      is a handy place for mail composers to include an 
294 |      explanatory note to non-MIME compliant readers. 
295 |      --simple boundary 
296 | 
297 |      This is implicitly typed plain ASCII text. 
298 |      It does NOT end with a linebreak. 
299 |      --simple boundary 
300 |      Content-type: text/plain; charset=us-ascii 
301 | 
302 |      This is explicitly typed plain ASCII text. 
303 |      It DOES end with a linebreak. 
304 | 
305 |      --simple boundary-- 
306 |      This is the epilogue.  It is also to be ignored.
307 | 
308 | 
The use of a Content-Type of multipart 309 | in a body part within another multipart 310 | entity is explicitly allowed. 311 | In such cases, for obvious reasons, 312 | care must be taken to ensure 313 | that each nested multipart entity 314 | must use a different boundary 315 | delimiter. See Appendix C for an 316 | example of nested multipart entities. 317 |

318 | The use of the multipart Content-Type 319 | with only a single body part 320 | may be useful in certain contexts, 321 | and is explicitly permitted.

322 | The only mandatory parameter for 323 | the multipart Content-Type is 324 | the boundary parameter, which 325 | consists of 1 to 70 characters 326 | from a set of characters known to 327 | be very robust through email 328 | gateways, and NOT ending with white 329 | space. (If a boundary appears to 330 | end with white space, the white 331 | space must be presumed to have 332 | been added by a gateway, and should 333 | be deleted.) It is formally 334 | specified by the following BNF: 335 | 336 |

337 | boundary := 0*69<bchars> bcharsnospace 
338 | 
339 | bchars := bcharsnospace / " " 
340 | 
341 | bcharsnospace :=    DIGIT / ALPHA / "'" / "(" / ")" / "+"  / 
342 | "_" 
343 |                / "," / "-" / "." / "/" / ":" / "=" / "?" 
344 | 
345 | 
Overall, the body of a multipart 346 | entity may be specified as follows: 347 | 348 |
349 | multipart-body := preamble 1*encapsulation 
350 |                close-delimiter epilogue 
351 | 
352 | encapsulation := delimiter CRLF body-part 
353 | 
354 | delimiter := CRLF "--" boundary   ; taken from  Content-Type 
355 | field. 
356 |                                ;   when   content-type    is 
357 | multipart 
358 |                              ; There must be no space 
359 |                              ; between "--" and boundary. 
360 | 
361 | close-delimiter := delimiter "--" ; Again, no  space  before 
362 | "--" 
363 | 
364 | preamble :=  *text                  ;  to  be  ignored  upon 
365 | receipt. 
366 | 
367 | epilogue :=  *text                  ;  to  be  ignored  upon 
368 | receipt. 
369 | 
370 | body-part = <"message" as defined in RFC 822, 
371 |          with all header fields optional, and with the 
372 |          specified delimiter not occurring anywhere in 
373 |          the message body, either on a line by itself 
374 |          or as a substring anywhere.  Note that the 
375 |          semantics of a part differ from the semantics 
376 |          of a message, as described in the text.> 
377 | 
378 | 
NOTE: Conspicuously missing from 379 | the multipart type is a notion 380 | of structured, related body parts. 381 | In general, it seems premature 382 | to try to standardize interpart 383 | structure yet. It is recommended 384 | that those wishing to provide a more 385 | structured or integrated multipart 386 | messaging facility should define 387 | a subtype of multipart that 388 | is syntactically identical, but 389 | that always expects the inclusion 390 | of a distinguished part that can 391 | be used to specify the structure 392 | and integration of the other parts, 393 | probably referring to them by 394 | their Content-ID field. If this 395 | approach is used, other implementations 396 | will not recognize the new subtype, 397 | but will treat it as the primary 398 | subtype (multipart/mixed) and will 399 | thus be able to show the user the 400 | parts that are recognized. 401 |

7.2.2 The Multipart/mixed (primary) 402 | subtype

The primary subtype for multipart, 403 | "mixed", is intended for use when 404 | the body parts are independent and 405 | intended to be displayed serially. 406 | Any multipart subtypes that 407 | an implementation does not recognize 408 | should be treated as being of subtype 409 | "mixed". 410 |

7.2.3 The Multipart/alternative 411 | subtype

The multipart/alternative type is 412 | syntactically identical to multipart/mixed, 413 | but the semantics are different. 414 | In particular, each of the parts 415 | is an "alternative" version of 416 | the same information. User agents 417 | should recognize that the content 418 | of the various parts are interchangeable. 419 | The user agent should either 420 | choose the "best" type based on 421 | the user's environment and preferences, 422 | or offer the user the available 423 | alternatives. In general, choosing 424 | the best type means displaying 425 | only the LAST part that can be displayed. 426 | This may be used, for example, 427 | to send mail in a fancy text format 428 | in such a way that it can easily 429 | be displayed anywhere: 430 |
431 | From:  Nathaniel Borenstein <nsb@bellcore.com> 
432 | To: Ned Freed <ned@innosoft.com> 
433 | Subject: Formatted text mail 
434 | MIME-Version: 1.0 
435 | Content-Type: multipart/alternative; boundary=boundary42 
436 | 
437 | 
438 | --boundary42 
439 | Content-Type: text/plain; charset=us-ascii 
440 | 
441 | ...plain text version of message goes here.... 
442 | 
443 | --boundary42 
444 | Content-Type: text/richtext 
445 | 
446 | .... richtext version of same message goes here ... 
447 | --boundary42 
448 | Content-Type: text/x-whatever 
449 | 
450 | .... fanciest formatted version of same  message  goes  here 
451 | ... 
452 | --boundary42-- 
453 | 
454 | 
In this example, users whose mail 455 | system understood the "text/x-whatever" 456 | format would see only the fancy 457 | version, while other users would 458 | see only the richtext or plain text 459 | version, depending on the capabilities 460 | of their system.

461 | In general, user agents that compose 462 | multipart/alternative entities 463 | should place the body parts in increasing 464 | order of preference, that is, with 465 | the preferred format last. For 466 | fancy text, the sending user 467 | agent should put the plainest format 468 | first and the richest format last. 469 | Receiving user agents should 470 | pick and display the last format 471 | they are capable of displaying. 472 | In the case where one of the 473 | alternatives is itself of type 474 | "multipart" and contains unrecognized 475 | sub-parts, the user agent may choose 476 | either to show that alternative, 477 | an earlier alternative, or both. 478 |

479 | NOTE: From an implementor's perspective, 480 | it might seem more sensible to 481 | reverse this ordering, and have 482 | the plainest alternative last. 483 | However, placing the plainest alternative 484 | first is the friendliest 485 | possible option when multipart/alternative 486 | entities are viewed using a non-MIME- 487 | compliant mail reader. While this 488 | approach does impose some burden 489 | on compliant mail readers, interoperability 490 | with older mail readers was deemed 491 | to be more important in this case. 492 |

493 | It may be the case that some user 494 | agents, if they can recognize 495 | more than one of the formats, will 496 | prefer to offer the user the choice 497 | of which format to view. This 498 | makes sense, for example, if mail 499 | includes both a nicely-formatted 500 | image version and an easily-edited 501 | text version. What is most 502 | critical, however, is that the user 503 | not automatically be shown multiple 504 | versions of the same data. Either 505 | the user should be shown the 506 | last recognized version or should 507 | explicitly be given the choice. 508 | 509 |

7.2.4 The Multipart/digest subtype 510 |

This document defines a "digest" 511 | subtype of the multipart Content-Type. 512 | This type is syntactically identical 513 | to multipart/mixed, but the 514 | semantics are different. In 515 | particular, in a digest, the default 516 | Content-Type value for a body 517 | part is changed from "text/plain" 518 | to "message/rfc822". This 519 | is done to allow a more readable 520 | digest format that is largely 521 | compatible (except for the quoting 522 | convention) with RFC 934.

523 | A digest in this format might, then, 524 | look something like this: 525 |

526 | From: Moderator-Address 
527 | MIME-Version: 1.0 
528 | Subject:  Internet Digest, volume 42 
529 | Content-Type: multipart/digest; 
530 |      boundary="---- next message ----" 
531 | 
532 | 
533 | ------ next message ---- 
534 | 
535 | From: someone-else 
536 | Subject: my opinion 
537 | 
538 | ...body goes here ... 
539 | 
540 | ------ next message ---- 
541 | 
542 | From: someone-else-again 
543 | Subject: my different opinion 
544 | 
545 | ... another body goes here... 
546 | 
547 | ------ next message ------
548 | 
549 | 
550 | 
551 |

7.2.5 The Multipart/parallel 553 | subtype

This document defines a "parallel" 554 | subtype of the multipart Content-Type. 555 | This type is syntactically identical 556 | to multipart/mixed, but the 557 | semantics are different. In 558 | particular, in a parallel entity, 559 | all of the parts are intended 560 | to be presented in parallel, i.e., 561 | simultaneously, on hardware and 562 | software that are capable of 563 | doing so. Composing agents should 564 | be aware that many mail readers will 565 | lack this capability and will show 566 | the parts serially in any event. 567 | 568 | 569 | -------------------------------------------------------------------------------- /rfc/2047-message-header-extensions-for-non-ascii-text.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Network Working Group K. Moore 8 | Request for Comments: 2047 University of Tennessee 9 | Obsoletes: 1521, 1522, 1590 November 1996 10 | Category: Standards Track 11 | 12 | 13 | MIME (Multipurpose Internet Mail Extensions) Part Three: 14 | Message Header Extensions for Non-ASCII Text 15 | 16 | Status of this Memo 17 | 18 | This document specifies an Internet standards track protocol for the 19 | Internet community, and requests discussion and suggestions for 20 | improvements. Please refer to the current edition of the "Internet 21 | Official Protocol Standards" (STD 1) for the standardization state 22 | and status of this protocol. Distribution of this memo is unlimited. 23 | 24 | Abstract 25 | 26 | STD 11, RFC 822, defines a message representation protocol specifying 27 | considerable detail about US-ASCII message headers, and leaves the 28 | message content, or message body, as flat US-ASCII text. 
This set of 29 | documents, collectively called the Multipurpose Internet Mail 30 | Extensions, or MIME, redefines the format of messages to allow for 31 | 32 | (1) textual message bodies in character sets other than US-ASCII, 33 | 34 | (2) an extensible set of different formats for non-textual message 35 | bodies, 36 | 37 | (3) multi-part message bodies, and 38 | 39 | (4) textual header information in character sets other than US-ASCII. 40 | 41 | These documents are based on earlier work documented in RFC 934, STD 42 | 11, and RFC 1049, but extends and revises them. Because RFC 822 said 43 | so little about message bodies, these documents are largely 44 | orthogonal to (rather than a revision of) RFC 822. 45 | 46 | This particular document is the third document in the series. It 47 | describes extensions to RFC 822 to allow non-US-ASCII text data in 48 | Internet mail header fields. 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | Moore Standards Track [Page 1] 59 | 60 | RFC 2047 Message Header Extensions November 1996 61 | 62 | 63 | Other documents in this series include: 64 | 65 | + RFC 2045, which specifies the various headers used to describe 66 | the structure of MIME messages. 67 | 68 | + RFC 2046, which defines the general structure of the MIME media 69 | typing system and defines an initial set of media types, 70 | 71 | + RFC 2048, which specifies various IANA registration procedures 72 | for MIME-related facilities, and 73 | 74 | + RFC 2049, which describes MIME conformance criteria and 75 | provides some illustrative examples of MIME message formats, 76 | acknowledgements, and the bibliography. 77 | 78 | These documents are revisions of RFCs 1521, 1522, and 1590, which 79 | themselves were revisions of RFCs 1341 and 1342. An appendix in RFC 80 | 2049 describes differences and changes from previous versions. 81 | 82 | 1. 
Introduction 83 | 84 | RFC 2045 describes a mechanism for denoting textual body parts which 85 | are coded in various character sets, as well as methods for encoding 86 | such body parts as sequences of printable US-ASCII characters. This 87 | memo describes similar techniques to allow the encoding of non-ASCII 88 | text in various portions of a RFC 822 [2] message header, in a manner 89 | which is unlikely to confuse existing message handling software. 90 | 91 | Like the encoding techniques described in RFC 2045, the techniques 92 | outlined here were designed to allow the use of non-ASCII characters 93 | in message headers in a way which is unlikely to be disturbed by the 94 | quirks of existing Internet mail handling programs. In particular, 95 | some mail relaying programs are known to (a) delete some message 96 | header fields while retaining others, (b) rearrange the order of 97 | addresses in To or Cc fields, (c) rearrange the (vertical) order of 98 | header fields, and/or (d) "wrap" message headers at different places 99 | than those in the original message. In addition, some mail reading 100 | programs are known to have difficulty correctly parsing message 101 | headers which, while legal according to RFC 822, make use of 102 | backslash-quoting to "hide" special characters such as "<", ",", or 103 | ":", or which exploit other infrequently-used features of that 104 | specification. 105 | 106 | While it is unfortunate that these programs do not correctly 107 | interpret RFC 822 headers, to "break" these programs would cause 108 | severe operational problems for the Internet mail system. The 109 | extensions described in this memo therefore do not rely on little- 110 | used features of RFC 822. 
111 | 112 | 113 | 114 | Moore Standards Track [Page 2] 115 | 116 | RFC 2047 Message Header Extensions November 1996 117 | 118 | 119 | Instead, certain sequences of "ordinary" printable ASCII characters 120 | (known as "encoded-words") are reserved for use as encoded data. The 121 | syntax of encoded-words is such that they are unlikely to 122 | "accidentally" appear as normal text in message headers. 123 | Furthermore, the characters used in encoded-words are restricted to 124 | those which do not have special meanings in the context in which the 125 | encoded-word appears. 126 | 127 | Generally, an "encoded-word" is a sequence of printable ASCII 128 | characters that begins with "=?", ends with "?=", and has two "?"s in 129 | between. It specifies a character set and an encoding method, and 130 | also includes the original text encoded as graphic ASCII characters, 131 | according to the rules for that encoding method. 132 | 133 | A mail composer that implements this specification will provide a 134 | means of inputting non-ASCII text in header fields, but will 135 | translate these fields (or appropriate portions of these fields) into 136 | encoded-words before inserting them into the message header. 137 | 138 | A mail reader that implements this specification will recognize 139 | encoded-words when they appear in certain portions of the message 140 | header. Instead of displaying the encoded-word "as is", it will 141 | reverse the encoding and display the original text in the designated 142 | character set. 143 | 144 | NOTES 145 | 146 | This memo relies heavily on notation and terms defined in RFC 822 and 147 | RFC 2045. In particular, the syntax for the ABNF used in this memo 148 | is defined in RFC 822, as well as many of the terminal or nonterminal 149 | symbols from RFC 822 are used in the grammar for the header 150 | extensions defined here.
Among the symbols defined in RFC 822 and 151 | referenced in this memo are: 'addr-spec', 'atom', 'CHAR', 'comment', 152 | 'CTLs', 'ctext', 'linear-white-space', 'phrase', 'quoted-pair', 153 | 'quoted-string', 'SPACE', and 'word'. Successful implementation of 154 | this protocol extension requires careful attention to the RFC 822 155 | definitions of these terms. 156 | 157 | When the term "ASCII" appears in this memo, it refers to the "7-Bit 158 | American Standard Code for Information Interchange", ANSI X3.4-1986. 159 | The MIME charset name for this character set is "US-ASCII". When not 160 | specifically referring to the MIME charset name, this document uses 161 | the term "ASCII", both for brevity and for consistency with RFC 822. 162 | However, implementors are warned that the character set name must be 163 | spelled "US-ASCII" in MIME message and body part headers. 164 | 165 | 166 | 167 | 168 | 169 | 170 | Moore Standards Track [Page 3] 171 | 172 | RFC 2047 Message Header Extensions November 1996 173 | 174 | 175 | This memo specifies a protocol for the representation of non-ASCII 176 | text in message headers. It specifically DOES NOT define any 177 | translation between "8-bit headers" and pure ASCII headers, nor is 178 | any such translation assumed to be possible. 179 | 180 | 2. Syntax of encoded-words 181 | 182 | An 'encoded-word' is defined by the following ABNF grammar. The 183 | notation of RFC 822 is used, with the exception that white space 184 | characters MUST NOT appear between components of an 'encoded-word'. 185 | 186 | encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" 187 | 188 | charset = token ; see section 3 189 | 190 | encoding = token ; see section 4 191 | 192 | token = 1*<Any CHAR except SPACE, CTLs, and especials> 193 | 194 | especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / " 195 | <"> / "/" / "[" / "]" / "?" / "."
/ "=" 196 | 197 | encoded-text = 1*<Any printable ASCII character other than "?" 198 | or SPACE> 199 | ; (but see "Use of encoded-words in message 200 | ; headers", section 5) 201 | 202 | Both 'encoding' and 'charset' names are case-independent. Thus the 203 | charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the 204 | encoding named "Q" may be spelled either "Q" or "q". 205 | 206 | An 'encoded-word' may not be more than 75 characters long, including 207 | 'charset', 'encoding', 'encoded-text', and delimiters. If it is 208 | desirable to encode more text than will fit in an 'encoded-word' of 209 | 75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may 210 | be used. 211 | 212 | While there is no limit to the length of a multiple-line header 213 | field, each line of a header field that contains one or more 214 | 'encoded-word's is limited to 76 characters. 215 | 216 | The length restrictions are included both to ease interoperability 217 | through internetwork mail gateways, and to impose a limit on the 218 | amount of lookahead a header parser must employ (while looking for a 219 | final ?= delimiter) before it can decide whether a token is an 220 | "encoded-word" or something else. 221 | 222 | 223 | 224 | 225 | 226 | Moore Standards Track [Page 4] 227 | 228 | RFC 2047 Message Header Extensions November 1996 229 | 230 | 231 | IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's 232 | by an RFC 822 parser. As a consequence, unencoded white space 233 | characters (such as SPACE and HTAB) are FORBIDDEN within an 234 | 'encoded-word'. For example, the character sequence 235 | 236 | =?iso-8859-1?q?this is some text?= 237 | 238 | would be parsed as four 'atom's, rather than as a single 'atom' (by 239 | an RFC 822 parser) or 'encoded-word' (by a parser which understands 240 | 'encoded-words'). The correct way to encode the string "this is some 241 | text" is to encode the SPACE characters as well, e.g.
242 | 243 | =?iso-8859-1?q?this=20is=20some=20text?= 244 | 245 | The characters which may appear in 'encoded-text' are further 246 | restricted by the rules in section 5. 247 | 248 | 3. Character sets 249 | 250 | The 'charset' portion of an 'encoded-word' specifies the character 251 | set associated with the unencoded text. A 'charset' can be any of 252 | the character set names allowed in an MIME "charset" parameter of a 253 | "text/plain" body part, or any character set name registered with 254 | IANA for use with the MIME text/plain content-type. 255 | 256 | Some character sets use code-switching techniques to switch between 257 | "ASCII mode" and other modes. If unencoded text in an 'encoded-word' 258 | contains a sequence which causes the charset interpreter to switch 259 | out of ASCII mode, it MUST contain additional control codes such that 260 | ASCII mode is again selected at the end of the 'encoded-word'. (This 261 | rule applies separately to each 'encoded-word', including adjacent 262 | 'encoded-word's within a single header field.) 263 | 264 | When there is a possibility of using more than one character set to 265 | represent the text in an 'encoded-word', and in the absence of 266 | private agreements between sender and recipients of a message, it is 267 | recommended that members of the ISO-8859-* series be used in 268 | preference to other character sets. 269 | 270 | 4. Encodings 271 | 272 | Initially, the legal values for "encoding" are "Q" and "B". These 273 | encodings are described below. The "Q" encoding is recommended for 274 | use when most of the characters to be encoded are in the ASCII 275 | character set; otherwise, the "B" encoding should be used. 276 | Nevertheless, a mail reader which claims to recognize 'encoded-word's 277 | MUST be able to accept either encoding for any character set which it 278 | supports. 
279 | 280 | 281 | 282 | Moore Standards Track [Page 5] 283 | 284 | RFC 2047 Message Header Extensions November 1996 285 | 286 | 287 | Only a subset of the printable ASCII characters may be used in 288 | 'encoded-text'. Space and tab characters are not allowed, so that 289 | the beginning and end of an 'encoded-word' are obvious. The "?" 290 | character is used within an 'encoded-word' to separate the various 291 | portions of the 'encoded-word' from one another, and thus cannot 292 | appear in the 'encoded-text' portion. Other characters are also 293 | illegal in certain contexts. For example, an 'encoded-word' in a 294 | 'phrase' preceding an address in a From header field may not contain 295 | any of the "specials" defined in RFC 822. Finally, certain other 296 | characters are disallowed in some contexts, to ensure reliability for 297 | messages that pass through internetwork mail gateways. 298 | 299 | The "B" encoding automatically meets these requirements. The "Q" 300 | encoding allows a wide range of printable characters to be used in 301 | non-critical locations in the message header (e.g., Subject), with 302 | fewer characters available for use in other locations. 303 | 304 | 4.1. The "B" encoding 305 | 306 | The "B" encoding is identical to the "BASE64" encoding defined by RFC 307 | 2045. 308 | 309 | 4.2. The "Q" encoding 310 | 311 | The "Q" encoding is similar to the "Quoted-Printable" content- 312 | transfer-encoding defined in RFC 2045. It is designed to allow text 313 | containing mostly ASCII characters to be decipherable on an ASCII 314 | terminal without decoding. 315 | 316 | (1) Any 8-bit value may be represented by a "=" followed by two 317 | hexadecimal digits. For example, if the character set in use 318 | were ISO-8859-1, the "=" character would thus be encoded as 319 | "=3D", and a SPACE by "=20". (Upper case should be used for 320 | hexadecimal digits "A" through "F".) 
321 | 322 | (2) The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be 323 | represented as "_" (underscore, ASCII 95.). (This character may 324 | not pass through some internetwork mail gateways, but its use 325 | will greatly enhance readability of "Q" encoded data with mail 326 | readers that do not support this encoding.) Note that the "_" 327 | always represents hexadecimal 20, even if the SPACE character 328 | occupies a different code position in the character set in use. 329 | 330 | (3) 8-bit values which correspond to printable ASCII characters other 331 | than "=", "?", and "_" (underscore), MAY be represented as those 332 | characters. (But see section 5 for restrictions.) In 333 | particular, SPACE and TAB MUST NOT be represented as themselves 334 | within encoded words. 335 | 336 | 337 | 338 | Moore Standards Track [Page 6] 339 | 340 | RFC 2047 Message Header Extensions November 1996 341 | 342 | 343 | 5. Use of encoded-words in message headers 344 | 345 | An 'encoded-word' may appear in a message header or body part header 346 | according to the following rules: 347 | 348 | (1) An 'encoded-word' may replace a 'text' token (as defined by RFC 822) 349 | in any Subject or Comments header field, any extension message 350 | header field, or any MIME body part field for which the field body 351 | is defined as '*text'. An 'encoded-word' may also appear in any 352 | user-defined ("X-") message or body part header field. 353 | 354 | Ordinary ASCII text and 'encoded-word's may appear together in the 355 | same header field. However, an 'encoded-word' that appears in a 356 | header field defined as '*text' MUST be separated from any adjacent 357 | 'encoded-word' or 'text' by 'linear-white-space'. 358 | 359 | (2) An 'encoded-word' may appear within a 'comment' delimited by "(" and 360 | ")", i.e., wherever a 'ctext' is allowed. 
More precisely, the RFC 361 | 822 ABNF definition for 'comment' is amended as follows: 362 | 363 | comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")" 364 | 365 | A "Q"-encoded 'encoded-word' which appears in a 'comment' MUST NOT 366 | contain the characters "(", ")" or " 367 | 'encoded-word' that appears in a 'comment' MUST be separated from 368 | any adjacent 'encoded-word' or 'ctext' by 'linear-white-space'. 369 | 370 | It is important to note that 'comment's are only recognized inside 371 | "structured" field bodies. In fields whose bodies are defined as 372 | '*text', "(" and ")" are treated as ordinary characters rather than 373 | comment delimiters, and rule (1) of this section applies. (See RFC 374 | 822, sections 3.1.2 and 3.1.3) 375 | 376 | (3) As a replacement for a 'word' entity within a 'phrase', for example, 377 | one that precedes an address in a From, To, or Cc header. The ABNF 378 | definition for 'phrase' from RFC 822 thus becomes: 379 | 380 | phrase = 1*( encoded-word / word ) 381 | 382 | In this case the set of characters that may be used in a "Q"-encoded 383 | 'encoded-word' is restricted to: <upper and lower case ASCII 384 | letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" 385 | (underscore, ASCII 95.)>. An 'encoded-word' that appears within a 386 | 'phrase' MUST be separated from any adjacent 'word', 'text' or 387 | 'special' by 'linear-white-space'. 388 | 389 | 390 | 391 | 392 | 393 | 394 | Moore Standards Track [Page 7] 395 | 396 | RFC 2047 Message Header Extensions November 1996 397 | 398 | 399 | These are the ONLY locations where an 'encoded-word' may appear. In 400 | particular: 401 | 402 | + An 'encoded-word' MUST NOT appear in any portion of an 'addr-spec'. 403 | 404 | + An 'encoded-word' MUST NOT appear within a 'quoted-string'. 405 | 406 | + An 'encoded-word' MUST NOT be used in a Received header field. 407 | 408 | + An 'encoded-word' MUST NOT be used in parameter of a MIME 409 | Content-Type or Content-Disposition field, or in any structured 410 | field body except within a 'comment' or 'phrase'.
411 | 412 | The 'encoded-text' in an 'encoded-word' must be self-contained; 413 | 'encoded-text' MUST NOT be continued from one 'encoded-word' to 414 | another. This implies that the 'encoded-text' portion of a "B" 415 | 'encoded-word' will be a multiple of 4 characters long; for a "Q" 416 | 'encoded-word', any "=" character that appears in the 'encoded-text' 417 | portion will be followed by two hexadecimal characters. 418 | 419 | Each 'encoded-word' MUST encode an integral number of octets. The 420 | 'encoded-text' in each 'encoded-word' must be well-formed according 421 | to the encoding specified; the 'encoded-text' may not be continued in 422 | the next 'encoded-word'. (For example, "=?charset?Q?=?= 423 | =?charset?Q?AB?=" would be illegal, because the two hex digits "AB" 424 | must follow the "=" in the same 'encoded-word'.) 425 | 426 | Each 'encoded-word' MUST represent an integral number of characters. 427 | A multi-octet character may not be split across adjacent 'encoded- 428 | word's. 429 | 430 | Only printable and white space character data should be encoded using 431 | this scheme. However, since these encoding schemes allow the 432 | encoding of arbitrary octet values, mail readers that implement this 433 | decoding should also ensure that display of the decoded data on the 434 | recipient's terminal will not cause unwanted side-effects. 435 | 436 | Use of these methods to encode non-textual data (e.g., pictures or 437 | sounds) is not defined by this memo. Use of 'encoded-word's to 438 | represent strings of purely ASCII characters is allowed, but 439 | discouraged. In rare cases it may be necessary to encode ordinary 440 | text that looks like an 'encoded-word'. 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | Moore Standards Track [Page 8] 451 | 452 | RFC 2047 Message Header Extensions November 1996 453 | 454 | 455 | 6. Support of 'encoded-word's by mail readers 456 | 457 | 6.1. 
Recognition of 'encoded-word's in message headers 458 | 459 | A mail reader must parse the message and body part headers according 460 | to the rules in RFC 822 to correctly recognize 'encoded-word's. 461 | 462 | 'encoded-word's are to be recognized as follows: 463 | 464 | (1) Any message or body part header field defined as '*text', or any 465 | user-defined header field, should be parsed as follows: Beginning 466 | at the start of the field-body and immediately following each 467 | occurrence of 'linear-white-space', each sequence of up to 75 468 | printable characters (not containing any 'linear-white-space') 469 | should be examined to see if it is an 'encoded-word' according to 470 | the syntax rules in section 2. Any other sequence of printable 471 | characters should be treated as ordinary ASCII text. 472 | 473 | (2) Any header field not defined as '*text' should be parsed 474 | according to the syntax rules for that header field. However, 475 | any 'word' that appears within a 'phrase' should be treated as an 476 | 'encoded-word' if it meets the syntax rules in section 2. 477 | Otherwise it should be treated as an ordinary 'word'. 478 | 479 | (3) Within a 'comment', any sequence of up to 75 printable characters 480 | (not containing 'linear-white-space'), that meets the syntax 481 | rules in section 2, should be treated as an 'encoded-word'. 482 | Otherwise it should be treated as normal comment text. 483 | 484 | (4) A MIME-Version header field is NOT required to be present for 485 | 'encoded-word's to be interpreted according to this 486 | specification. One reason for this is that the mail reader is 487 | not expected to parse the entire message header before displaying 488 | lines that may contain 'encoded-word's. 489 | 490 | 6.2. Display of 'encoded-word's 491 | 492 | Any 'encoded-word's so recognized are decoded, and if possible, the 493 | resulting unencoded text is displayed in the original character set. 
494 | 495 | NOTE: Decoding and display of encoded-words occurs *after* a 496 | structured field body is parsed into tokens. It is therefore 497 | possible to hide 'special' characters in encoded-words which, when 498 | displayed, will be indistinguishable from 'special' characters in the 499 | surrounding text. For this and other reasons, it is NOT generally 500 | possible to translate a message header containing 'encoded-word's to 501 | an unencoded form which can be parsed by an RFC 822 mail reader. 502 | 503 | 504 | 505 | 506 | Moore Standards Track [Page 9] 507 | 508 | RFC 2047 Message Header Extensions November 1996 509 | 510 | 511 | When displaying a particular header field that contains multiple 512 | 'encoded-word's, any 'linear-white-space' that separates a pair of 513 | adjacent 'encoded-word's is ignored. (This is to allow the use of 514 | multiple 'encoded-word's to represent long strings of unencoded text, 515 | without having to separate 'encoded-word's where spaces occur in the 516 | unencoded text.) 517 | 518 | In the event other encodings are defined in the future, and the mail 519 | reader does not support the encoding used, it may either (a) display 520 | the 'encoded-word' as ordinary text, or (b) substitute an appropriate 521 | message indicating that the text could not be decoded. 522 | 523 | If the mail reader does not support the character set used, it may 524 | (a) display the 'encoded-word' as ordinary text (i.e., as it appears 525 | in the header), (b) make a "best effort" to display using such 526 | characters as are available, or (c) substitute an appropriate message 527 | indicating that the decoded text could not be displayed. 528 | 529 | If the character set being used employs code-switching techniques, 530 | display of the encoded text implicitly begins in "ASCII mode". In 531 | addition, the mail reader must ensure that the output device is once 532 | again in "ASCII mode" after the 'encoded-word' is displayed. 533 | 534 | 6.3. 
Mail reader handling of incorrectly formed 'encoded-word's 535 | 536 | It is possible that an 'encoded-word' that is legal according to the 537 | syntax defined in section 2, is incorrectly formed according to the 538 | rules for the encoding being used. For example: 539 | 540 | (1) An 'encoded-word' which contains characters which are not legal 541 | for a particular encoding (for example, a "-" in the "B" 542 | encoding, or a SPACE or HTAB in either the "B" or "Q" encoding), 543 | is incorrectly formed. 544 | 545 | (2) Any 'encoded-word' which encodes a non-integral number of 546 | characters or octets is incorrectly formed. 547 | 548 | A mail reader need not attempt to display the text associated with an 549 | 'encoded-word' that is incorrectly formed. However, a mail reader 550 | MUST NOT prevent the display or handling of a message because an 551 | 'encoded-word' is incorrectly formed. 552 | 553 | 7. Conformance 554 | 555 | A mail composing program claiming compliance with this specification 556 | MUST ensure that any string of non-white-space printable ASCII 557 | characters within a '*text' or '*ctext' that begins with "=?" and 558 | ends with "?=" be a valid 'encoded-word'. ("begins" means: at the 559 | 560 | 561 | 562 | Moore Standards Track [Page 10] 563 | 564 | RFC 2047 Message Header Extensions November 1996 565 | 566 | 567 | start of the field-body, immediately following 'linear-white-space', 568 | or immediately following a "(" for an 'encoded-word' within '*ctext'; 569 | "ends" means: at the end of the field-body, immediately preceding 570 | 'linear-white-space', or immediately preceding a ")" for an 571 | 'encoded-word' within '*ctext'.) In addition, any 'word' within a 572 | 'phrase' that begins with "=?" and ends with "?=" must be a valid 573 | 'encoded-word'. 
574 | 575 | A mail reading program claiming compliance with this specification 576 | must be able to distinguish 'encoded-word's from 'text', 'ctext', or 577 | 'word's, according to the rules in section 6, anytime they appear in 578 | appropriate places in message headers. It must support both the "B" 579 | and "Q" encodings for any character set which it supports. The 580 | program must be able to display the unencoded text if the character 581 | set is "US-ASCII". For the ISO-8859-* character sets, the mail 582 | reading program must at least be able to display the characters which 583 | are also in the ASCII set. 584 | 585 | 8. Examples 586 | 587 | The following are examples of message headers containing 'encoded- 588 | word's: 589 | 590 | From: =?US-ASCII?Q?Keith_Moore?= 591 | To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= 592 | CC: =?ISO-8859-1?Q?Andr=E9?= Pirard 593 | Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= 594 | =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= 595 | 596 | Note: In the first 'encoded-word' of the Subject field above, the 597 | last "=" at the end of the 'encoded-text' is necessary because each 598 | 'encoded-word' must be self-contained (the "=" character completes a 599 | group of 4 base64 characters representing 2 octets). An additional 600 | octet could have been encoded in the first 'encoded-word' (so that 601 | the encoded-word would contain an exact multiple of 3 encoded 602 | octets), except that the second 'encoded-word' uses a different 603 | 'charset' than the first one. 604 | 605 | From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= 606 | To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se 607 | Subject: Time for ISO 10646? 
608 | 609 | To: Dave Crocker 610 | Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se 611 | From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= 612 | Subject: Re: RFC-HDR care and feeding 613 | 614 | 615 | 616 | 617 | 618 | Moore Standards Track [Page 11] 619 | 620 | RFC 2047 Message Header Extensions November 1996 621 | 622 | 623 | From: Nathaniel Borenstein 624 | (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=) 625 | To: Greg Vaudreuil , Ned Freed 626 | , Keith Moore 627 | Subject: Test of new header generator 628 | MIME-Version: 1.0 629 | Content-type: text/plain; charset=ISO-8859-1 630 | 631 | The following examples illustrate how text containing 'encoded-word's 632 | which appear in a structured field body. The rules are slightly 633 | different for fields defined as '*text' because "(" and ")" are not 634 | recognized as 'comment' delimiters. [Section 5, paragraph (1)]. 635 | 636 | In each of the following examples, if the same sequence were to occur 637 | in a '*text' field, the "displayed as" form would NOT be treated as 638 | encoded words, but be identical to the "encoded form". This is 639 | because each of the encoded-words in the following examples is 640 | adjacent to a "(" or ")" character. 641 | 642 | encoded form displayed as 643 | --------------------------------------------------------------------- 644 | (=?ISO-8859-1?Q?a?=) (a) 645 | 646 | (=?ISO-8859-1?Q?a?= b) (a b) 647 | 648 | Within a 'comment', white space MUST appear between an 649 | 'encoded-word' and surrounding text. [Section 5, 650 | paragraph (2)]. However, white space is not needed between 651 | the initial "(" that begins the 'comment', and the 652 | 'encoded-word'. 653 | 654 | 655 | (=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=) (ab) 656 | 657 | White space between adjacent 'encoded-word's is not 658 | displayed. 659 | 660 | (=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=) (ab) 661 | 662 | Even multiple SPACEs between 'encoded-word's are ignored 663 | for the purpose of display. 
664 | 665 | (=?ISO-8859-1?Q?a?= (ab) 666 | =?ISO-8859-1?Q?b?=) 667 | 668 | Any amount of linear-white-space between 'encoded-word's, 669 | even if it includes a CRLF followed by one or more SPACEs, 670 | is ignored for the purposes of display. 671 | 672 | 673 | 674 | Moore Standards Track [Page 12] 675 | 676 | RFC 2047 Message Header Extensions November 1996 677 | 678 | 679 | (=?ISO-8859-1?Q?a_b?=) (a b) 680 | 681 | In order to cause a SPACE to be displayed within a portion 682 | of encoded text, the SPACE MUST be encoded as part of the 683 | 'encoded-word'. 684 | 685 | (=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=) (a b) 686 | 687 | In order to cause a SPACE to be displayed between two strings 688 | of encoded text, the SPACE MAY be encoded as part of one of 689 | the 'encoded-word's. 690 | 691 | 9. References 692 | 693 | [RFC 822] Crocker, D., "Standard for the Format of ARPA Internet Text 694 | Messages", STD 11, RFC 822, UDEL, August 1982. 695 | 696 | [RFC 2049] Borenstein, N., and N. Freed, "Multipurpose Internet Mail 697 | Extensions (MIME) Part Five: Conformance Criteria and Examples", 698 | RFC 2049, November 1996. 699 | 700 | [RFC 2045] Borenstein, N., and N. Freed, "Multipurpose Internet Mail 701 | Extensions (MIME) Part One: Format of Internet Message Bodies", 702 | RFC 2045, November 1996. 703 | 704 | [RFC 2046] Borenstein N., and N. Freed, "Multipurpose Internet Mail 705 | Extensions (MIME) Part Two: Media Types", RFC 2046, 706 | November 1996. 707 | 708 | [RFC 2048] Freed, N., Klensin, J., and J. Postel, "Multipurpose 709 | Internet Mail Extensions (MIME) Part Four: Registration 710 | Procedures", RFC 2048, November 1996. 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | Moore Standards Track [Page 13] 731 | 732 | RFC 2047 Message Header Extensions November 1996 733 | 734 | 735 | 10. Security Considerations 736 | 737 | Security issues are not discussed in this memo. 738 | 739 | 11.
Acknowledgements 740 | 741 | The author wishes to thank Nathaniel Borenstein, Issac Chan, Lutz 742 | Donnerhacke, Paul Eggert, Ned Freed, Andreas M. Kirchwitz, Olle 743 | Jarnefors, Mike Rosin, Yutaka Sato, Bart Schaefer, and Kazuhiko 744 | Yamamoto, for their helpful advice, insightful comments, and 745 | illuminating questions in response to earlier versions of this 746 | specification. 747 | 748 | 12. Author's Address 749 | 750 | Keith Moore 751 | University of Tennessee 752 | 107 Ayres Hall 753 | Knoxville TN 37996-1301 754 | 755 | EMail: moore@cs.utk.edu 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | Moore Standards Track [Page 14] 787 | 788 | RFC 2047 Message Header Extensions November 1996 789 | 790 | 791 | Appendix - changes since RFC 1522 (in no particular order) 792 | 793 | + explicitly state that the MIME-Version is not required to use 794 | 'encoded-word's. 795 | 796 | + add explicit note that SPACEs and TABs are not allowed within 797 | 'encoded-word's, explaining that an 'encoded-word' must look like an 798 | 'atom' to an RFC822 parser.values, to be precise). 799 | 800 | + add examples from Olle Jarnefors (thanks!) which illustrate how 801 | encoded-words with adjacent linear-white-space are displayed. 802 | 803 | + explicitly list terms defined in RFC822 and referenced in this memo 804 | 805 | + fix transcription typos that caused one or two lines and a couple of 806 | characters to disappear in the resulting text, due to nroff quirks. 807 | 808 | + clarify that encoded-words are allowed in '*text' fields in both 809 | RFC822 headers and MIME body part headers, but NOT as parameter 810 | values. 811 | 812 | + clarify the requirement to switch back to ASCII within the encoded 813 | portion of an 'encoded-word', for any charset that uses code switching 814 | sequences.
815 | 816 | + add a note about 'encoded-word's being delimited by "(" and ")" 817 | within a comment, but not in a *text (how bizarre!). 818 | 819 | + fix the Andre Pirard example to get rid of the trailing "_" after 820 | the =E9. (no longer needed post-1342). 821 | 822 | + clarification: an 'encoded-word' may appear immediately following 823 | the initial "(" or immediately before the final ")" that delimits a 824 | comment, not just adjacent to "(" and ")" *within* *ctext. 825 | 826 | + add a note to explain that a "B" 'encoded-word' will always have a 827 | multiple of 4 characters in the 'encoded-text' portion. 828 | 829 | + add note about the "=" in the examples 830 | 831 | + note that processing of 'encoded-word's occurs *after* parsing, and 832 | some of the implications thereof. 833 | 834 | + explicitly state that you can't expect to translate between 835 | 1522 and either vanilla 822 or so-called "8-bit headers". 836 | 837 | + explicitly state that 'encoded-word's are not valid within a 838 | 'quoted-string'. 839 | 840 | 841 | 842 | Moore Standards Track [Page 15] 843 | 844 | -------------------------------------------------------------------------------- /rfc/2048-registration-procedures.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Network Working Group N. Freed 8 | Request for Comments: 2048 Innosoft 9 | BCP: 13 J. Klensin 10 | Obsoletes: 1521, 1522, 1590 MCI 11 | Category: Best Current Practice J. Postel 12 | ISI 13 | November 1996 14 | 15 | 16 | Multipurpose Internet Mail Extensions 17 | (MIME) Part Four: 18 | Registration Procedures 19 | 20 | Status of this Memo 21 | 22 | This document specifies an Internet Best Current Practices for the 23 | Internet Community, and requests discussion and suggestions for 24 | improvements. Distribution of this memo is unlimited. 
25 | 26 | Abstract 27 | 28 | STD 11, RFC 822, defines a message representation protocol specifying 29 | considerable detail about US-ASCII message headers, and leaves the 30 | message content, or message body, as flat US-ASCII text. This set of 31 | documents, collectively called the Multipurpose Internet Mail 32 | Extensions, or MIME, redefines the format of messages to allow for 33 | 34 | (1) textual message bodies in character sets other than 35 | US-ASCII, 36 | 37 | (2) an extensible set of different formats for non-textual 38 | message bodies, 39 | 40 | (3) multi-part message bodies, and 41 | 42 | (4) textual header information in character sets other than 43 | US-ASCII. 44 | 45 | These documents are based on earlier work documented in RFC 934, STD 46 | 11, and RFC 1049, but extends and revises them. Because RFC 822 said 47 | so little about message bodies, these documents are largely 48 | orthogonal to (rather than a revision of) RFC 822. 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | Freed, et. al. Best Current Practice [Page 1] 59 | 60 | RFC 2048 MIME Registration Procedures November 1996 61 | 62 | 63 | This fourth document, RFC 2048, specifies various IANA registration 64 | procedures for the following MIME facilities: 65 | 66 | (1) media types, 67 | 68 | (2) external body access types, 69 | 70 | (3) content-transfer-encodings. 71 | 72 | Registration of character sets for use in MIME is covered elsewhere 73 | and is no longer addressed by this document. 74 | 75 | These documents are revisions of RFCs 1521 and 1522, which themselves 76 | were revisions of RFCs 1341 and 1342. An appendix in RFC 2049 77 | describes differences and changes from previous versions. 78 | 79 | Table of Contents 80 | 81 | 1. Introduction ......................................... 3 82 | 2. Media Type Registration .............................. 4 83 | 2.1 Registration Trees and Subtype Names ................ 4 84 | 2.1.1 IETF Tree ......................................... 
4 85 | 2.1.2 Vendor Tree ....................................... 4 86 | 2.1.3 Personal or Vanity Tree ........................... 5 87 | 2.1.4 Special `x.' Tree ................................. 5 88 | 2.1.5 Additional Registration Trees ..................... 6 89 | 2.2 Registration Requirements ........................... 6 90 | 2.2.1 Functionality Requirement ......................... 6 91 | 2.2.2 Naming Requirements ............................... 6 92 | 2.2.3 Parameter Requirements ............................ 7 93 | 2.2.4 Canonicalization and Format Requirements .......... 7 94 | 2.2.5 Interchange Recommendations ....................... 8 95 | 2.2.6 Security Requirements ............................. 8 96 | 2.2.7 Usage and Implementation Non-requirements ......... 9 97 | 2.2.8 Publication Requirements .......................... 10 98 | 2.2.9 Additional Information ............................ 10 99 | 2.3 Registration Procedure .............................. 11 100 | 2.3.1 Present the Media Type to the Community for Review 11 101 | 2.3.2 IESG Approval ..................................... 12 102 | 2.3.3 IANA Registration ................................. 12 103 | 2.4 Comments on Media Type Registrations ................ 12 104 | 2.5 Location of Registered Media Type List .............. 12 105 | 2.6 IANA Procedures for Registering Media Types ......... 12 106 | 2.7 Change Control ...................................... 13 107 | 2.8 Registration Template ............................... 14 108 | 3. External Body Access Types ........................... 14 109 | 3.1 Registration Requirements ........................... 15 110 | 3.1.1 Naming Requirements ............................... 15 111 | 112 | 113 | 114 | Freed, et. al. Best Current Practice [Page 2] 115 | 116 | RFC 2048 MIME Registration Procedures November 1996 117 | 118 | 119 | 3.1.2 Mechanism Specification Requirements .............. 15 120 | 3.1.3 Publication Requirements .......................... 
15 121 | 3.1.4 Security Requirements ............................. 15 122 | 3.2 Registration Procedure .............................. 15 123 | 3.2.1 Present the Access Type to the Community .......... 16 124 | 3.2.2 Access Type Reviewer .............................. 16 125 | 3.2.3 IANA Registration ................................. 16 126 | 3.3 Location of Registered Access Type List ............. 16 127 | 3.4 IANA Procedures for Registering Access Types ........ 16 128 | 4. Transfer Encodings ................................... 17 129 | 4.1 Transfer Encoding Requirements ...................... 17 130 | 4.1.1 Naming Requirements ............................... 17 131 | 4.1.2 Algorithm Specification Requirements .............. 18 132 | 4.1.3 Input Domain Requirements ......................... 18 133 | 4.1.4 Output Range Requirements ......................... 18 134 | 4.1.5 Data Integrity and Generality Requirements ........ 18 135 | 4.1.6 New Functionality Requirements .................... 18 136 | 4.2 Transfer Encoding Definition Procedure .............. 19 137 | 4.3 IANA Procedures for Transfer Encoding Registration... 19 138 | 4.4 Location of Registered Transfer Encodings List ...... 19 139 | 5. Authors' Addresses ................................... 20 140 | A. Grandfathered Media Types ............................ 21 141 | 142 | 1. Introduction 143 | 144 | Recent Internet protocols have been carefully designed to be easily 145 | extensible in certain areas. In particular, MIME [RFC 2045] is an 146 | open-ended framework and can accommodate additional object types, 147 | character sets, and access methods without any changes to the basic 148 | protocol. A registration process is needed, however, to ensure that 149 | the set of such values is developed in an orderly, well-specified, 150 | and public manner. 
151 | 152 | This document defines registration procedures which use the Internet 153 | Assigned Numbers Authority (IANA) as a central registry for such 154 | values. 155 | 156 | Historical Note: The registration process for media types was 157 | initially defined in the context of the asynchronous Internet mail 158 | environment. In this mail environment there is a need to limit the 159 | number of possible media types to increase the likelihood of 160 | interoperability when the capabilities of the remote mail system are 161 | not known. As media types are used in new environments, where the 162 | proliferation of media types is not a hindrance to interoperability, 163 | the original procedure was excessively restrictive and had to be 164 | generalized. 165 | 166 | 167 | 168 | 169 | 170 | Freed, et. al. Best Current Practice [Page 3] 171 | 172 | RFC 2048 MIME Registration Procedures November 1996 173 | 174 | 175 | 2. Media Type Registration 176 | 177 | Registration of a new media type or types starts with the 178 | construction of a registration proposal. Registration may occur in 179 | several different registration trees, which have different 180 | requirements as discussed below. In general, the new registration 181 | proposal is circulated and reviewed in a fashion appropriate to the 182 | tree involved. The media type is then registered if the proposal is 183 | acceptable. The following sections describe the requirements and 184 | procedures used for each of the different registration trees. 185 | 186 | 2.1. Registration Trees and Subtype Names 187 | 188 | In order to increase the efficiency and flexibility of the 189 | registration process, different structures of subtype names may be 190 | registered to accommodate the different natural requirements for, 191 | e.g., a subtype that will be recommended for wide support and 192 | implementation by the Internet Community or a subtype that is used to 193 | move files associated with proprietary software.
The following 194 | subsections define registration "trees", distinguished by the use of 195 | faceted names (e.g., names of the form "tree.subtree...type"). Note 196 | that some media types defined prior to this document do not conform 197 | to the naming conventions described below. See Appendix A for a 198 | discussion of them. 199 | 200 | 2.1.1. IETF Tree 201 | 202 | The IETF tree is intended for types of general interest to the 203 | Internet Community. Registration in the IETF tree requires approval 204 | by the IESG and publication of the media type registration as some 205 | form of RFC. 206 | 207 | Media types in the IETF tree are normally denoted by names that are 208 | not explicitly faceted, i.e., do not contain period (".", full stop) 209 | characters. 210 | 211 | The "owner" of a media type registration in the IETF tree is assumed 212 | to be the IETF itself. Modification or alteration of the 213 | specification requires the same level of processing (e.g. standards 214 | track) required for the initial registration. 215 | 216 | 2.1.2. Vendor Tree 217 | 218 | The vendor tree is used for media types associated with commercially 219 | available products. "Vendor" or "producer" are construed as 220 | equivalent and very broadly in this context. 221 | 222 | 223 | 224 | 225 | 226 | Freed, et. al. Best Current Practice [Page 4] 227 | 228 | RFC 2048 MIME Registration Procedures November 1996 229 | 230 | 231 | A registration may be placed in the vendor tree by anyone who has 232 | need to interchange files associated with the particular product. 233 | However, the registration formally belongs to the vendor or 234 | organization producing the software or file format. Changes to the 235 | specification will be made at their request, as discussed in 236 | subsequent sections. 237 | 238 | Registrations in the vendor tree will be distinguished by the leading 239 | facet "vnd.". 
That may be followed, at the discretion of the 240 | registration, by either a media type name from a well-known producer 241 | (e.g., "vnd.mudpie") or by an IANA-approved designation of the 242 | producer's name which is then followed by a media type or product 243 | designation (e.g., vnd.bigcompany.funnypictures). 244 | 245 | While public exposure and review of media types to be registered in 246 | the vendor tree is not required, using the ietf-types list for review 247 | is strongly encouraged to improve the quality of those 248 | specifications. Registrations in the vendor tree may be submitted 249 | directly to the IANA. 250 | 251 | 2.1.3. Personal or Vanity Tree 252 | 253 | Registrations for media types created experimentally or as part of 254 | products that are not distributed commercially may be registered in 255 | the personal or vanity tree. The registrations are distinguished by 256 | the leading facet "prs.". 257 | 258 | The owner of "personal" registrations and associated specifications 259 | is the person or entity making the registration, or one to whom 260 | responsibility has been transferred as described below. 261 | 262 | While public exposure and review of media types to be registered in 263 | the personal tree is not required, using the ietf-types list for 264 | review is strongly encouraged to improve the quality of those 265 | specifications. Registrations in the personal tree may be submitted 266 | directly to the IANA. 267 | 268 | 2.1.4. Special `x.' Tree 269 | 270 | For convenience and symmetry with this registration scheme, media 271 | type names with "x." as the first facet may be used for the same 272 | purposes for which names starting in "x-" are normally used. These 273 | types are unregistered, experimental, and should be used only with 274 | the active agreement of the parties exchanging them. 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | Freed, et. al.
Best Current Practice [Page 5] 283 | 284 | RFC 2048 MIME Registration Procedures November 1996 285 | 286 | 287 | However, with the simplified registration procedures described above 288 | for vendor and personal trees, it should rarely, if ever, be 289 | necessary to use unregistered experimental types, and as such use of 290 | both "x-" and "x." forms is discouraged. 291 | 292 | 2.1.5. Additional Registration Trees 293 | 294 | From time to time and as required by the community, the IANA may, 295 | with the advice and consent of the IESG, create new top-level 296 | registration trees. It is explicitly assumed that these trees may be 297 | created for external registration and management by well-known 298 | permanent bodies, such as scientific societies for media types 299 | specific to the sciences they cover. In general, the quality of 300 | review of specifications for one of these additional registration 301 | trees is expected to be equivalent to that which IETF would give to 302 | registrations in its own tree. Establishment of these new trees will 303 | be announced through RFC publication approved by the IESG. 304 | 305 | 2.2. Registration Requirements 306 | 307 | Media type registration proposals are all expected to conform to 308 | various requirements laid out in the following sections. Note that 309 | requirement specifics sometimes vary depending on the registration 310 | tree, again as detailed in the following sections. 311 | 312 | 2.2.1. Functionality Requirement 313 | 314 | Media types must function as an actual media format: Registration of 315 | things that are better thought of as a transfer encoding, as a 316 | character set, or as a collection of separate entities of another 317 | type, is not allowed. For example, although applications exist to 318 | decode the base64 transfer encoding [RFC 2045], base64 cannot be 319 | registered as a media type. 320 | 321 | This requirement applies regardless of the registration tree 322 | involved. 
323 | 324 | 2.2.2. Naming Requirements 325 | 326 | All registered media types must be assigned MIME type and subtype 327 | names. The combination of these names then serves to uniquely 328 | identify the media type and the format of the subtype name identifies 329 | the registration tree. 330 | 331 | The choice of top-level type name must take the nature of media type 332 | involved into account. For example, media normally used for 333 | representing still images should be a subtype of the image content 334 | type, whereas media capable of representing audio information belongs 335 | 336 | 337 | 338 | Freed, et. al. Best Current Practice [Page 6] 339 | 340 | RFC 2048 MIME Registration Procedures November 1996 341 | 342 | 343 | under the audio content type. See RFC 2046 for additional information 344 | on the basic set of top-level types and their characteristics. 345 | 346 | New subtypes of top-level types must conform to the restrictions of 347 | the top-level type, if any. For example, all subtypes of the 348 | multipart content type must use the same encapsulation syntax. 349 | 350 | In some cases a new media type may not "fit" under any currently 351 | defined top-level content type. Such cases are expected to be quite 352 | rare. However, if such a case arises a new top-level type can be 353 | defined to accommodate it. Such a definition must be done via 354 | standards-track RFC; no other mechanism can be used to define 355 | additional top-level content types. 356 | 357 | These requirements apply regardless of the registration tree 358 | involved. 359 | 360 | 2.2.3. Parameter Requirements 361 | 362 | Media types may elect to use one or more MIME content type 363 | parameters, or some parameters may be automatically made available to 364 | the media type by virtue of being a subtype of a content type that 365 | defines a set of parameters applicable to any of its subtypes. 
In 366 | either case, the names, values, and meanings of any parameters must 367 | be fully specified when a media type is registered in the IETF tree, 368 | and should be specified as completely as possible when media types 369 | are registered in the vendor or personal trees. 370 | 371 | New parameters must not be defined as a way to introduce new 372 | functionality in types registered in the IETF tree, although new 373 | parameters may be added to convey additional information that does 374 | not otherwise change existing functionality. An example of this 375 | would be a "revision" parameter to indicate a revision level of an 376 | external specification such as JPEG. Similar behavior is encouraged 377 | for media types registered in the vendor or personal trees but is not 378 | required. 379 | 380 | 2.2.4. Canonicalization and Format Requirements 381 | 382 | All registered media types must employ a single, canonical data 383 | format, regardless of registration tree. 384 | 385 | A precise and openly available specification of the format of each 386 | media type is required for all types registered in the IETF tree and 387 | must at a minimum be referenced by, if it isn't actually included in, 388 | the media type registration proposal itself. 389 | 390 | 391 | 392 | 393 | 394 | Freed, et. al. Best Current Practice [Page 7] 395 | 396 | RFC 2048 MIME Registration Procedures November 1996 397 | 398 | 399 | The specifications of format and processing particulars may or may 400 | not be publically available for media types registered in the vendor 401 | tree, and such registration proposals are explicitly permitted to 402 | include only a specification of which software and version produce or 403 | process such media types. References to or inclusion of format 404 | specifications in registration proposals is encouraged but not 405 | required. 
406 | 407 | Format specifications are still required for registration in the 408 | personal tree, but may be either published as RFCs or otherwise 409 | deposited with IANA. The deposited specifications will meet the same 410 | criteria as those required to register a well-known TCP port and, in 411 | particular, need not be made public. 412 | 413 | Some media types involve the use of patented technology. The 414 | registration of media types involving patented technology is 415 | specifically permitted. However, the restrictions set forth in RFC 416 | 1602 on the use of patented technology in standards-track protocols 417 | must be respected when the specification of a media type is part of a 418 | standards-track protocol. 419 | 420 | 2.2.5. Interchange Recommendations 421 | 422 | Media types should, whenever possible, interoperate across as many 423 | systems and applications as possible. However, some media types will 424 | inevitably have problems interoperating across different platforms. 425 | Problems with different versions, byte ordering, and specifics of 426 | gateway handling can and will arise. 427 | 428 | Universal interoperability of media types is not required, but known 429 | interoperability issues should be identified whenever possible. 430 | Publication of a media type does not require an exhaustive review of 431 | interoperability, and the interoperability considerations section is 432 | subject to continuing evaluation. 433 | 434 | These recommendations apply regardless of the registration tree 435 | involved. 436 | 437 | 2.2.6. Security Requirements 438 | 439 | An analysis of security issues is required for all types 440 | registered in the IETF Tree. (This is in accordance with the basic 441 | requirements for all IETF protocols.) A similar analysis for media 442 | types registered in the vendor or personal trees is encouraged but 443 | not required.
However, regardless of what security analysis has or 444 | has not been done, all descriptions of security issues must be as 445 | accurate as possible regardless of registration tree. In particular, 446 | a statement that there are "no security issues associated with this 447 | 448 | 449 | 450 | Freed, et. al. Best Current Practice [Page 8] 451 | 452 | RFC 2048 MIME Registration Procedures November 1996 453 | 454 | 455 | type" must not be confused with "the security issues associated with 456 | this type have not been assessed". 457 | 458 | There is absolutely no requirement that media types registered in any 459 | tree be secure or completely free from risks. Nevertheless, all 460 | known security risks must be identified in the registration of a 461 | media type, again regardless of registration tree. 462 | 463 | The security considerations section of all registrations is subject 464 | to continuing evaluation and modification, and in particular may be 465 | extended by use of the "comments on media types" mechanism described 466 | in subsequent sections. 467 | 468 | Some of the issues that should be looked at in a security analysis of 469 | a media type are: 470 | 471 | (1) Complex media types may include provisions for 472 | directives that institute actions on a recipient's 473 | files or other resources. In many cases provision is 474 | made for originators to specify arbitrary actions in an 475 | unrestricted fashion which may then have devastating 476 | effects. See the registration of the 477 | application/postscript media type in RFC 2046 for 478 | an example of such directives and how to handle them. 479 | 480 | (2) Complex media types may include provisions for 481 | directives that institute actions which, while not 482 | directly harmful to the recipient, may result in 483 | disclosure of information that either facilitates a 484 | subsequent attack or else violates a recipient's 485 | privacy in some way.
Again, the registration of the 486 | application/postscript media type illustrates how such 487 | directives can be handled. 488 | 489 | (3) A media type might be targeted for applications that 490 | require some sort of security assurance but not provide 491 | the necessary security mechanisms themselves. For 492 | example, a media type could be defined for storage of 493 | confidential medical information which in turn requires 494 | an external confidentiality service. 495 | 496 | 2.2.7. Usage and Implementation Non-requirements 497 | 498 | In the asynchronous mail environment, where information on the 499 | capabilities of the remote mail agent is frequently not available to 500 | the sender, maximum interoperability is attained by restricting the 501 | number of media types used to those "common" formats expected to be 502 | widely implemented. This was asserted in the past as a reason to 503 | 504 | 505 | 506 | Freed, et. al. Best Current Practice [Page 9] 507 | 508 | RFC 2048 MIME Registration Procedures November 1996 509 | 510 | 511 | limit the number of possible media types and resulted in a 512 | registration process with a significant hurdle and delay for those 513 | registering media types. 514 | 515 | However, the need for "common" media types does not require limiting 516 | the registration of new media types. If a limited set of media types 517 | is recommended for a particular application, that should be asserted 518 | by a separate applicability statement specific for the application 519 | and/or environment. 520 | 521 | As such, universal support and implementation of a media type is NOT 522 | a requirement for registration. If, however, a media type is 523 | explicitly intended for limited use, this should be noted in its 524 | registration. 525 | 526 | 2.2.8. Publication Requirements 527 | 528 | Proposals for media types registered in the IETF tree must be 529 | published as RFCs. 
RFC publication of vendor and personal media type 530 | proposals is encouraged but not required. In all cases IANA will 531 | retain copies of all media type proposals and "publish" them as part 532 | of the media types registration tree itself. 533 | 534 | Other than in the IETF tree, the registration of a data type does not 535 | imply endorsement, approval, or recommendation by IANA or IETF or 536 | even certification that the specification is adequate. To become 537 | Internet Standards, protocol, data objects, or whatever must go 538 | through the IETF standards process. This is too difficult and too 539 | lengthy a process for the convenient registration of media types. 540 | 541 | The IETF tree exists for media types that do require a 542 | substantive review and approval process, while the vendor and personal 543 | trees exist for those that do not. It is expected that applicability 544 | statements for particular applications will be published from time to 545 | time that recommend implementation of, and support for, media types 546 | that have proven particularly useful in those contexts. 547 | 548 | As discussed above, registration of a top-level type requires 549 | standards-track processing and, hence, RFC publication. 550 | 551 | 2.2.9. Additional Information 552 | 553 | Various sorts of optional information may be included in the 554 | specification of a media type if it is available: 555 | 556 | (1) Magic number(s) (length, octet values). Magic numbers 557 | are byte sequences that are always present and thus can 558 | be used to identify entities as being of a given media 559 | 560 | 561 | 562 | Freed, et. al. Best Current Practice [Page 10] 563 | 564 | RFC 2048 MIME Registration Procedures November 1996 565 | 566 | 567 | type. 568 | 569 | (2) File extension(s) commonly used on one or more 570 | platforms to indicate that some file contains a given 571 | type of media.
572 | 573 | (3) Macintosh File Type code(s) (4 octets) used to label 574 | files containing a given type of media. 575 | 576 | Such information is often quite useful to implementors and if 577 | available should be provided. 578 | 579 | 2.3. Registration Procedure 580 | 581 | The following procedure has been implemented by the IANA for review 582 | and approval of new media types. This is not a formal standards 583 | process, but rather an administrative procedure intended to allow 584 | community comment and sanity checking without excessive time delay. 585 | For registration in the IETF tree, the normal IETF processes should 586 | be followed, treating posting of an internet-draft and announcement 587 | on the ietf-types list (as described in the next subsection) as a 588 | first step. For registrations in the vendor or personal tree, the 589 | initial review step described below may be omitted and the type 590 | registered directly by submitting the template and an explanation 591 | directly to IANA (at iana@iana.org). However, authors of vendor or 592 | personal media type specifications are encouraged to seek community 593 | review and comment whenever that is feasible. 594 | 595 | 2.3.1. Present the Media Type to the Community for Review 596 | 597 | Send a proposed media type registration to the "ietf-types@iana.org" 598 | mailing list for a two week review period. This mailing list has 599 | been established for the purpose of reviewing proposed media and 600 | access types. Proposed media types are not formally registered and 601 | must not be used; the "x-" prefix specified in RFC 2045 can be used 602 | until registration is complete. 603 | 604 | The intent of the public posting is to solicit comments and feedback 605 | on the choice of type/subtype name, the unambiguity of the references 606 | with respect to versions and external profiling information, and a 607 | review of any interoperability or security considerations. 
The 608 | submitter may submit a revised registration, or withdraw the 609 | registration completely, at any time. 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | Freed, et. al. Best Current Practice [Page 11] 619 | 620 | RFC 2048 MIME Registration Procedures November 1996 621 | 622 | 623 | 2.3.2. IESG Approval 624 | 625 | Media types registered in the IETF tree must be submitted to the IESG 626 | for approval. 627 | 628 | 2.3.3. IANA Registration 629 | 630 | Provided that the media type meets the requirements for media types 631 | and has obtained approval that is necessary, the author may submit 632 | the registration request to the IANA, which will register the media 633 | type and make the media type registration available to the community. 634 | 635 | 2.4. Comments on Media Type Registrations 636 | 637 | Comments on registered media types may be submitted by members of the 638 | community to IANA. These comments will be passed on to the "owner" 639 | of the media type if possible. Submitters of comments may request 640 | that their comment be attached to the media type registration itself, 641 | and if IANA approves of this the comment will be made accessible in 642 | conjunction with the type registration itself. 643 | 644 | 2.5. Location of Registered Media Type List 645 | 646 | Media type registrations will be posted in the anonymous FTP 647 | directory "ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/" 648 | and all registered media types will be listed in the periodically 649 | issued "Assigned Numbers" RFC [currently STD 2, RFC 1700]. The media 650 | type description and other supporting material may also be published 651 | as an Informational RFC by sending it to "rfc-editor@isi.edu" (please 652 | follow the instructions to RFC authors [RFC-1543]). 653 | 654 | 2.6. 
IANA Procedures for Registering Media Types 655 | 656 | The IANA will only register media types in the IETF tree in response 657 | to a communication from the IESG stating that a given registration 658 | has been approved. Vendor and personal types will be registered by 659 | the IANA automatically and without any formal review as long as the 660 | following minimal conditions are met: 661 | 662 | (1) Media types must function as an actual media format. 663 | In particular, character sets and transfer encodings 664 | may not be registered as media types. 665 | 666 | (2) All media types must have properly formed type and 667 | subtype names. All type names must be defined by a 668 | standards-track RFC. All subtype names must be unique, 669 | must conform to the MIME grammar for such names, and 670 | must contain the proper tree prefix. 671 | 672 | 673 | 674 | Freed, et. al. Best Current Practice [Page 12] 675 | 676 | RFC 2048 MIME Registration Procedures November 1996 677 | 678 | 679 | (3) Types registered in the personal tree must either 680 | provide a format specification or a pointer to one. 681 | 682 | (4) Any security considerations given must not be obviously 683 | bogus. (It is neither possible nor necessary for the 684 | IANA to conduct a comprehensive security review of 685 | media type registrations. Nevertheless, IANA has the 686 | authority to identify obviously incompetent material 687 | and exclude it.) 688 | 689 | 2.7. Change Control 690 | 691 | Once a media type has been published by IANA, the author may request 692 | a change to its definition. The descriptions of the different 693 | registration trees above designate the "owners" of each type of 694 | registration. The change request follows the same procedure as the 695 | registration request: 696 | 697 | (1) Publish the revised template on the ietf-types list. 698 | 699 | (2) Leave at least two weeks for comments. 700 | 701 | (3) Publish using IANA after formal review if required. 
702 | 703 | Changes should be requested only when there are serious omissions or 704 | errors in the published specification. When review is required, a 705 | change request may be denied if it renders entities that were valid 706 | under the previous definition invalid under the new definition. 707 | 708 | The owner of a content type may pass responsibility for the content 709 | type to another person or agency by informing IANA and the ietf-types 710 | list; this can be done without discussion or review. 711 | 712 | The IESG may reassign responsibility for a media type. The most 713 | common case of this will be to enable changes to be made to types 714 | where the author of the registration has died, moved out of contact 715 | or is otherwise unable to make changes that are important to the 716 | community. 717 | 718 | Media type registrations may not be deleted; media types which are no 719 | longer believed appropriate for use can be declared OBSOLETE by a 720 | change to their "intended use" field; such media types will be 721 | clearly marked in the lists published by IANA. 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | Freed, et. al. Best Current Practice [Page 13] 731 | 732 | RFC 2048 MIME Registration Procedures November 1996 733 | 734 | 735 | 2.8.
Registration Template 736 | 737 | To: ietf-types@iana.org 738 | Subject: Registration of MIME media type XXX/YYY 739 | 740 | MIME media type name: 741 | 742 | MIME subtype name: 743 | 744 | Required parameters: 745 | 746 | Optional parameters: 747 | 748 | Encoding considerations: 749 | 750 | Security considerations: 751 | 752 | Interoperability considerations: 753 | 754 | Published specification: 755 | 756 | Applications which use this media type: 757 | 758 | Additional information: 759 | 760 | Magic number(s): 761 | File extension(s): 762 | Macintosh File Type Code(s): 763 | 764 | Person & email address to contact for further information: 765 | 766 | Intended usage: 767 | 768 | (One of COMMON, LIMITED USE or OBSOLETE) 769 | 770 | Author/Change controller: 771 | 772 | (Any other information that the author deems interesting may be 773 | added below this line.) 774 | 775 | 3. External Body Access Types 776 | 777 | RFC 2046 defines the message/external-body media type, whereby a MIME 778 | entity can act as pointer to the actual body data in lieu of 779 | including the data directly in the entity body. Each 780 | message/external-body reference specifies an access type, which 781 | determines the mechanism used to retrieve the actual body data. RFC 782 | 2046 defines an initial set of access types, but allows for the 783 | 784 | 785 | 786 | Freed, et. al. Best Current Practice [Page 14] 787 | 788 | RFC 2048 MIME Registration Procedures November 1996 789 | 790 | 791 | registration of additional access types to accommodate new retrieval 792 | mechanisms. 793 | 794 | 3.1. Registration Requirements 795 | 796 | New access type specifications must conform to a number of 797 | requirements as described below. 798 | 799 | 3.1.1. Naming Requirements 800 | 801 | Each access type must have a unique name. This name appears in the 802 | access-type parameter in the message/external-body content-type 803 | header field, and must conform to MIME content type parameter syntax. 
804 | 805 | 3.1.2. Mechanism Specification Requirements 806 | 807 | All of the protocols, transports, and procedures used by a given 808 | access type must be described, either in the specification of the 809 | access type itself or in some other publicly available specification, 810 | in sufficient detail for the access type to be implemented by any 811 | competent implementor. Use of secret and/or proprietary methods in 812 | access types are expressly prohibited. The restrictions imposed by 813 | RFC 1602 on the standardization of patented algorithms must be 814 | respected as well. 815 | 816 | 3.1.3. Publication Requirements 817 | 818 | All access types must be described by an RFC. The RFC may be 819 | informational rather than standards-track, although standard-track 820 | review and approval are encouraged for all access types. 821 | 822 | 3.1.4. Security Requirements 823 | 824 | Any known security issues that arise from the use of the access type 825 | must be completely and fully described. It is not required that the 826 | access type be secure or that it be free from risks, but that the 827 | known risks be identified. Publication of a new access type does not 828 | require an exhaustive security review, and the security 829 | considerations section is subject to continuing evaluation. 830 | Additional security considerations should be addressed by publishing 831 | revised versions of the access type specification. 832 | 833 | 3.2. Registration Procedure 834 | 835 | Registration of a new access type starts with the construction of a 836 | draft of an RFC. 837 | 838 | 839 | 840 | 841 | 842 | Freed, et. al. Best Current Practice [Page 15] 843 | 844 | RFC 2048 MIME Registration Procedures November 1996 845 | 846 | 847 | 3.2.1. Present the Access Type to the Community 848 | 849 | Send a proposed access type specification to the "ietf- 850 | types@iana.org" mailing list for a two week review period. 
This 851 | mailing list has been established for the purpose of reviewing 852 | proposed access and media types. Proposed access types are not 853 | formally registered and must not be used. 854 | 855 | The intent of the public posting is to solicit comments and feedback 856 | on the access type specification and a review of any security 857 | considerations. 858 | 859 | 3.2.2. Access Type Reviewer 860 | 861 | When the two week period has passed, the access type reviewer, who is 862 | appointed by the IETF Applications Area Director, either forwards the 863 | request to iana@isi.edu, or rejects it because of significant 864 | objections raised on the list. 865 | 866 | Decisions made by the reviewer must be posted to the ietf-types 867 | mailing list within 14 days. Decisions made by the reviewer may be 868 | appealed to the IESG. 869 | 870 | 3.2.3. IANA Registration 871 | 872 | Provided that the access type has either passed review or has been 873 | successfully appealed to the IESG, the IANA will register the access 874 | type and make the registration available to the community. The 875 | specification of the access type must also be published as an RFC. 876 | Informational RFCs are published by sending them to "rfc- 877 | editor@isi.edu" (please follow the instructions to RFC authors [RFC- 878 | 1543]). 879 | 880 | 3.3. Location of Registered Access Type List 881 | 882 | Access type registrations will be posted in the anonymous FTP 883 | directory "ftp://ftp.isi.edu/in-notes/iana/assignments/access-types/" 884 | and all registered access types will be listed in the periodically 885 | issued "Assigned Numbers" RFC [currently RFC-1700]. 886 | 887 | 3.4. IANA Procedures for Registering Access Types 888 | 889 | The identity of the access type reviewer is communicated to the IANA 890 | by the IESG. 
The IANA then only acts in response to access type 891 | definitions that either are approved by the access type reviewer and 892 | forwarded by the reviewer to the IANA for registration, or in 893 | response to a communication from the IESG that an access type 894 | definition appeal has overturned the access type reviewer's ruling. 895 | 896 | 897 | 898 | Freed, et. al. Best Current Practice [Page 16] 899 | 900 | RFC 2048 MIME Registration Procedures November 1996 901 | 902 | 903 | 4. Transfer Encodings 904 | 905 | Transfer encodings are transformations applied to MIME media types 906 | after conversion to the media type's canonical form. Transfer 907 | encodings are used for several purposes: 908 | 909 | (1) Many transports, especially message transports, can 910 | only handle data consisting of relatively short lines 911 | of text. There can also be severe restrictions on what 912 | characters can be used in these lines of text -- some 913 | transports are restricted to a small subset of US-ASCII 914 | and others cannot handle certain character sequences. 915 | Transfer encodings are used to transform binary data 916 | into textual form that can survive such transports. 917 | Examples of this sort of transfer encoding include the 918 | base64 and quoted-printable transfer encodings defined 919 | in RFC 2045. 920 | 921 | (2) Image, audio, video, and even application entities are 922 | sometimes quite large. Compression algorithms are often 923 | quite effective in reducing the size of large entities. 924 | Transfer encodings can be used to apply general-purpose 925 | non-lossy compression algorithms to MIME entities. 926 | 927 | (3) Transport encodings can be defined as a means of 928 | representing existing encoding formats in a MIME 929 | context. 930 | 931 | IMPORTANT: The standardization of a large number of different 932 | transfer encodings is seen as a significant barrier to widespread 933 | interoperability and is expressly discouraged.
Nevertheless, the 934 | following procedure has been defined to provide a means of defining 935 | additional transfer encodings, should standardization actually be 936 | justified. 937 | 938 | 4.1. Transfer Encoding Requirements 939 | 940 | Transfer encoding specifications must conform to a number of 941 | requirements as described below. 942 | 943 | 4.1.1. Naming Requirements 944 | 945 | Each transfer encoding must have a unique name. This name appears in 946 | the Content-Transfer-Encoding header field and must conform to the 947 | syntax of that field. 948 | 949 | 950 | 951 | 952 | 953 | 954 | Freed, et. al. Best Current Practice [Page 17] 955 | 956 | RFC 2048 MIME Registration Procedures November 1996 957 | 958 | 959 | 4.1.2. Algorithm Specification Requirements 960 | 961 | All of the algorithms used in a transfer encoding (e.g. conversion 962 | to printable form, compression) must be described in their entirety 963 | in the transfer encoding specification. Use of secret and/or 964 | proprietary algorithms in standardized transfer encodings is 965 | expressly prohibited. The restrictions imposed by RFC 1602 on the 966 | standardization of patented algorithms must be respected as well. 967 | 968 | 4.1.3. Input Domain Requirements 969 | 970 | All transfer encodings must be applicable to an arbitrary sequence of 971 | octets of any length. Dependence on particular input forms is not 972 | allowed. 973 | 974 | It should be noted that the 7bit and 8bit encodings do not conform to 975 | this requirement. Aside from the undesirability of having 976 | specialized encodings, the intent here is to forbid the addition of 977 | additional encodings along the lines of 7bit and 8bit. 978 | 979 | 4.1.4. Output Range Requirements 980 | 981 | There is no requirement that a particular transfer encoding produce a 982 | particular form of encoded output. However, the output format for 983 | each transfer encoding must be fully and completely documented.
In 984 | particular, each specification must clearly state whether the output 985 | format always lies within the confines of 7bit data, 8bit data, or is 986 | simply pure binary data. 987 | 988 | 4.1.5. Data Integrity and Generality Requirements 989 | 990 | All transfer encodings must be fully invertible on any platform; it 991 | must be possible for anyone to recover the original data by 992 | performing the corresponding decoding operation. Note that this 993 | requirement effectively excludes all forms of lossy compression as 994 | well as all forms of encryption from use as a transfer encoding. 995 | 996 | 4.1.6. New Functionality Requirements 997 | 998 | All transfer encodings must provide some sort of new functionality. 999 | Some degree of functionality overlap with previously defined transfer 1000 | encodings is acceptable, but any new transfer encoding must also 1001 | offer something no other transfer encoding provides. 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 1010 | Freed, et. al. Best Current Practice [Page 18] 1011 | 1012 | RFC 2048 MIME Registration Procedures November 1996 1013 | 1014 | 1015 | 4.2. Transfer Encoding Definition Procedure 1016 | 1017 | Definition of a new transfer encoding starts with the construction of 1018 | a draft of a standards-track RFC. The RFC must define the transfer 1019 | encoding precisely and completely, and must also provide substantial 1020 | justification for defining and standardizing a new transfer encoding. 1021 | This specification must then be presented to the IESG for 1022 | consideration. The IESG can 1023 | 1024 | (1) reject the specification outright as being 1025 | inappropriate for standardization, 1026 | 1027 | (2) approve the formation of an IETF working group to work 1028 | on the specification in accordance with IETF 1029 | procedures, or, 1030 | 1031 | (3) accept the specification as-is and put it directly on 1032 | the standards track. 
1033 | 1034 | Transfer encoding specifications on the standards track follow normal 1035 | IETF rules for standards track documents. A transfer encoding is 1036 | considered to be defined and available for use once it is on the 1037 | standards track. 1038 | 1039 | 4.3. IANA Procedures for Transfer Encoding Registration 1040 | 1041 | There is no need for a special procedure for registering Transfer 1042 | Encodings with the IANA. All legitimate transfer encoding 1043 | registrations must appear as a standards-track RFC, so it is the 1044 | IESG's responsibility to notify the IANA when a new transfer encoding 1045 | has been approved. 1046 | 1047 | 4.4. Location of Registered Transfer Encodings List 1048 | 1049 | Transfer encoding registrations will be posted in the anonymous FTP 1050 | directory "ftp://ftp.isi.edu/in-notes/iana/assignments/transfer- 1051 | encodings/" and all registered transfer encodings will be listed in 1052 | the periodically issued "Assigned Numbers" RFC [currently RFC-1700]. 1053 | 1054 | 1055 | 1056 | 1057 | 1058 | 1059 | 1060 | 1061 | 1062 | 1063 | 1064 | 1065 | 1066 | Freed, et. al. Best Current Practice [Page 19] 1067 | 1068 | RFC 2048 MIME Registration Procedures November 1996 1069 | 1070 | 1071 | 5. Authors' Addresses 1072 | 1073 | For more information, the authors of this document are best 1074 | contacted via Internet mail: 1075 | 1076 | Ned Freed 1077 | Innosoft International, Inc. 
1078 | 1050 East Garvey Avenue South 1079 | West Covina, CA 91790 1080 | USA 1081 | 1082 | Phone: +1 818 919 3600 1083 | Fax: +1 818 919 3614 1084 | EMail: ned@innosoft.com 1085 | 1086 | 1087 | John Klensin 1088 | MCI 1089 | 2100 Reston Parkway 1090 | Reston, VA 22091 1091 | 1092 | Phone: +1 703 715-7361 1093 | Fax: +1 703 715-7436 1094 | EMail: klensin@mci.net 1095 | 1096 | 1097 | Jon Postel 1098 | USC/Information Sciences Institute 1099 | 4676 Admiralty Way 1100 | Marina del Rey, CA 90292 1101 | USA 1102 | 1103 | 1104 | Phone: +1 310 822 1511 1105 | Fax: +1 310 823 6714 1106 | EMail: Postel@ISI.EDU 1107 | 1108 | 1109 | 1110 | 1111 | 1112 | 1113 | 1114 | 1115 | 1116 | 1117 | 1118 | 1119 | 1120 | 1121 | 1122 | Freed, et. al. Best Current Practice [Page 20] 1123 | 1124 | RFC 2048 MIME Registration Procedures November 1996 1125 | 1126 | 1127 | Appendix A -- Grandfathered Media Types 1128 | 1129 | A number of media types, registered prior to 1996, would, if 1130 | registered under the guidelines in this document, be placed into 1131 | either the vendor or personal trees. Reregistration of those types 1132 | to reflect the appropriate trees is encouraged, but not required. 1133 | Ownership and change control principles outlined in this document 1134 | apply to those types as if they had been registered in the trees 1135 | described above. 1136 | 1137 | 1138 | 1139 | 1140 | 1141 | 1142 | 1143 | 1144 | 1145 | 1146 | 1147 | 1148 | 1149 | 1150 | 1151 | 1152 | 1153 | 1154 | 1155 | 1156 | 1157 | 1158 | 1159 | 1160 | 1161 | 1162 | 1163 | 1164 | 1165 | 1166 | 1167 | 1168 | 1169 | 1170 | 1171 | 1172 | 1173 | 1174 | 1175 | 1176 | 1177 | 1178 | 1179 | Freed, et. al. Best Current Practice [Page 21] 1180 | 1181 | -------------------------------------------------------------------------------- /rfc/2387-mime-multipart-content-type.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Network Working Group E. 
Levinson 8 | Request for Comments: 2387 August 1998 9 | Obsoletes: 2112 10 | Category: Standards Track 11 | 12 | 13 | The MIME Multipart/Related Content-type 14 | 15 | Status of this Memo 16 | 17 | This document specifies an Internet standards track protocol for the 18 | Internet community, and requests discussion and suggestions for 19 | improvements. Please refer to the current edition of the "Internet 20 | Official Protocol Standards" (STD 1) for the standardization state 21 | and status of this protocol. Distribution of this memo is unlimited. 22 | 23 | Copyright Notice 24 | 25 | Copyright (C) The Internet Society (1998). All Rights Reserved. 26 | 27 | Abstract 28 | 29 | The Multipart/Related content-type provides a common mechanism for 30 | representing objects that are aggregates of related MIME body parts. 31 | This document defines the Multipart/Related content-type and provides 32 | examples of its use. 33 | 34 | 1. Introduction 35 | 36 | Several applications of MIME, including MIME-PEM, and MIME-Macintosh 37 | and other proposals, require multiple body parts that make sense only 38 | in the aggregate. The present approach to these compound objects has 39 | been to define specific multipart subtypes for each new object. In 40 | keeping with the MIME philosophy of having one mechanism to achieve 41 | the same goal for different purposes, this document describes a 42 | single mechanism for such aggregate or compound objects. 43 | 44 | The Multipart/Related content-type addresses the MIME representation 45 | of compound objects. The object is categorized by a "type" 46 | parameter. Additional parameters are provided to indicate a specific 47 | starting body part or root and auxiliary information which may be 48 | required when unpacking or processing the object. 49 | 50 | Multipart/Related MIME entities may contain Content-Disposition 51 | headers that provide suggestions for the storage and display of a 52 | body part. 
Multipart/Related processing takes precedence over 53 | Content-Disposition; the interaction between them is discussed in 54 | section 4. 55 | 56 | 57 | 58 | Levinson Standards Track [Page 1] 59 | 60 | RFC 2387 Multipart/Related August 1998 61 | 62 | 63 | Responsibility for the display or processing of a Multipart/Related's 64 | constituent entities rests with the application that handles the 65 | compound object. 66 | 67 | 2. Multipart/Related Registration Information 68 | 69 | The following form is copied from RFC 1590, Appendix A. 70 | 71 | To: IANA@isi.edu 72 | Subject: Registration of new Media Type content-type/subtype 73 | 74 | Media Type name: Multipart 75 | 76 | Media subtype name: Related 77 | 78 | Required parameters: Type, a media type/subtype. 79 | 80 | Optional parameters: Start 81 | Start-info 82 | 83 | Encoding considerations: Multipart content-types cannot have 84 | encodings. 85 | 86 | Security considerations: Depends solely on the referenced type. 87 | 88 | Published specification: RFC-REL (this document). 89 | 90 | Person & email address to contact for further information: 91 | Edward Levinson 92 | 47 Clive Street 93 | Metuchen, NJ 08840-1060 94 | +1 908 494 1606 95 | XIson@cnj.digex.net 96 | 97 | 3. Intended usage 98 | 99 | The Multipart/Related media type is intended for compound objects 100 | consisting of several inter-related body parts. For a 101 | Multipart/Related object, proper display cannot be achieved by 102 | individually displaying the constituent body parts. The content-type 103 | of the Multipart/Related object is specified by the type parameter. 104 | The "start" parameter, if given, points, via a content-ID, to the 105 | body part that contains the object root. The default root is the 106 | first body part within the Multipart/Related body. 107 | 108 | The relationships among the body parts of a compound object 109 | distinguishes it from other object types. 
These relationships are 110 | often represented by links internal to the object's components that 111 | 112 | 113 | 114 | Levinson Standards Track [Page 2] 115 | 116 | RFC 2387 Multipart/Related August 1998 117 | 118 | 119 | reference the other components. Within a single operating 120 | environment the links are often file names; such links may be 121 | represented within a MIME message using content-IDs or the value of 122 | some other "Content-" headers. 123 | 124 | 3.1. The Type Parameter 125 | 126 | The type parameter must be specified and its value is the MIME media 127 | type of the "root" body part. It permits a MIME user agent to 128 | determine the content-type without reference to the enclosed body 129 | part. If the value of the type parameter and the root body part's 130 | content-type differ then the User Agent's behavior is undefined. 131 | 132 | 3.2. The Start Parameter 133 | 134 | The start parameter, if given, is the content-ID of the compound 135 | object's "root". If not present the "root" is the first body part in 136 | the Multipart/Related entity. The "root" is the element the 137 | application processes first. 138 | 139 | 3.3. The Start-Info Parameter 140 | 141 | Additional information can be provided to an application by the 142 | start-info parameter. It contains either a string or points, via a 143 | content-ID, to another MIME entity in the message. A typical use 144 | might be to provide additional command line parameters or a MIME 145 | entity giving auxiliary information for processing the compound 146 | object. 147 | 148 | Applications that use Multipart/Related must specify the 149 | interpretation of start-info. User Agents shall provide the 150 | parameter's value to the processing application. Processes can 151 | distinguish a start-info reference from a token or quoted-string by 152 | examining the first non-white-space character; "<" indicates a 153 | reference. 154 | 155 | 3.4.
Syntax 156 | 157 | related-param := [ ";" "start" "=" cid ] 158 | [ ";" "start-info" "=" 159 | ( cid-list / value ) ] 160 | [ ";" "type" "=" type "/" subtype ] 161 | ; order independent 162 | 163 | cid-list := cid cid-list 164 | 165 | cid := msg-id ; c.f. [822] 166 | 167 | 168 | 169 | 170 | Levinson Standards Track [Page 3] 171 | 172 | RFC 2387 Multipart/Related August 1998 173 | 174 | 175 | value := token / quoted-string ; c.f. [MIME] 176 | ; value cannot begin with "<" 177 | 178 | Note that the parameter values will usually require quoting. Msg-id 179 | contains the special characters "<", ">", "@", and perhaps other 180 | special characters. If msg-id contains quoted-strings, those quote 181 | marks must be escaped. Similarly, the type parameter contains the 182 | special character "/". 183 | 184 | 4. Handling Content-Disposition Headers 185 | 186 | Content-Disposition Headers [DISP] suggest presentation styles for 187 | MIME body parts. [DISP] describes two presentation styles, called 188 | the disposition type, INLINE and ATTACHMENT. These, used within a 189 | multipart entity, allow the sender to suggest presentation 190 | information. [DISP] also provides for an optional storage (file) 191 | name. Content-Disposition headers could appear in one or more body 192 | parts contained within a Multipart/Related entity. 193 | 194 | Using Content-Disposition headers in addition to Multipart/Related 195 | provides presentation information to User Agents that do not 196 | recognize Multipart/Related. They will treat the multipart as 197 | Multipart/Mixed and they may find the Content-Disposition information 198 | useful. 199 | 200 | With Multipart/Related however, the application processing the 201 | compound object determines the presentation style for all the 202 | contained parts. In that context the Content-Disposition header 203 | information is redundant or even misleading. 
Hence, User Agents that 204 | understand Multipart/Related shall ignore the disposition type within 205 | a Multipart/Related body part. 206 | 207 | It may be possible for a User Agent capable of handling both 208 | Multipart/Related and Content-Disposition headers to provide the 209 | invoked application the Content-Disposition header's optional 210 | filename parameter to the Multipart/Related. The use of that 211 | information will depend on the specific application and should be 212 | specified when describing the handling of the corresponding compound 213 | object. Such descriptions would be appropriate in an RFC registering 214 | that object's media type. 215 | 216 | 5. Examples 217 | 218 | 5.1 Application/X-FixedRecord 219 | 220 | The X-FixedRecord content-type consists of one or more octet-streams 221 | and a list of the lengths of each record. The root, which lists the 222 | record lengths of each record within the streams. The record length 223 | 224 | 225 | 226 | Levinson Standards Track [Page 4] 227 | 228 | RFC 2387 Multipart/Related August 1998 229 | 230 | 231 | list, type Application/X-FixedRecord, consists of a set of INTEGERs 232 | in ASCII format, one per line. Each INTEGER gives the number of 233 | octets from the octet-stream body part that constitute the next 234 | "record". 235 | 236 | The example below, uses a single data block. 
237 | 238 | Content-Type: Multipart/Related; boundary=example-1 239 | start="<950120.aaCC@XIson.com>"; 240 | type="Application/X-FixedRecord" 241 | start-info="-o ps" 242 | 243 | --example-1 244 | Content-Type: Application/X-FixedRecord 245 | Content-ID: <950120.aaCC@XIson.com> 246 | 247 | 25 248 | 10 249 | 34 250 | 10 251 | 25 252 | 21 253 | 26 254 | 10 255 | --example-1 256 | Content-Type: Application/octet-stream 257 | Content-Description: The fixed length records 258 | Content-Transfer-Encoding: base64 259 | Content-ID: <950120.aaCB@XIson.com> 260 | 261 | T2xkIE1hY0RvbmFsZCBoYWQgYSBmYXJtCkUgSS 262 | BFIEkgTwpBbmQgb24gaGlzIGZhcm0gaGUgaGFk 263 | IHNvbWUgZHVja3MKRSBJIEUgSSBPCldpdGggYS 264 | BxdWFjayBxdWFjayBoZXJlLAphIHF1YWNrIHF1 265 | YWNrIHRoZXJlLApldmVyeSB3aGVyZSBhIHF1YW 266 | NrIHF1YWNrCkUgSSBFIEkgTwo= 267 | 268 | --example-1-- 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | Levinson Standards Track [Page 5] 283 | 284 | RFC 2387 Multipart/Related August 1998 285 | 286 | 287 | 5.2 Text/X-Okie 288 | 289 | The Text/X-Okie is an invented markup language permitting the 290 | inclusion of images with text. A feature of this example is the 291 | inclusion of two additional body parts, both picture. They are 292 | referred to internally by the encapsulated document via each 293 | picture's body part content-ID. Usage of "cid:", as in this example, 294 | may be useful for a variety of compound objects. It is not, however, 295 | a part of the Multipart/Related specification. 296 | 297 | Content-Type: Multipart/Related; boundary=example-2; 298 | start="<950118.AEBH@XIson.com>" 299 | type="Text/x-Okie" 300 | 301 | --example-2 302 | Content-Type: Text/x-Okie; charset=iso-8859-1; 303 | declaration="<950118.AEB0@XIson.com>" 304 | Content-ID: <950118.AEBH@XIson.com> 305 | Content-Description: Document 306 | 307 | {doc} 308 | This picture was taken by an automatic camera mounted ... 
309 | {image file=cid:950118.AECB@XIson.com} 310 | {para} 311 | Now this is an enlargement of the area ... 312 | {image file=cid:950118:AFDH@XIson.com} 313 | {/doc} 314 | --example-2 315 | Content-Type: image/jpeg 316 | Content-ID: <950118.AFDH@XIson.com> 317 | Content-Transfer-Encoding: BASE64 318 | Content-Description: Picture A 319 | 320 | [encoded jpeg image] 321 | --example-2 322 | Content-Type: image/jpeg 323 | Content-ID: <950118.AECB@XIson.com> 324 | Content-Transfer-Encoding: BASE64 325 | Content-Description: Picture B 326 | 327 | [encoded jpeg image] 328 | --example-2-- 329 | 330 | 5.3 Content-Disposition 331 | 332 | In the above example each image body part could also have a Content- 333 | Disposition header. For example, 334 | 335 | 336 | 337 | 338 | Levinson Standards Track [Page 6] 339 | 340 | RFC 2387 Multipart/Related August 1998 341 | 342 | 343 | --example-2 344 | Content-Type: image/jpeg 345 | Content-ID: <950118.AECB@XIson.com> 346 | Content-Transfer-Encoding: BASE64 347 | Content-Description: Picture B 348 | Content-Disposition: INLINE 349 | 350 | [encoded jpeg image] 351 | --example-2-- 352 | 353 | User Agents that recognize Multipart/Related will ignore the 354 | Content-Disposition header's disposition type. Other User Agents 355 | will process the Multipart/Related as Multipart/Mixed and may make 356 | use of that header's information. 357 | 358 | 6. User Agent Requirements 359 | 360 | User agents that do not recognize Multipart/Related shall, in 361 | accordance with [MIME], treat the entire entity as Multipart/Mixed. 362 | MIME User Agents that do recognize Multipart/Related entities but are 363 | unable to process the given type should give the user the option of 364 | suppressing the entire Multipart/Related body part shall be. 365 | 366 | Existing MIME-capable mail user agents (MUAs) handle the existing 367 | media types in a straightforward manner. For discrete media types 368 | (e.g. text, image, etc.) 
the body of the entity can be directly 369 | passed to a display process. Similarly the existing composite 370 | subtypes can be reduced to handing one or more discrete types. 371 | Handling Multipart/Related differs in that processing cannot be 372 | reduced to handling the individual entities. 373 | 374 | The following sections discuss what information the processing 375 | application requires. 376 | 377 | It is possible that an application specific "receiving agent" will 378 | manipulate the entities for display prior to invoking actual 379 | application process. Okie, above, is an example of this; it may need 380 | a receiving agent to parse the document and substitute local file 381 | names for the originator's file names. Other applications may just 382 | require a table showing the correspondence between the local file 383 | names and the originator's. The receiving agent takes responsibility 384 | for such processing. 385 | 386 | 6.1 Data Requirements 387 | 388 | MIME-capable mail user agents (MUAs) are required to provide the 389 | application: 390 | 391 | 392 | 393 | 394 | Levinson Standards Track [Page 7] 395 | 396 | RFC 2387 Multipart/Related August 1998 397 | 398 | 399 | (a) the bodies of the MIME entities and the entity Content-* headers, 400 | 401 | (b) the parameters of the Multipart/Related Content-type header, and 402 | 403 | (c) the correspondence between each body's local file name, that 404 | body's header data, and, if present, the body part's content-ID. 405 | 406 | 6.2 Storing Multipart/Related Entities 407 | 408 | The Multipart/Related media type will be used for objects that have 409 | internal linkages between the body parts. When the objects are 410 | stored the linkages may require processing by the application or its 411 | receiving agent. 412 | 413 | 6.3 Recursion 414 | 415 | MIME is a recursive structure. Hence one must expect a 416 | Multipart/Related entity to contain other Multipart/Related entities. 
417 | When a Multipart/Related entity is being processed for display or 418 | storage, any enclosed Multipart/Related entities shall be processed 419 | as though they were being stored. 420 | 421 | 6.4 Configuration Considerations 422 | 423 | It is suggested that MUAs that use configuration mechanisms, see 424 | [CFG] for an example, refer to Multipart/Related as Multi- 425 | part/Related/<type-parameter>, where <type-parameter> is the value of the "type" 426 | parameter. 427 | 428 | 7. Security Considerations 429 | 430 | Security considerations relevant to Multipart/Related are identical 431 | to those of the underlying content-type. 432 | 433 | 8. Acknowledgments 434 | 435 | This proposal is the result of conversations the author has had with 436 | many people. In particular, Harald A. Alvestrand, James Clark, 437 | Charles Goldfarb, Gary Houston, Ned Freed, Ray Moody, and Don 438 | Stinchfield, provided both encouragement and invaluable help. The 439 | author, however, takes full responsibility for all errors contained in 440 | this document. 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | Levinson Standards Track [Page 8] 451 | 452 | RFC 2387 Multipart/Related August 1998 453 | 454 | 455 | 9. References 456 | 457 | [822] Crocker, D., "Standard for the Format of ARPA Internet 458 | Text Messages", STD 11, RFC 822, August 1982. 459 | 460 | [CID] Levinson, E., and J. Clark, "Message/External-Body 461 | Content-ID Access Type", RFC 1873, December 1995, 462 | Levinson, E., "Message/External-Body Content-ID Access 463 | Type", Work in Progress. 464 | 465 | [CFG] Borenstein, N., "A User Agent Configuration Mechanism For 466 | Multimedia Mail Format Information", RFC 1524, September 467 | 1993. 468 | 469 | [DISP] Troost, R., and S. Dorner, "Communicating Presentation 470 | Information in Internet Messages: The Content- 471 | Disposition Header", RFC 1806, June 1995.
472 | 473 | [MIME] Borenstein, N., and Freed, N., "Multipurpose Internet 474 | Mail Extensions (MIME) Part One: Format of Internet 475 | Message Bodies", RFC 2045, November 1996. 476 | 477 | 9. Author's Address 478 | 479 | Edward Levinson 480 | 47 Clive Street 481 | Metuchen, NJ 08840-1060 482 | USA 483 | 484 | Phone: +1 908 494 1606 485 | EMail: XIson@cnj.digex.com 486 | 487 | 10. Changes from previous draft (RFC 2112) 488 | 489 | Corrected cid urls to conform to RFC 2111; the angle brackets were 490 | removed. 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | Levinson Standards Track [Page 9] 507 | 508 | RFC 2387 Multipart/Related August 1998 509 | 510 | 511 | 11. Full Copyright Statement 512 | 513 | Copyright (C) The Internet Society (1998). All Rights Reserved. 514 | 515 | This document and translations of it may be copied and furnished to 516 | others, and derivative works that comment on or otherwise explain it 517 | or assist in its implementation may be prepared, copied, published 518 | and distributed, in whole or in part, without restriction of any 519 | kind, provided that the above copyright notice and this paragraph are 520 | included on all such copies and derivative works. However, this 521 | document itself may not be modified in any way, such as by removing 522 | the copyright notice or references to the Internet Society or other 523 | Internet organizations, except as needed for the purpose of 524 | developing Internet standards in which case the procedures for 525 | copyrights defined in the Internet Standards process must be 526 | followed, or as required to translate it into languages other than 527 | English. 528 | 529 | The limited permissions granted above are perpetual and will not be 530 | revoked by the Internet Society or its successors or assigns. 
531 | 532 | This document and the information contained herein is provided on an 533 | "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING 534 | TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING 535 | BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION 536 | HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF 537 | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | Levinson Standards Track [Page 10] 563 | 564 | -------------------------------------------------------------------------------- /rfc/2388-returning-values-from-forms-multipart-form-data.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Network Working Group L. Masinter 8 | Request for Comments: 2388 Xerox Corporation 9 | Category: Standards Track August 1998 10 | 11 | 12 | Returning Values from Forms: multipart/form-data 13 | 14 | Status of this Memo 15 | 16 | This document specifies an Internet standards track protocol for the 17 | Internet community, and requests discussion and suggestions for 18 | improvements. Please refer to the current edition of the "Internet 19 | Official Protocol Standards" (STD 1) for the standardization state 20 | and status of this protocol. Distribution of this memo is unlimited. 21 | 22 | Copyright Notice 23 | 24 | Copyright (C) The Internet Society (1998). All Rights Reserved. 25 | 26 | 1. Abstract 27 | 28 | This specification defines an Internet Media Type, multipart/form- 29 | data, which can be used by a wide variety of applications and 30 | transported by a wide variety of protocols as a way of returning a 31 | set of values as the result of a user filling out a form. 32 | 33 | 2. Introduction 34 | 35 | In many applications, it is possible for a user to be presented with 36 | a form. 
The user will fill out the form, including information that 37 | is typed, generated by user input, or included from files that the 38 | user has selected. When the form is filled out, the data from the 39 | form is sent from the user to the receiving application. 40 | 41 | The definition of MultiPart/Form-Data is derived from one of those 42 | applications, originally set out in [RFC1867] and subsequently 43 | incorporated into [HTML40], where forms are expressed in HTML, and in 44 | which the form values are sent via HTTP or electronic mail. This 45 | representation is widely implemented in numerous web browsers and web 46 | servers. 47 | 48 | However, multipart/form-data can be used for forms that are presented 49 | using representations other than HTML (spreadsheets, Portable 50 | Document Format, etc), and for transport using other means than 51 | electronic mail or HTTP. This document defines the representation of 52 | form values independently of the application for which it is used. 53 | 54 | 55 | 56 | 57 | 58 | Masinter Standards Track [Page 1] 59 | 60 | RFC 2388 multipart/form-data August 1998 61 | 62 | 63 | 3. Definition of multipart/form-data 64 | 65 | The media-type multipart/form-data follows the rules of all multipart 66 | MIME data streams as outlined in [RFC 2046]. In forms, there are a 67 | series of fields to be supplied by the user who fills out the form. 68 | Each field has a name. Within a given form, the names are unique. 69 | 70 | "multipart/form-data" contains a series of parts. Each part is 71 | expected to contain a content-disposition header [RFC 2183] where the 72 | disposition type is "form-data", and where the disposition contains 73 | an (additional) parameter of "name", where the value of that 74 | parameter is the original field name in the form. For example, a part 75 | might contain a header: 76 | 77 | Content-Disposition: form-data; name="user" 78 | 79 | with the value corresponding to the entry of the "user" field. 
80 | 81 | Field names originally in non-ASCII character sets may be encoded 82 | within the value of the "name" parameter using the standard method 83 | described in RFC 2047. 84 | 85 | As with all multipart MIME types, each part has an optional 86 | "Content-Type", which defaults to text/plain. If the contents of a 87 | file are returned via filling out a form, then the file input is 88 | identified as the appropriate media type, if known, or 89 | "application/octet-stream". If multiple files are to be returned as 90 | the result of a single form entry, they should be represented as a 91 | "multipart/mixed" part embedded within the "multipart/form-data". 92 | 93 | Each part may be encoded and the "content-transfer-encoding" header 94 | supplied if the value of that part does not conform to the default 95 | encoding. 96 | 97 | 4. Use of multipart/form-data 98 | 99 | 4.1 Boundary 100 | 101 | As with other multipart types, a boundary is selected that does not 102 | occur in any of the data. Each field of the form is sent, in the 103 | order defined by the sending application and form, as a part of the 104 | multipart stream. Each part identifies the INPUT name within the 105 | original form. Each part should be labelled with an appropriate 106 | content-type if the media type is known (e.g., inferred from the file 107 | extension or operating system typing information) or as 108 | "application/octet-stream". 109 | 110 | 111 | 112 | 113 | 114 | Masinter Standards Track [Page 2] 115 | 116 | RFC 2388 multipart/form-data August 1998 117 | 118 | 119 | 4.2 Sets of files 120 | 121 | If the value of a form field is a set of files rather than a single 122 | file, that value can be transferred together using the 123 | "multipart/mixed" format. 124 | 125 | 4.3 Encoding 126 | 127 | While the HTTP protocol can transport arbitrary binary data, the 128 | default for mail transport is the 7BIT encoding.
The value supplied 129 | for a part may need to be encoded and the "content-transfer-encoding" 130 | header supplied if the value does not conform to the default 131 | encoding. [See section 5 of RFC 2046 for more details.] 132 | 133 | 4.4 Other attributes 134 | 135 | Forms may request file inputs from the user; the form software may 136 | include the file name and other file attributes, as specified in [RFC 137 | 2184]. 138 | 139 | The original local file name may be supplied as well, either as a 140 | "filename" parameter either of the "content-disposition: form-data" 141 | header or, in the case of multiple files, in a "content-disposition: 142 | file" header of the subpart. The sending application MAY supply a 143 | file name; if the file name of the sender's operating system is not 144 | in US-ASCII, the file name might be approximated, or encoded using 145 | the method of RFC 2231. 146 | 147 | This is a convenience for those cases where the files supplied by the 148 | form might contain references to each other, e.g., a TeX file and its 149 | .sty auxiliary style description. 150 | 151 | 4.5 Charset of text in form data 152 | 153 | Each part of a multipart/form-data is supposed to have a content- 154 | type. In the case where a field element is text, the charset 155 | parameter for the text indicates the character encoding used. 156 | 157 | For example, a form with a text field in which a user typed 'Joe owes 158 | <eu>100' where <eu> is the Euro symbol might have form data returned 159 | as: 160 | 161 | --AaB03x 162 | content-disposition: form-data; name="field1" 163 | content-type: text/plain;charset=windows-1250 164 | content-transfer-encoding: quoted-printable 165 | 166 | 167 | 168 | 169 | 170 | Masinter Standards Track [Page 3] 171 | 172 | RFC 2388 multipart/form-data August 1998 173 | 174 | 175 | Joe owes =80100. 176 | --AaB03x 177 | 178 | 5.
Operability considerations 179 | 180 | 5.1 Compression, encryption 181 | 182 | Some of the data in forms may be compressed or encrypted, using other 183 | MIME mechanisms. This is a function of the application that is 184 | generating the form-data. 185 | 186 | 5.2 Other data encodings rather than multipart 187 | 188 | Various people have suggested using new mime top-level type 189 | "aggregate", e.g., aggregate/mixed or a content-transfer-encoding of 190 | "packet" to express indeterminate-length binary data, rather than 191 | relying on the multipart-style boundaries. While this would be 192 | useful, the "multipart" mechanisms are well established, simple to 193 | implement on both the sending client and receiving server, and as 194 | efficient as other methods of dealing with multiple combinations of 195 | binary data. 196 | 197 | The multipart/form-data encoding has a high overhead and performance 198 | impact if there are many fields with short values. However, in 199 | practice, for the forms in use, for example, in HTML, the average 200 | overhead is not significant. 201 | 202 | 5.3 Remote files with third-party transfer 203 | 204 | In some scenarios, the user operating the form software might want to 205 | specify a URL for remote data rather than a local file. In this case, 206 | is there a way to allow the browser to send to the client a pointer 207 | to the external data rather than the entire contents? This capability 208 | could be implemented, for example, by having the client send to the 209 | server data of type "message/external-body" with "access-type" set 210 | to, say, "uri", and the URL of the remote data in the body of the 211 | message. 212 | 213 | 5.4 Non-ASCII field names 214 | 215 | Note that MIME headers are generally required to consist only of 7- 216 | bit data in the US-ASCII character set. Hence field names should be 217 | encoded according to the method in RFC 2047 if they contain 218 | characters outside of that set. 
219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | Masinter Standards Track [Page 4] 227 | 228 | RFC 2388 multipart/form-data August 1998 229 | 230 | 231 | 5.5 Ordered fields and duplicated field names 232 | 233 | The relationship of the ordering of fields within a form and the 234 | ordering of returned values within "multipart/form-data" is not 235 | defined by this specification, nor is the handling of the case where 236 | a form has multiple fields with the same name. While HTML-based forms 237 | may send back results in the order received, and intermediaries 238 | should not reorder the results, there are some systems which might 239 | not define a natural order for form fields. 240 | 241 | 5.6 Interoperability with web applications 242 | 243 | Many web applications use the "application/x-url-encoded" method for 244 | returning data from forms. This format is quite compact, e.g.: 245 | 246 | name=Xavier+Xantico&verdict=Yes&colour=Blue&happy=sad&Utf%F6r=Send 247 | 248 | however, there is no opportunity to label the enclosed data with 249 | content type, apply a charset, or use other encoding mechanisms. 250 | 251 | Many form-interpreting programs (primarily web browsers) now implement 252 | and generate multipart/form-data, but an existing application might 253 | need to optionally support both the application/x-url-encoded format 254 | as well. 255 | 256 | 5.7 Correlating form data with the original form 257 | 258 | This specification provides no specific mechanism by which 259 | multipart/form-data can be associated with the form that caused it to 260 | be transmitted. This separation is intentional; many different forms 261 | might be used for transmitting the same data. In practice, 262 | applications may supply a specific form processing resource (in HTML, 263 | the ACTION attribute in a FORM tag) for each different form.
264 | Alternatively, data about the form might be encoded in a "hidden 265 | field" (a field which is part of the form but which has a fixed value 266 | to be transmitted back to the form-data processor.) 267 | 268 | 6. Security Considerations 269 | 270 | The data format described in this document introduces no new security 271 | considerations outside of those introduced by the protocols that use 272 | it and of the component elements. It is important when interpreting 273 | content-disposition to not overwrite files in the recipients address 274 | space inadvertently. 275 | 276 | User applications that request form information from users must be 277 | careful not to cause a user to send information to the requestor or a 278 | third party unwillingly or unwittingly. For example, a form might 279 | 280 | 281 | 282 | Masinter Standards Track [Page 5] 283 | 284 | RFC 2388 multipart/form-data August 1998 285 | 286 | 287 | request 'spam' information to be sent to an unintended third party, 288 | or private information to be sent to someone that the user might not 289 | actually intend. While this is primarily an issue for the 290 | representation and interpretation of forms themselves, rather than 291 | the data representation of the result of form transmission, the 292 | transportation of private information must be done in a way that does 293 | not expose it to unwanted prying. 294 | 295 | With the introduction of form-data that can reasonably send back the 296 | content of files from user's file space, the possibility that a user 297 | might be sent an automated script that fills out a form and then 298 | sends the user's local file to another address arises. Thus, 299 | additional caution is required when executing automated scripting 300 | where form-data might include user's files. 301 | 302 | 7. 
Author's Address 303 | 304 | Larry Masinter 305 | Xerox Palo Alto Research Center 306 | 3333 Coyote Hill Road 307 | Palo Alto, CA 94304 308 | 309 | Fax: +1 650 812 4333 310 | EMail: masinter@parc.xerox.com 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | Masinter Standards Track [Page 6] 339 | 340 | RFC 2388 multipart/form-data August 1998 341 | 342 | 343 | Appendix A. Media type registration for multipart/form-data 344 | 345 | Media Type name: 346 | multipart 347 | 348 | Media subtype name: 349 | form-data 350 | 351 | Required parameters: 352 | none 353 | 354 | Optional parameters: 355 | none 356 | 357 | Encoding considerations: 358 | No additional considerations other than as for other multipart 359 | types. 360 | 361 | Security Considerations 362 | Applications which receive forms and process them must be careful 363 | not to supply data back to the requesting form processing site that 364 | was not intended to be sent by the recipient. This is a 365 | consideration for any application that generates a multipart/form- 366 | data. 367 | 368 | The multipart/form-data type introduces no new security 369 | considerations for recipients beyond what might occur with any of 370 | the enclosed parts. 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | Masinter Standards Track [Page 7] 395 | 396 | RFC 2388 multipart/form-data August 1998 397 | 398 | 399 | References 400 | 401 | [RFC 2046] Freed, N., and N. Borenstein, "Multipurpose Internet Mail 402 | Extensions (MIME) Part Two: Media Types", RFC 2046, 403 | November 1996. 404 | 405 | [RFC 2047] Moore, K., "MIME (Multipurpose Internet Mail Extensions) 406 | Part Three: Message Header Extensions for Non-ASCII Text", 407 | RFC 2047, November 1996. 408 | 409 | [RFC 2231] Freed, N., and K. 
Moore, "MIME Parameter Value and Encoded 410 | Word Extensions: Character Sets, Languages, and 411 | Continuations", RFC 2231, November 1997. 412 | 413 | [RFC 1806] Troost, R., and S. Dorner, "Communicating Presentation 414 | Information in Internet Messages: The Content-Disposition 415 | Header", RFC 1806, June 1995. 416 | 417 | [RFC 1867] Nebel, E., and L. Masinter, "Form-based File Upload in 418 | HTML", RFC 1867, November 1995. 419 | 420 | [RFC 2183] Troost, R., Dorner, S., and K. Moore, "Communicating 421 | Presentation Information in Internet Messages: The 422 | Content-Disposition Header Field", RFC 2183, August 1997. 423 | 424 | [RFC 2184] Freed, N., and K. Moore, "MIME Parameter Value and Encoded 425 | Word Extensions: Character Sets, Languages, and 426 | Continuations", RFC 2184, August 1997. 427 | 428 | [HTML40] D. Raggett, A. Le Hors, I. Jacobs. "HTML 4.0 429 | Specification", World Wide Web Consortium Technical Report 430 | "REC-html40", December, 1997. 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | Masinter Standards Track [Page 8] 451 | 452 | RFC 2388 multipart/form-data August 1998 453 | 454 | 455 | Full Copyright Statement 456 | 457 | Copyright (C) The Internet Society (1998). All Rights Reserved. 458 | 459 | This document and translations of it may be copied and furnished to 460 | others, and derivative works that comment on or otherwise explain it 461 | or assist in its implementation may be prepared, copied, published 462 | and distributed, in whole or in part, without restriction of any 463 | kind, provided that the above copyright notice and this paragraph are 464 | included on all such copies and derivative works. 
However, this 465 | document itself may not be modified in any way, such as by removing 466 | the copyright notice or references to the Internet Society or other 467 | Internet organizations, except as needed for the purpose of 468 | developing Internet standards in which case the procedures for 469 | copyrights defined in the Internet Standards process must be 470 | followed, or as required to translate it into languages other than 471 | English. 472 | 473 | The limited permissions granted above are perpetual and will not be 474 | revoked by the Internet Society or its successors or assigns. 475 | 476 | This document and the information contained herein is provided on an 477 | "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING 478 | TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING 479 | BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION 480 | HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF 481 | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | Masinter Standards Track [Page 9] 507 | 508 | -------------------------------------------------------------------------------- /test/common.js: -------------------------------------------------------------------------------- 1 | /* Shared test helper: exports the project directories, assert, fast-or-slow, and a require shortcut for modules under lib. */ var path = require('path'); 2 | 3 | var root = path.join(__dirname, '../'); 4 | exports.dir = { 5 | root: root, 6 | lib: root + '/lib', 7 | fixture: root + '/test/fixture', 8 | }; 9 | 10 | exports.assert = require('assert'); 11 | exports.fastOrSlow = require('fast-or-slow'); 12 | 13 | /* Requires the named module from exports.dir.lib. */ exports.require = function(lib) { 14 | return require(exports.dir.lib + '/' + lib); 15 | }; 16 | -------------------------------------------------------------------------------- /test/fast/test-fixtures.js: -------------------------------------------------------------------------------- 1 | /* Disabled test: this top-level return logs a notice and stops the module before any code below runs. */ return console.log('disabled'); 2 | 3 | var common = require('../common'); 4 | var assert = common.assert; 5 | var fs = require('fs'); 6 | var path = require('path'); 7 | var MultipartParser = common.require('multipart_parser'); 8 | 9 | findFixtures(common.dir.fixture + '/js'); 10 | 11 | /* Lists dir asynchronously, turns each entry into dir/entry, and passes every path to load; rethrows a readdir error. */ function findFixtures(dir) { 12 | fs.readdir(dir, function(err, files) { 13 | if (err) throw err; 14 | files 15 | .map(function(file) { 16 | return dir + '/' + file; 17 | }) 18 | .forEach(load); 19 | }); 20 | } 21 | 22 | 23 | /* Skips paths not ending in .js; otherwise requires the fixture module and, for each key in it, streams the file of that name from the http fixture directory named after the module into verify together with the value stored under that key. */ function load(jsFixture) { 24 | if (!/\.js$/.test(jsFixture)) return; 25 | 26 | var tests = require(jsFixture); 27 | var dir = common.dir.fixture + '/http/' + path.basename(jsFixture, '.js'); 28 | 29 | for (var name in tests) { 30 | var httpFixture = fs.createReadStream(dir + '/' + name); 31 | verify(httpFixture, tests[name]); 32 | } 33 | } 34 | 35 | /* Pipes the http stream into a new MultipartParser, collects emitted parts, asserts on parser end that the part count equals expected.length, and asserts on stream end that the parser has already ended. NOTE(review): the parser is built without a boundary here, while test-multipart-parser.js expects write on such a parser to throw - confirm before re-enabling. */ function verify(http, expected) { 36 | var ended = false; 37 | var parser = new MultipartParser(); 38 | var parts = []; 39 | var shortPath = 
http.path.substr(common.dir.fixture.length + '/http/'.length); 40 | 41 | http.pipe(parser); 42 | 43 | parser 44 | .on('part', function(part) { 45 | parts.push(part); 46 | }) 47 | .on('end', function() { 48 | ended = true; 49 | 50 | assert.equal( 51 | parts.length, 52 | expected.length, 53 | 'Expected ' + expected.length + ' part(s), got: ' + parts.length + ': ' + 54 | shortPath 55 | ); 56 | }); 57 | 58 | http 59 | .on('end', function() { 60 | assert.ok(ended, 'Parser did not end: ' + shortPath); 61 | }); 62 | } 63 | -------------------------------------------------------------------------------- /test/fast/test-multipart-parser.js: -------------------------------------------------------------------------------- 1 | var common = require('../common'); 2 | var test = common.fastOrSlow.fast(); 3 | var assert = common.assert; 4 | var MultipartParser = common.require('multipart_parser'); 5 | var Part = common.require('part'); 6 | var boundary = '------WebKitFormBoundarytyE4wkKlZ5CQJVTG'; 7 | 8 | var parser; 9 | /* Hook registered with test.before: replaces the shared parser with one created for boundary (presumably run before each test - confirm against fast-or-slow). */ test.before(function() { 10 | parser = MultipartParser.create(boundary); 11 | }); 12 | 13 | /* Writes buffer to the shared parser and asserts that an error event fired during that write call whose message starts with expectedError. */ function assertEmitsError(buffer, expectedError) { 14 | var hadError = false; 15 | parser.on('error', function(err) { 16 | hadError = true; 17 | assert.equal(err.message.substr(0, expectedError.length), expectedError); 18 | }); 19 | 20 | parser.write(buffer); 21 | assert.ok(hadError, 'no error was emitted'); 22 | } 23 | 24 | test('#write: error: invalid parser state', function() { 25 | parser._state = 'SOMETHING'; 26 | assertEmitsError(new Buffer('123'), 'MultipartParser.InvalidParserState'); 27 | }); 28 | 29 | test('#write: error: write without boundary', function() { 30 | var buffer = new Buffer('a'); 31 | parser = new MultipartParser(); 32 | 33 | assert.throws(function() { 34 | parser.write(buffer); 35 | }, /Bad state: NO_BOUNDARY/); 36 | }); 37 | 38 | test('#write: tolerate missing CRLF on first boundary', function() { 39 | var buffer = new Buffer('--' + boundary + 
'\r\n'); 40 | parser.write(buffer); 41 | 42 | assert.equal(parser._state, 'HEADER_FIELD'); 43 | }); 44 | 45 | test('#write: leading preamble', function() { 46 | parser.write(new Buffer(boundary.substr(0, 4) + 'HAHA')); 47 | assert.equal(parser._state, 'PREAMBLE'); 48 | 49 | parser.write(new Buffer('--' + boundary + '\r\n')); 50 | assert.equal(parser._state, 'HEADER_FIELD'); 51 | }); 52 | 53 | test('#write: error: Invalid header token', function() { 54 | // ',' is an example for an invalid token for header fields (see RFC 2616) 55 | var buffer = new Buffer('Invalid,Header: '); 56 | parser._state = 'HEADER_FIELD'; 57 | assertEmitsError(buffer, 'MultipartParser.InvalidHeaderFieldToken'); 58 | }); 59 | 60 | test('#write: Emit part object with lowercased headers', function() { 61 | var buffer = new Buffer('Header-1:value-1\r\nHeader-2:value-2\r\n\r\n'); 62 | parser._state = 'HEADER_FIELD'; 63 | parser._part = new Part(); 64 | 65 | parser.write(buffer); 66 | 67 | assert.deepEqual(parser._part.headers, { 68 | 'header-1': 'value-1', 69 | 'header-2': 'value-2', 70 | }); 71 | }); 72 | 73 | test('#write: Trim leading and trailing header value whitespace', function() { 74 | var buffer = new Buffer('header: value \r\n\r\n'); 75 | parser._state = 'HEADER_FIELD'; 76 | parser._part = new Part(); 77 | 78 | parser.write(buffer); 79 | 80 | assert.deepEqual(parser._part.headers, {'header': 'value'}); 81 | }); 82 | 83 | test('#write: error: CR on non-empty _headerField', function() { 84 | var buffer = new Buffer('head\r'); 85 | parser._state = 'HEADER_FIELD'; 86 | assertEmitsError(buffer, 'MultipartParser.InvalidHeaderFieldToken'); 87 | }); 88 | 89 | test('#write: no part headers', function() { 90 | var buffer = new Buffer('\r\n'); 91 | parser._state = 'HEADER_FIELD'; 92 | parser._part = new Part(); 93 | 94 | parser.write(buffer); 95 | 96 | assert.deepEqual(parser._part.headers, {}); 97 | }); 98 | 99 | test('#write: header buffer overflow in field', function() { 100 | 
parser._headerBufferLimit = 2; 101 | 102 | var buffer = new Buffer('ab'); 103 | parser._state = 'HEADER_FIELD'; 104 | parser.write(buffer); 105 | assertEmitsError(new Buffer('c'), 'MultipartParser.HeaderBufferOverflow'); 106 | }); 107 | 108 | test('#write: emit part data', function() { 109 | parser._part = new Part(); 110 | parser._state = 'PART_BODY'; 111 | 112 | var expected = [ 113 | new Buffer('abc'), 114 | new Buffer('def'), 115 | ]; 116 | 117 | parser._part.on('data', function(buffer) { 118 | assert.equal(''+buffer, ''+expected.shift()); 119 | }); 120 | 121 | /* Both writes read expected[0]: the data listener above shifts the array, so once the first write has emitted data synchronously, expected[0] is the def buffer. */ parser.write(expected[0]); 122 | parser.write(expected[0]); 123 | 124 | assert.strictEqual(expected.length, 0); 125 | assert.equal(parser._offset, 6); 126 | }); 127 | 128 | test('#write: hit partial boundary in part data', function() { 129 | parser.boundary('end'); 130 | parser._preamble = false; 131 | parser._part = new Part(); 132 | parser._state = 'PART_BODY'; 133 | 134 | var buffers =[]; 135 | parser._part.on('data', function(buffer) { 136 | buffers.push(''+buffer); 137 | }); 138 | 139 | parser.write(new Buffer('ab\r\n--enc')); 140 | assert.deepEqual(buffers, ['ab', '\r\n--en', 'c']); 141 | }); 142 | 143 | test('#write: hit partial boundary in part data spread over 2 buffers', function() { 144 | parser.boundary('end'); 145 | parser._preamble = false; 146 | parser._part = new Part(); 147 | parser._state = 'PART_BODY'; 148 | 149 | var buffers =[]; 150 | parser._part.on('data', function(buffer) { 151 | buffers.push(''+buffer); 152 | }); 153 | 154 | var first = new Buffer('ab\r\n--e'); 155 | var second = new Buffer('haha'); 156 | 157 | parser.write(first); 158 | assert.equal(buffers.length, 1); 159 | 160 | parser.write(second); 161 | assert.deepEqual(buffers, ['ab', '\r\n--e', 'haha']); 162 | }); 163 | 164 | test('#write: hit partial boundary in part data spread over 3 buffers', function() { 165 | parser.boundary('end'); 166 | parser._preamble = false; 167 | parser._part = new Part(); 168 | parser._state 
= 'PART_BODY'; 169 | 170 | var buffers =[]; 171 | parser._part.on('data', function(buffer) { 172 | buffers.push(''+buffer); 173 | }); 174 | 175 | var first = new Buffer('ab\r\n--e'); 176 | var second = new Buffer('n'); 177 | var third = new Buffer('haha'); 178 | 179 | parser.write(first); 180 | assert.equal(buffers.length, 1); 181 | 182 | parser.write(second); 183 | assert.equal(buffers.length, 1); 184 | 185 | parser.write(third); 186 | assert.deepEqual(buffers, ['ab', '\r\n--en', 'haha']); 187 | }); 188 | 189 | /* Writes the RFC 1341 example entity to the shared parser, in a single write when chunkSize is falsy or otherwise in slices of chunkSize bytes, then asserts that exactly two parts were emitted with the expected bodies, that each part ended, and that the parser emitted end. */ function testRfc1341Entity(chunkSize) { 190 | parser.boundary('simple boundary'); 191 | 192 | var part1 = 193 | 'This is implicitly typed plain ASCII text.\r\n' + 194 | 'It does NOT end with a linebreak.'; 195 | var part2 = 196 | 'This is explicitly typed plain ASCII text.\r\n' + 197 | 'It DOES end with a linebreak.\r\n'; 198 | 199 | var rfc1341Entity = 200 | 'This is the preamble. It is to be ignored, though it\r\n' + 201 | 'is a handy place for mail composers to include an\r\n' + 202 | 'explanatory note to non-MIME compliant readers.\r\n' + 203 | '--simple boundary\r\n' + 204 | '\r\n' + 205 | part1 + 206 | '\r\n' + 207 | '--simple boundary\r\n' + 208 | 'Content-type: text/plain; charset=us-ascii\r\n' + 209 | '\r\n' + 210 | part2 + 211 | '\r\n' + 212 | '--simple boundary--\r\n' + 213 | 'This is the epilogue. 
It is also to be ignored.\r\n'; 214 | 215 | var parts = []; 216 | var ended = false; 217 | parser 218 | .on('error', function(error) { 219 | throw error; 220 | }) 221 | .on('part', function(part) { 222 | parts.push(part); 223 | 224 | part.data = ''; 225 | part 226 | .on('data', function(chunk) { 227 | part.data += chunk; 228 | }) 229 | .on('end', function() { 230 | part.ended = true; 231 | }); 232 | }) 233 | .on('end', function() { 234 | ended = true; 235 | }); 236 | 237 | var buffer = new Buffer(rfc1341Entity); 238 | if (!chunkSize) { 239 | parser.write(buffer); 240 | } else { 241 | for (var i = 0; i < buffer.length; i += chunkSize) { 242 | var end = (i + chunkSize < buffer.length) 243 | ? i + chunkSize 244 | : buffer.length; 245 | 246 | var chunk = new Buffer(buffer.slice(i, end)); 247 | parser.write(chunk); 248 | } 249 | } 250 | 251 | assert.equal(parts.length, 2); 252 | assert.equal(parts[0].data, part1); 253 | assert.equal(parts[1].data, part2); 254 | parts.forEach(function(part, i) { 255 | assert.ok(part.ended, 'Part ' + (i + 1) + ' did not end.'); 256 | }); 257 | assert.ok(ended); 258 | } 259 | 260 | test('#write: full rfc1341 entity', function() { 261 | testRfc1341Entity(); 262 | }); 263 | 264 | // What can I say, my ability to visualize this state machine has its limits :) 265 | /* Registers one test per chunk size from 1 to 10; each callback parses its chunk size back out of the test name. */ for (var i = 1; i <= 10; i++) { 266 | test('#write: full rfc1341 entity with chunk size: ' + i, function() { 267 | var chunkSize = parseInt(this.name.match(/\d+$/), 10); 268 | testRfc1341Entity(chunkSize); 269 | }); 270 | } 271 | -------------------------------------------------------------------------------- /test/run.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* Test runner: adds this directory to far, limits it to paths matching the include regex, then calls execute. */ var far = require('far').create(); 3 | 4 | far.add(__dirname); 5 | far.include(/test-.*\.js$/); 6 | 7 | far.execute(); 8 | --------------------------------------------------------------------------------