├── .gitignore
├── License
├── Makefile
├── Readme.md
├── benchmark
    ├── benchmark.js
    ├── helper.js
    └── parsers
    │   ├── formidable.js
    │   └── multipart_parser.js
├── index.js
├── lib
    ├── multipart_parser.js
    └── part.js
├── package.json
├── rfc
    ├── 0822-arpa-internet-text-messages.txt
    ├── 1341-the-multipart-content-type.html
    ├── 2045-format-of-internet-message-bodies.txt
    ├── 2046-media-types.txt
    ├── 2047-message-header-extensions-for-non-ascii-text.txt
    ├── 2048-registration-procedures.txt
    ├── 2049-conformance-criteria-and-examples.txt
    ├── 2387-mime-multipart-content-type.txt
    └── 2388-returning-values-from-forms-multipart-form-data.txt
└── test
    ├── common.js
    ├── fast
        ├── test-fixtures.js
        └── test-multipart-parser.js
    └── run.js


/.gitignore:
--------------------------------------------------------------------------------
1 | *.un~
2 | /node_modules
3 | 


--------------------------------------------------------------------------------
/License:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011 Felix Geisendörfer (felix@debuggable.com) and contributors
 2 | 
 3 |  Permission is hereby granted, free of charge, to any person obtaining a copy
 4 |  of this software and associated documentation files (the "Software"), to deal
 5 |  in the Software without restriction, including without limitation the rights
 6 |  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 |  copies of the Software, and to permit persons to whom the Software is
 8 |  furnished to do so, subject to the following conditions:
 9 | 
10 |  The above copyright notice and this permission notice shall be included in
11 |  all copies or substantial portions of the Software.
12 | 
13 |  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 |  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 |  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 |  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 |  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 |  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 |  THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL := /bin/bash
 2 | 
 3 | test:
 4 | 	@./test/run.js
 5 | 
 6 | build: npm test
 7 | 
 8 | npm:
 9 | 	npm install .
10 | 
11 | clean:
12 | 	rm -f test/tmp/*
13 | # -f keeps clean from failing when test/tmp is empty; npm is a command target too.
14 | .PHONY: test clean build npm
15 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # multipart-parser
 2 | 
 3 | A fast and streaming multipart parser.
 4 | 
 5 | ## Is it any good?
 6 | 
 7 | No, this is still being developed.
 8 | 
 9 | ## Is it fast?
10 | 
11 | Yes. According to the benchmark suite shipped with this parser, it is on par
12 | with existing implementations, and can easily exceed typical disk/storage
13 | throughputs.
14 | 
15 | ```
16 | $ node --version
17 | v0.4.12
18 | $ node benchmark/benchmark.js -r 100
19 | Options:
20 |   Entity Size         : 10 mb
21 |   Chunk Size          : 32 kb
22 |   Runs                : 100
23 |   Iterations per run  : 10
24 | 
25 | ....................................................................................................
26 | Benchmark took: 33.8 seconds
27 | 
28 | formidable: 740.47 mb/sec (95% of 1000 iterations)
29 | multipart_parser: 846.75 mb/sec (95% of 1000 iterations)
30 | ```
31 | 
32 | ```
33 | $ node --version
34 | v0.5.10-pre
35 | $ node benchmark/benchmark.js -r 100
36 | Options:
37 |   Entity Size         : 10 mb
38 |   Chunk Size          : 32 kb
39 |   Runs                : 100
40 |   Iterations per run  : 10
41 | 
42 | ....................................................................................................
43 | Benchmark took: 33.4 seconds
44 | 
45 | formidable: 775.19 mb/sec (95% of 1000 iterations)
46 | multipart_parser: 934.58 mb/sec (95% of 1000 iterations)
47 | ```
48 | 
49 | ## Is it secure?
50 | 
51 | Blah.
52 | 
53 | ## Is it compliant?
54 | 
55 | Blah.
56 | 
57 | ## Is it user friendly?
58 | 
59 | Blah.
60 | 
61 | ## License
62 | 
63 | MIT License.
64 | 


--------------------------------------------------------------------------------
/benchmark/benchmark.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | var util    = require('util');
 3 | var uubench = require('uubench');
 4 | var helper  = require('./helper');
 5 | var options = require('commander');
 6 | 
 7 | options
 8 |   .option('-e, --entitysize [size]', 'The size of the entity [10mb].', '10mb')
 9 |   .option('-c, --chunksize [size]', 'The write chunksize in kb [32kb]', '32kb')
10 |   .option('-r, --runs [runs]', 'How many times to run the benchmarks [10]', 10)
11 |   .option('-i, --iterations [iterations]', 'The minimum amount of iterations for each run. [10]', 10)
12 |   .parse(process.argv);
13 | 
14 | options.runs       = parseInt(options.runs, 10);
15 | options.iterations = parseInt(options.iterations, 10);
16 | 
17 | console.log('Options:');
18 | console.log('  Entity Size         : %s', helper.toHuman(options.entitysize));
19 | console.log('  Chunk Size          : %s', helper.toHuman(options.chunksize));
20 | console.log('  Runs                : %s', options.runs);
21 | console.log('  Iterations per run  : %s', options.iterations);
22 | console.log('');
23 | 
24 | var start = Date.now();
25 | 
26 | var results = {};
27 | var suite = new uubench.Suite({
28 |   iterations: options.iterations,
29 |   type: 'fixed',
30 |   result: function(name, stats) {
31 |     var seconds  = (stats.elapsed / 1000);
32 |     var mb       = helper.toUnit('mb', options.entitysize);
33 |     var mbPerSec = helper.round(((stats.iterations * mb) / seconds), 2);
34 | 
35 |     results[name] = results[name] || {iterations: 0, series: []};
36 |     results[name].iterations += stats.iterations;
37 |     results[name].series.push(mbPerSec);
38 |   },
39 |   done: function() {
40 |     process.stdout.write('.');
41 |     if (--options.runs) return suite.run();
42 | 
43 |     var duration = helper.round((Date.now() - start) / 1000, 1);
44 |     console.log('\nBenchmark took: %d seconds\n', duration);
45 | 
46 |     for (var name in results) {
47 |       var result = results[name];
48 |       var series = result.series;
49 |       series.sort(function(a, b) {
50 |         return b - a;
51 |       });
52 | 
53 |       var percentile = 95;
54 |       var speed      = helper.round(helper.quantile(series, percentile / 100), 2)
55 |       console.log(
56 |         '%s: %s mb/sec (%d% of %d iterations)',
57 |         name,
58 |         speed,
59 |         percentile,
60 |         result.iterations
61 |       );
62 |     }
63 |   },
64 | });
65 | 
66 | var boundary = helper.boundary();
67 | var buffer = helper.multipartMessage(boundary, options.entitysize);
68 | var chunkSize = helper.toBytes(options.chunksize);
69 | 
70 | var parsers = helper.parsers();
71 | for (var name in parsers) {
72 |   (function(name) {
73 |     suite.bench(name, function(next) {
74 |       var write = parsers[name](boundary, next);
75 | 
76 |       for (var i = 0; i < buffer.length; i += chunkSize) {
77 |         var end = (i + chunkSize < buffer.length)
78 |           ? i + chunkSize
79 |           : buffer.length;
80 | 
81 |         write(buffer.slice(i, end));
82 |       }
83 |     });
84 |   })(name);
85 | }
86 | 
87 | suite.run();
88 | 


--------------------------------------------------------------------------------
/benchmark/helper.js:
--------------------------------------------------------------------------------
  1 | var _     = require('underscore');
  2 | var fs    = require('fs');
  3 | var units = {
  4 |   'gb'    : Math.pow(1024, 3),
  5 |   'mb'    : Math.pow(1024, 2),
  6 |   'kb'    : Math.pow(1024, 1),
  7 |   'bytes' : Math.pow(1024, 0),
  8 | };
  9 | 
 10 | exports.toBytes = function(str) {
 11 |   if (typeof str === 'number') return str;
 12 | 
 13 |   var bytes = str.replace(/^([\d.]+)(.*)/i, function(m, size, unit) {
 14 |     size = parseFloat(size, 10);
 15 | 
 16 |     switch (unit) {
 17 |       case 'g'  :
 18 |       case 'gb' : return size * Math.pow(1024, 2);
 19 |       case 'm'  :
 20 |       case 'mb' : return size * Math.pow(1024, 2);
 21 |       case 'k'  :
 22 |       case 'kb' : return size * Math.pow(1024, 1);
 23 |       case 'b'  : return size * Math.pow(1024, 0);
 24 |       default   : throw new Error('Unknown size unit: "' + unit + '"');
 25 |     }
 26 |   });
 27 | 
 28 |   return parseInt(bytes, 10);
 29 | };
 30 | 
 31 | exports.toUnit = function(unit, size) {
 32 |   var bytes = this.toBytes(size);
 33 |   var limit = units[unit];
 34 | 
 35 |   return (bytes / limit);
 36 | };
 37 | 
 38 | exports.toHuman = function(size) {
 39 |   size = (typeof size === 'string')
 40 |     ? size = this.toBytes(size)
 41 |     : size;
 42 | 
 43 |   for (var unit in units) {
 44 |     var limit = units[unit];
 45 |     if (size < limit) continue;
 46 | 
 47 |     size = (size / limit)
 48 |       .toFixed(1)
 49 |       .replace(/\.0$/, '');
 50 | 
 51 |     return size + ' ' + unit;
 52 |   }
 53 | };
 54 | 
 55 | exports.boundary = function() { // returns the fixed boundary string used for the benchmark entity
 56 |   return '-----------------------------168072824752491622650073';
 57 | };
 58 | 
 59 | exports.multipartMessage = function(boundary, size) {
 60 |   size = this.toBytes(size);
 61 | 
 62 |   var head =
 63 |         '--'+boundary+'\r\n'
 64 |       + 'content-disposition: form-data; name="field1"\r\n'
 65 |       + '\r\n'
 66 |     , tail = '\r\n--'+boundary+'--\r\n'
 67 |     , buffer = new Buffer(size);
 68 | 
 69 |   buffer.write(head, 'ascii', 0);
 70 |   buffer.write(tail, 'ascii', buffer.length - tail.length);
 71 |   return buffer;
 72 | };
 73 | 
 74 | exports.parsers = function() {
 75 |   var dir     = __dirname + '/parsers';
 76 |   var parsers = {};
 77 | 
 78 |   fs
 79 |     .readdirSync(dir)
 80 |     .filter(function(name) {
 81 |       return /\.js$/.test(name);
 82 |     })
 83 |     .forEach(function(file) {
 84 |       var parser = require(dir + '/' + file);
 85 |       var name = file.replace(/\.js$/, '');
 86 | 
 87 |       parsers[name] = parser;
 88 |     });
 89 | 
 90 |   return parsers;
 91 | };
 92 | 
 93 | // From: https://gist.github.com/642690
 94 | (function(uustats){
 95 |   uustats.sdev = function(series) {
 96 |     return Math.sqrt(uustats.variance(series));
 97 |   };
 98 | 
 99 |   uustats.variance = function(series) {
100 |     var t = 0, squares = 0, len = series.length;
101 | 
102 |     for (var i=0; i<len; i++) {
103 |       var obs = series[i];
104 |       t += obs;
105 |       squares += Math.pow(obs, 2);
106 |     }
107 |     return (squares/len) - Math.pow(t/len, 2);
108 |   },
109 | 
110 |   uustats.mean = function(series) {
111 |     var t = 0, len = series.length;
112 | 
113 |     for (var i=0; i<len; i++) {
114 |       t += series[i];
115 |     }
116 |     return t / Math.max(len, 1);
117 |   }
118 | 
119 |   uustats.summary = function(series) {
120 |     var q = uustats.quantile,
121 |         a = series.slice(0).sort(function(a, b) { return a - b });
122 | 
123 |     return {
124 |       min: a[0],
125 |       p25: q(a, 0.25),
126 |       med: q(a, 0.5),
127 |       p75: q(a, 0.75),
128 |       max: a[a.length - 1],
129 |       p10: q(a, 0.1),
130 |       p90: q(a, 0.9),
131 |       avg: uustats.mean(a)
132 |     }
133 |   };
134 | 
135 |   uustats.quantile = function(series, q) {
136 |     var len = series.length,
137 |         pos = q * (len - 1),
138 |         t   = Math.ceil(pos),
139 |         f   = t - 1;
140 | 
141 |     if (f < 0) { return series[0] }
142 |     if (t >= len) { return series[len - 1] }
143 |     return series[f] * (t - pos) + series[t] * (pos - f);
144 |   };
145 | 
146 |   uustats.round = function(x, n) {
147 |     return Math.round(x*Math.pow(10, n))/Math.pow(10, n);
148 |   };
149 | })(typeof exports !== 'undefined' ? exports : window.uustats = {});
150 | 


--------------------------------------------------------------------------------
/benchmark/parsers/formidable.js:
--------------------------------------------------------------------------------
 1 | var MultipartParser = require('formidable/lib/multipart_parser').MultipartParser;
 2 | // Benchmark adapter: builds a formidable parser for `boundary` and returns its write().
 3 | module.exports = function(boundary, next) {
 4 |   var parser = new MultipartParser();
 5 |   parser.onEnd = next; // `next` is invoked through the parser's onEnd hook
 6 |   parser.initWithBoundary(boundary);
 7 |   // Bound so the caller can invoke it as a plain function.
 8 |   return parser.write.bind(parser);
 9 | };
10 | 


--------------------------------------------------------------------------------
/benchmark/parsers/multipart_parser.js:
--------------------------------------------------------------------------------
1 | var MultipartParser = require('../../index');
2 | // Benchmark adapter for this package's parser; `next` is attached to its 'end' event.
3 | module.exports = function(boundary, next) {
4 |   var parser = MultipartParser.create(boundary);
5 |   parser.on('end', next);
6 |   return parser.write.bind(parser); // bound so the caller can invoke it as a plain function
7 | };
8 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | module.exports = require('./lib/multipart_parser.js');
2 | 


--------------------------------------------------------------------------------
/lib/multipart_parser.js:
--------------------------------------------------------------------------------
  1 | var EventEmitter = require('events').EventEmitter;
  2 | var util         = require('util');
  3 | var Part         = require('./part');
  4 | 
  5 | /* Tokens as defined by rfc 2616. Also lowercases them.
  6 |  *        token       = 1*<any CHAR except CTLs or separators>
  7 |  *     separators     = "(" | ")" | "<" | ">" | "@"
  8 |  *                    | "," | ";" | ":" | "\" | <">
  9 |  *                    | "/" | "[" | "]" | "?" | "="
 10 |  *                    | "{" | "}" | SP | HT
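 11 |  * NOTE(review): the table below still accepts SP, <">, "/" and "}" although
 12 |  * the grammar lists them as separators. From Ryan Dahl's http_parser.c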
 13 |  */
 14 | var TOKENS = [
 15 | /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
 16 |         0,       0,       0,       0,       0,       0,       0,       0,
 17 | /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
 18 |         0,       0,       0,       0,       0,       0,       0,       0,
 19 | /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
 20 |         0,       0,       0,       0,       0,       0,       0,       0,
 21 | /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
 22 |         0,       0,       0,       0,       0,       0,       0,       0,
 23 | /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
 24 |        ' ',      '!',     '"',     '#',     '$',     '%',     '&',    '\'',
 25 | /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
 26 |         0,       0,      '*',     '+',      0,      '-',     '.',     '/',
 27 | /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
 28 |        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
 29 | /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
 30 |        '8',     '9',      0,       0,       0,       0,       0,       0,
 31 | /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
 32 |         0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
 33 | /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
 34 |        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 35 | /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
 36 |        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 37 | /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
 38 |        'x',     'y',     'z',      0,       0,       0,      '^',     '_',
 39 | /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
 40 |        '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
 41 | /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
 42 |        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 43 | /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
 44 |        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 45 | /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
 46 |        'x',     'y',     'z',      0,      '|',     '}',     '~',       0 ];
 47 | 
 48 | var LF       = 10;
 49 | var CR       = 13;
 50 | var COLON    = 58;
 51 | var DASH     = 45;
 52 | var CRLF     = new Buffer('\r\n');
 53 | var DASH_END = new Buffer('--\r\n');
 54 | 
 55 | module.exports = MultipartParser;
 56 | util.inherits(MultipartParser, EventEmitter);
 57 | function MultipartParser() {
 58 |   EventEmitter.call(this);
 59 |   // Not writable until boundary() has been called (see the NO_BOUNDARY state).
 60 |   this.writable            = false;
 61 |   this._state              = 'NO_BOUNDARY'; // current state of the write() state machine
 62 |   this._boundary           = null;          // Buffer set by boundary()
 63 |   this._preamble           = true;          // cleared once the first boundary line is complete
 64 |   this._counter            = 0;             // match index into CRLF / DASH_END
 65 |   this._marker             = 0;             // match index into _boundary, or start of pending body bytes
 66 |   this._offset             = 0;             // running byte position, reported in error messages
 67 |   this._part               = null;          // Part currently being filled
 68 |   this._headerBufferLimit  = 4 * 1024;      // cap on buffered header characters
 69 |   this._headerBufferLength = 0;             // only ever incremented in this file
 70 |   this._headerField        = '';            // header name collected so far (lowercased via TOKENS)
 71 |   this._headerValue        = '';            // header value collected so far
 72 |   this._boundaryChars      = {};            // byte values occurring in the delimiter
 73 | }
 74 | 
 75 | // Convenience factory: constructs a parser and configures its boundary.
 76 | MultipartParser.create = function(boundary) {
 77 |   var instance = new this();
 78 |   instance.boundary(boundary);
 79 |   return instance;
 80 | };
 81 | 
 82 | MultipartParser.prototype.boundary = function(boundary) {
 83 |   // Last 3 bytes are used for lookbehind
 84 |   this._boundary = new Buffer('\r\n--' + boundary + '???');
 85 |   this._state    = 'PREAMBLE';
 86 |   this.writable = true;
 87 | 
 88 |   this._boundaryChars = {};
 89 |   for (var i = 0; i < this._boundary.length - 3; i++) {
 90 |     this._boundaryChars[this._boundary[i]] = true;
 91 |   }
 92 | };
 93 | 
 94 | 
 95 | MultipartParser.prototype.write = function(buffer) {
 96 |   if (!this.writable) {
 97 |     throw this._error('NotWritable', 'Bad state: ' + this._state);
 98 |   }
 99 | 
100 |   var i    = 0;
101 |   var byte = buffer[i];
102 | 
103 |   while (true) {
104 |     switch (this._state) {
105 |       case 'PREAMBLE':
106 |         switch (byte) {
107 |           case CR:
108 |             this._state   = 'BOUNDARY';
109 |             this._marker  = 0;
110 |             break;
111 |           case DASH:
112 |             // As per RFC-1341 we have to accept this, but complying clients
113 |             // should not generate this.
114 |             this._state  = 'BOUNDARY';
115 |             this._marker = 2;
116 |             break;
117 |         }
118 |         break;
119 |       case 'BOUNDARY':
120 |         if (byte !== this._boundary[++this._marker]) {
121 |           this._state  = 'BOUNDARY_MISMATCH';
122 |           continue;
123 |         }
124 | 
125 |         if (this._marker === this._boundary.length - 4) {
126 |           this._state = 'BOUNDARY_END';
127 |         }
128 |         break;
129 |       case 'BOUNDARY_END':
130 |         this._counter                  = 0;
131 |         this._boundary[++this._marker] = byte;
132 | 
133 |         switch (byte) {
134 |           case CR:
135 |             this._state = 'BOUNDARY_LINE_END';
136 |             break;
137 |           case DASH:
138 |             if (this._preamble) {
139 |               this._state = 'PREAMBLE';
140 |               break;
141 |             }
142 | 
143 |             this._state = 'BOUNDARY_DASH_END';
144 |             break;
145 |           default:
146 |             this._state = 'BOUNDARY_MISMATCH';
147 |             continue;
148 |         }
149 |         break;
150 |       case 'BOUNDARY_LINE_END':
151 |         switch (byte) {
152 |           case LF:
153 |             if (this._part) this._part.end();
154 | 
155 |             this._preamble = false;
156 |             this._state    = 'HEADER_FIELD';
157 |             this._counter  = 0;
158 |             this._marker   = 0;
159 |             this._part     = new Part();
160 |             break
161 |           default:
162 |             this._state = 'BOUNDARY_MISMATCH';
163 |             continue;
164 |         }
165 |         break;
166 |       case 'BOUNDARY_DASH_END':
167 |         if (byte !== DASH_END[++this._counter]) {
168 |           this._state = 'BOUNDARY_MISMATCH';
169 |           continue;
170 |         }
171 | 
172 |         if (this._counter === DASH_END.length - 1) {
173 |           this._part.end();
174 |           this.emit('end');
175 |           this._state = 'EPILOGUE';
176 |           break;
177 |         }
178 | 
179 |         this._boundary[++this._marker] = byte;
180 |         break;
181 |       case 'BOUNDARY_MISMATCH':
182 |         if (this._preamble) {
183 |           this._state = 'PREAMBLE';
184 |           continue;
185 |         }
186 | 
187 |         this._part.write(this._boundary, 0, this._marker);
188 |         this._state = 'PART_BODY';
189 |         continue;
190 |       case 'HEADER_FIELD':
191 |         switch (byte) {
192 |           case COLON:
193 |             this._state = 'HEADER_VALUE';
194 |             break
195 |           case CR:
196 |             if (this._headerField) {
197 |               this._emitError('InvalidHeaderFieldToken', byte);
198 |               return;
199 |             }
200 | 
201 |             this._state = 'HEADERS_END';
202 |             break
203 |           default:
204 |             var character = TOKENS[byte];
205 |             if (!character) {
206 |               this._emitError('InvalidHeaderFieldToken', byte);
207 |               return;
208 |             }
209 | 
210 |             if (++this._headerBufferLength > this._headerBufferLimit) {
211 |               this._state = 'HEADER_BUFFER_OVERFLOW';
212 |               continue;
213 |             }
214 | 
215 |             this._headerField += character;
216 |             break;
217 |         }
218 |         break;
219 |       case 'HEADERS_END':
220 |         switch (byte) {
221 |           case LF:
222 |             this._marker = i;
223 |             this._state  = 'PART_BODY';
224 |             this.emit('part', this._part);
225 |             break;
226 |           default:
227 |             this._emitError('InvalidHeaderFieldToken', byte);
228 |             return;
229 |             break;
230 |         }
231 |         break;
232 |         break;
233 |       case 'HEADER_VALUE':
234 |         if (byte === CRLF[this._counter++]) {
235 |           if (this._counter === CRLF.length) {
236 |             this._part.addHeader(this._headerField, this._headerValue.trim());
237 |             this._headerField = '';
238 |             this._headerValue = '';
239 |             this._counter     = 0;
240 |             this._state       = 'HEADER_FIELD';
241 |           }
242 |           break;
243 |         }
244 | 
245 |         this._counter = 0;
246 | 
247 |         if (++this._headerBufferLength > this._headerBufferLimit) {
248 |           this._state = 'HEADER_BUFFER_OVERFLOW';
249 |           continue;
250 |         }
251 | 
252 |         this._headerValue += String.fromCharCode(byte);
253 |         break;
254 |       case 'PART_BODY':
255 |         this._marker = i;
256 | 
257 |         var boundaryLength = this._boundary.length - 1;
258 |         var bufferEnd      = buffer.length;
259 |         var boundaryChars  = this._boundaryChars;
260 | 
261 |         do {
262 |           i += boundaryLength;
263 |         } while (i < bufferEnd && !(buffer[i] in boundaryChars))
264 |         i -= boundaryLength;
265 | 
266 |         this._offset += i - this._marker;
267 | 
268 |         while(true) {
269 |           if (byte === CR) {
270 |             this._part.write(buffer, this._marker, i);
271 |             this._marker = 0;
272 |             this._state  = 'BOUNDARY';
273 |             break;
274 |           }
275 | 
276 |           this._offset++;
277 | 
278 |           if ((byte = buffer[++i]) === undefined) {
279 |             this._part.write(buffer, this._marker, i);
280 |             break;
281 |           }
282 |         }
283 |         break;
284 |       case 'EPILOGUE':
285 |         return;
286 |       case 'HEADER_BUFFER_OVERFLOW':
287 |         this._emitError(
288 |           'HeaderBufferOverflow',
289 |           'Max buffer size: ' + this._headerBufferLimit + 'bytes'
290 |         );
291 |         return;
292 |       case 'NO_BOUNDARY':
293 |         this._emitError('NoBoundary', 'No boundary configured for parser.');
294 |         return;
295 |       default:
296 |         this._emitError('InvalidParserState', 'Unknown state: ' + this._state);
297 |         return;
298 |     }
299 | 
300 |     if ((byte = buffer[++i]) === undefined) break;
301 |     this._offset++;
302 |   }
303 | };
304 | 
305 | MultipartParser.prototype._error = function(type, reason) {
306 |   if (typeof reason === 'number') {
307 |     var byte      = reason;
308 |     var character = String.fromCharCode(byte);
309 | 
310 |     reason =
311 |       'Got byte: ' + byte + ' / ' + JSON.stringify(character) + ' ' +
312 |       'at offset: ' + this._offset;
313 |   }
314 | 
315 |   return new Error('MultipartParser.' + type + ': ' + reason);
316 | };
317 | // Marks the parser unwritable, then reports the failure through an 'error' event.
318 | MultipartParser.prototype._emitError = function(type, reason) {
319 |   var err       = this._error(type, reason);
320 |   this.writable = false;
321 |   this.emit('error', err);
322 | };
323 | // Emits 'end' and nothing else: the open part and `writable` are left as is (TODO confirm).
324 | MultipartParser.prototype.end = function() {
325 |   this.emit('end');
326 | };
327 | 


--------------------------------------------------------------------------------
/lib/part.js:
--------------------------------------------------------------------------------
 1 | var Stream = require('stream').Stream;
 2 | var util   = require('util');
 3 | // One body part of a multipart entity: a readable Stream that carries the part's headers.
 4 | module.exports = Part;
 5 | util.inherits(Part, Stream);
 6 | function Part() {
 7 |   Stream.call(this);
 8 | 
 9 |   this.headers  = {};
10 |   this.readable = true;
11 | }
12 | // Stores a header; a repeated field name overwrites the earlier value.
13 | Part.prototype.addHeader = function(field, value) {
14 |   this.headers[field] = value;
15 | };
16 | // Emits the bytes [start, end) of `buffer` as a 'data' event.
17 | Part.prototype.write = function(buffer, start, end) {
18 |   this.emit('data', buffer.slice(start, end));
19 | };
20 | // Signals that the part is complete by emitting 'end'.
21 | Part.prototype.end = function() {
22 |   this.emit('end');
23 | };
24 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Felix Geisendörfer <felix@debuggable.com> (http://debuggable.com/)",
 3 |   "name": "multipart-parser",
 4 |   "description": "A fast and streaming multipart parser.",
 5 |   "version": "0.0.0",
 6 |   "homepage": "https://github.com/felixge/node-multipart-parser",
 7 |   "repository": {
 8 |     "type": "git",
 9 |     "url": "git://github.com/felixge/node-multipart-parser.git"
10 |   },
11 |   "main": "./index.js",
12 |   "engines": {
13 |     "node": "*"
14 |   },
15 |   "dependencies": {},
16 |   "devDependencies": {
17 |     "fast-or-slow": "0.0.5",
18 |     "far": "0.0.7",
19 |     "commander": "0.2.0",
20 |     "uubench": "0.0.1"
21 |   },
22 |   "scripts": {
23 |     "test": "make test"
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/rfc/1341-the-multipart-content-type.html:
--------------------------------------------------------------------------------
  1 | <HTML>
  2 | <HEAD>
  3 | <TITLE>RFC1341(MIME) : 7    The Multipart content type </TITLE>
  4 | <NEXTID N="z4">
  5 | </HEAD>
  6 | <BODY>
  7 | <H1><A
  8 | NAME="z3" HREF="0_TableOfContents.html">7.2</A>  The Multipart Content-Type</H1>In the case of multiple part messages,
  9 | in which one or  more  different
 10 | sets  of  data  are  combined in
 11 | a single body, a  "multipart" Content-Type
 12 | field must appear in  the  entity's
 13 |  header. The body must then contain
 14 | one or more "body parts,"  each
 15 | preceded by an encapsulation boundary,
 16 | and the last one  followed  by a
 17 |  closing boundary.  Each part starts
 18 | with an  encapsulation  boundary,
 19 | and  then  contains  a  body  part
 20 |  consisting  of   header area, a
 21 | blank line, and a body area.  Thus
 22 | a body part is similar to an RFC
 23 | 822 message in syntax,  but different
 24 | in meaning. <P>
 25 | A body part is NOT to be interpreted
 26 | as  actually  being  an  RFC  822
 27 | message.   To  begin  with,  NO header
 28 | fields are  actually required in
 29 | body parts.  A body  part  that starts
 30 |  with  a blank line, therefore,
 31 | is allowed and is a body part  for
 32 | which all default values are to be
 33 | assumed.  In  such  a  case,  the
 34 | absence  of  a Content-Type header
 35 | field implies  that the encapsulation
 36 | is plain  US-ASCII  text.   <A
 37 | NAME="z2">The only
 38 |  header  fields  that have defined
 39 | meaning for body parts are  those
 40 | the names of which begin with "Content-".
 41 |   All  other  header fields  are
 42 |  generally  to be ignored in body
 43 | parts.</A>  Although  they should  generally
 44 |  be   retained  in   mail  processing,
 45 |  they may be discarded by gateways
 46 | if necessary.  Such other fields
 47 | are permitted to appear in body parts
 48 |  but  should not  be  depended on.
 49 | "X-" fields may be created for 
 50 | experimental or private purposes,
 51 | with the recognition that  the information
 52 | they contain may be lost at some
 53 | gateways. <P>
 54 | The distinction between an RFC 822
 55 | message and a  body  part  is  subtle,
 56 | but  important.  A gateway between
 57 | Internet and  X.400 mail, for example,
 58 | must be able to tell the difference
 59 |  between  a  body part that contains
 60 | an image and a body part  that contains
 61 | an encapsulated message, the body
 62 | of which  is  an  image.   In order
 63 | to represent the latter, the body
 64 | part  must have "Content-Type: message",
 65 | and its body  (after  the  blank
 66 | line)  must be the encapsulated message,
 67 | with its own  "Content-Type: image"
 68 | header  field.   The  use of  similar
 69 |  syntax facilitates the conversion
 70 | of messages to body parts,  and
 71 | vice versa, but the distinction between
 72 | the two must be  understood  by
 73 | implementors. (For the special case
 74 | in which  all parts actually are
 75 | messages, a "digest" subtype is 
 76 | also  defined.) <P>
 77 | As stated previously, each  body
 78 | part  is  preceded  by  an  encapsulation
 79 | boundary.  The encapsulation boundary
 80 | MUST NOT  appear inside any of the
 81 | encapsulated parts.   Thus,  it is
 82 |  crucial  that  the  composing agent
 83 |  be  able to choose and  specify
 84 | the unique boundary that will separate
 85 | the parts. <P>
 86 | All present and future subtypes of
 87 | the "multipart" type must  use an
 88 |  identical  syntax.  Subtypes may
 89 |  differ  in their  semantics, and
 90 | may impose additional restrictions
 91 | on syntax,  but  must  conform to
 92 | the required syntax for the multipart
 93 |  type.  This requirement ensures
 94 | that  all  conformant  user  agents
 95 | will  at least be able to recognize
 96 | and separate the  parts of any multipart
 97 |  entity,  even  of  an unrecognized
 98 |  subtype. <P>
 99 | As stated in the definition of the
100 | Content-Transfer-Encoding  field,
101 | no encoding other than "7bit", "8bit",
102 | or "binary" is  permitted for entities
103 | of type "multipart".   The  multipart
104 |  delimiters  and  header fields
105 | are always 7-bit ASCII in any  case,
106 | and data within the body parts can
107 | be  encoded  on  a  part-by-part
108 | basis,  with  Content-Transfer-Encoding
109 | fields  for each appropriate body
110 | part. <P>
111 | Mail gateways, relays, and other
112 | mail  handling  agents  are  commonly
113 | known  to alter the top-level header
114 | of an RFC 822  message.   In particular,
115 | they frequently  add,  remove,  or
116 |  reorder  header  fields.   Such
117 | alterations  are explicitly  forbidden
118 | for the body part headers embedded
119 | in  the  bodies  of messages of
120 | type "multipart." 
121 | <H2><A
122 | NAME="z0">7.2.1     Multipart:  The common
123 | syntax</A></H2>All subtypes of "multipart" share
124 | a common  syntax,  defined  in this
125 |  section.   A simple example of a
126 | multipart message  also appears
127 | in this section.  An example of a
128 | more  complex  multipart message
129 | is given in Appendix C. <P>
130 | The Content-Type field for multipart
131 | entities requires  one  parameter,
132 |  "boundary",   which  is  used to
133 |  specify  the  encapsulation boundary.
134 |   The  encapsulation boundary   is
135 |  defined   as  a line  consisting
136 |  entirely  of  two hyphen  characters
137 | ("-", decimal code 45) followed by
138 |  the  boundary  parameter value
139 | from the Content-Type header field.
140 | <P>
141 | NOTE:  The hyphens are  for  rough
142 | compatibility  with  the  earlier
143 | RFC  934  method  of message encapsulation,
144 | and for  ease   of   searching 
145 | for   the   boundaries    in   some
146 |  implementations.  However, it should
147 | be noted that multipart  messages
148 |  are  NOT  completely compatible
149 |  with   RFC   934  encapsulations;
150 | in  particular,  they  do  not obey
151 | RFC 934  quoting conventions  for
152 | embedded  lines  that  begin  with
153 |  hyphens.   This  mechanism  was
154 | chosen  over  the  RFC  934  mechanism
155 | because the latter causes lines to
156 | grow with  each  level  of  quoting.
157 | The combination of this growth with
158 | the  fact that SMTP implementations
159 | sometimes  wrap  long  lines  made
160 | the  RFC 934 mechanism unsuitable
161 | for use in the event  that deeply-nested
162 | multipart structuring is ever desired.
163 | <P>
164 | Thus, a typical multipart Content-Type
165 | header  field  might  look like
166 | this: 
167 | <PRE>
168 |      Content-Type: multipart/mixed; 
169 |           boundary=gc0p4Jq0M2Yt08jU534c0p
170 |  
171 | </PRE>This indicates that the entity consists
172 | of  several  parts,  each itself
173 | with a structure that is syntactically
174 | identical  to an RFC 822 message,
175 | except that the header area might
176 | be  completely  empty,  and  that
177 | the parts are each preceded by 
178 | the line 
179 | <PRE>
180 |      --gc0p4Jq0M2Yt08jU534c0p
181 |  
182 | </PRE>Note that the  encapsulation  boundary
183 | must  occur  at  the  beginning
184 | of  a line, i.e., following a CRLF,
185 | and that that  initial CRLF is considered
186 | to be part of  the  encapsulation
187 |  boundary  rather  than  part  of
188 | the preceding part.    The  boundary
189 | must be followed immediately either
190 | by another CRLF  and the header
191 | fields for the next part, or by two
192 | CRLFs, in  which case there are
193 | no header fields for the next part
194 | (and  it is therefore assumed to
195 | be of Content-Type text/plain). <P>
196 | NOTE:   The  CRLF  preceding  the
197 | encapsulation   line   is  considered
198 | part  of  the boundary so that it
199 | is possible to  have a part that
200 | does not end with  a  CRLF  (line
201 |  break).  Body  parts that must
202 | be considered to end with line breaks,
203 |  therefore, should have two CRLFs
204 | preceding the encapsulation  line,
205 | the first of which is part of the
206 | preceding body part,  and the  second
207 | of  which  is  part  of  the  encapsulation
208 |  boundary. <P>
209 | The requirement that the encapsulation
210 | boundary begins  with  a  CRLF implies
211 |  that  the  body of a multipart entity
212 | must  itself begin with a CRLF before
213 | the first encapsulation line  --
214 |  that  is, if the "preamble" area
215 | is not used, the entity  headers
216 | must be followed by TWO CRLFs.  This
217 | is  indeed  how  such  entities
218 | should be composed.  A tolerant mail
219 | reading  program, however, may interpret
220 | a  body  of  type multipart  that
221 |  begins  with  an encapsulation line
222 | NOT initiated by a  CRLF  as  also
223 |  being  an encapsulation  boundary,
224 |  but  a  compliant  mail  sending
225 |  program must  not  generate  such
226 |  entities. <P>
227 | Encapsulation  boundaries  must not
228 |   appear   within   the  encapsulations,
229 | and  must  be no longer than 70 characters,
230 |  not counting the two leading hyphens.
231 | <P>
232 | The encapsulation boundary following
233 | the last body part is a  distinguished
234 | delimiter that indicates that no
235 | further body  parts will follow.
236 | Such a delimiter  is  identical to
237 |  the  previous  delimiters, with
238 | the addition of two more hyphens
239 |  at the end of the line: 
240 | <PRE>
241 |      --gc0p4Jq0M2Yt08jU534c0p-- 
242 | </PRE>There appears to be room for additional
243 | information prior to  the  first
244 | encapsulation  boundary  and following
245 | the final  boundary.  These areas
246 | should generally be left  blank,
247 | and  implementations  should  ignore
248 | anything that appears before  the
249 | first boundary or after the last
250 | one. <P>
251 | NOTE:  These "preamble" and "epilogue"
252 | areas  are  not  used  because of
253 | the lack of proper typing of these
254 | parts and the  lack  of  clear semantics
255 |  for  handling  these areas   at
256 |  gateways, particularly X.400 gateways.
257 | <P>
258 | NOTE:  Because encapsulation boundaries
259 | must not  appear  in  the  body
260 | parts  being  encapsulated,  a user
261 |  agent  must  exercise care to choose
262 | a unique boundary.  The boundary
263 |  in  the example above could have
264 | been the result of an algorithm 
265 | designed to produce boundaries with
266 | a very  low  probability  of already
267 |  existing in the data to be encapsulated
268 | without  having to prescan  the
269 |  data.   Alternate algorithms  might
270 |  result in more 'readable' boundaries
271 | for a recipient with an  old user
272 | agent, but would require  more  attention
273 |  to  the  possibility   that   the
274 |   boundary  might  appear  in  the
275 |  encapsulated part.   The  simplest
276 |  boundary possible   is  something
277 | like "---", with a closing boundary
278 | of "-----". <P>
279 | As a very simple example, the  following
280 | multipart  message  has  two  parts,
281 | both  of  them  plain  text,  one
282 | of them  explicitly typed and one
283 | of them implicitly typed: 
284 | <PRE>
285 |      From: Nathaniel Borenstein &lt;nsb@bellcore.com> 
286 |      To:  Ned Freed &lt;ned@innosoft.com> 
287 |      Subject: Sample message 
288 |      MIME-Version: 1.0 
289 |      Content-type: multipart/mixed;
290 |           boundary="simple boundary"
291 | 
292 |      This is the preamble.  It is to be ignored, though it 
293 |      is a handy place for mail composers to include an 
294 |      explanatory note to non-MIME compliant readers. 
295 |      --simple boundary 
296 | 
297 |      This is implicitly typed plain ASCII text. 
298 |      It does NOT end with a linebreak. 
299 |      --simple boundary 
300 |      Content-type: text/plain; charset=us-ascii 
301 | 
302 |      This is explicitly typed plain ASCII text. 
303 |      It DOES end with a linebreak. 
304 | 
305 |      --simple boundary-- 
306 |      This is the epilogue.  It is also to be ignored.
307 | 
308 | </PRE>The use of a Content-Type of multipart
309 | in a body part within  another multipart
310 |  entity  is explicitly allowed. 
311 |  In such  cases, for obvious reasons,
312 | care must  be  taken  to ensure 
313 | that  each  nested  multipart entity
314 |  uses a different  boundary
315 | delimiter. See Appendix C for an
316 | example of  nested  multipart entities.
317 | <P>
318 | The use of the multipart Content-Type
319 | with  only  a  single  body  part
320 | may  be  useful  in  certain  contexts,
321 | and  is  explicitly permitted. <P>
322 | The only mandatory parameter for
323 | the multipart  Content-Type  is
324 | the  boundary  parameter,  which
325 | consists  of  1  to 70  characters
326 | from a set of characters known to
327 | be very  robust  through  email
328 | gateways,  and  NOT ending with white
329 | space.  (If a boundary appears to
330 | end with white  space,  the  white
331 |  space  must be presumed to have
332 | been added by a gateway, and  should
333 | be  deleted.)   It  is  formally
334 | specified  by  the  following BNF:
335 | 
336 | <PRE>
337 | boundary := 0*69&lt;bchars> bcharsnospace 
338 | 
339 | bchars := bcharsnospace / " " 
340 | 
341 | bcharsnospace :=    DIGIT / ALPHA / "'" / "(" / ")" / "+"
342 |                / "_" / "," / "-" / "." / "/" / ":"
343 |                / "=" / "?"
344 | 
345 | </PRE>Overall, the body of a multipart
346 | entity may be specified  as  follows:
347 | 
348 | <PRE>
349 | multipart-body := preamble 1*encapsulation 
350 |                close-delimiter epilogue 
351 | 
352 | encapsulation := delimiter CRLF body-part 
353 | 
354 | delimiter := CRLF "--" boundary   ; taken from Content-Type
355 |                                   ; field.
356 |                                   ; when content-type is
357 |                                   ; multipart
358 |                              ; There must be no space 
359 |                              ; between "--" and boundary. 
360 | 
361 | close-delimiter := delimiter "--" ; Again, no  space  before 
362 |                                   ; "--"
363 | 
364 | preamble :=  *text                  ;  to  be  ignored  upon 
365 |                                     ; receipt.
366 | 
367 | epilogue :=  *text                  ;  to  be  ignored  upon 
368 |                                     ; receipt.
369 | 
370 | body-part = &lt;"message" as defined in RFC 822, 
371 |          with all header fields optional, and with the 
372 |          specified delimiter not occurring anywhere in 
373 |          the message body, either on a line by itself 
374 |          or as a substring anywhere.  Note that the 
375 |          semantics of a part differ from the semantics 
376 |          of a message, as described in the text.> 
377 | 
378 | </PRE>NOTE:  Conspicuously missing from
379 | the multipart  type  is  a  notion
380 | of  structured,  related body parts.
381 | In general, it  seems premature
382 | to try to  standardize  interpart
383 | structure  yet.  It is recommended
384 | that those wishing to provide a more
385 |  structured or integrated multipart
386 | messaging facility should  define
387 |  a   subtype  of  multipart  that
388 | is  syntactically  identical, but
389 | that  always  expects  the  inclusion
390 | of  a  distinguished part that can
391 | be used to specify the structure
392 |  and integration of the other parts,
393 | probably  referring  to  them by
394 |  their Content-ID field.  If this
395 | approach is used,  other implementations
396 | will not recognize  the  new  subtype,
397 |  but  will  treat it as the primary
398 | subtype (multipart/mixed)  and will
399 | thus be able to show the user the
400 | parts  that  are  recognized. 
401 | <H2>7.2.2     The Multipart/mixed (primary)
402 | subtype</H2>The primary subtype for multipart,
403 | "mixed", is intended  for  use when
404 |  the body parts are independent and
405 | intended to be  displayed  serially.
406 |  Any  multipart   subtypes   that
407 |  an  implementation does not recognize
408 | should be treated as being  of subtype
409 | "mixed". 
410 | <H2>7.2.3     The Multipart/alternative
411 | subtype </H2>The multipart/alternative type is
412 | syntactically identical to  multipart/mixed,
413 |  but  the  semantics  are  different.
414 |  In  particular, each of the parts
415 | is an "alternative" version of 
416 | the same information.  User agents
417 | should recognize that the  content
418 | of the various parts are interchangeable.
419 | The  user  agent  should  either
420 | choose  the  "best" type based on
421 | the  user's environment and preferences,
422 | or offer  the  user  the  available
423 | alternatives.  In general, choosing
424 | the best type  means displaying
425 | only the LAST part that can  be displayed.
426 |  This  may be used, for example,
427 | to send mail in a fancy text  format
428 | in such  a  way  that it  can  easily
429 |  be  displayed  anywhere: 
430 | <PRE>
431 | From:  Nathaniel Borenstein &lt;nsb@bellcore.com> 
432 | To: Ned Freed &lt;ned@innosoft.com> 
433 | Subject: Formatted text mail 
434 | MIME-Version: 1.0 
435 | Content-Type: multipart/alternative; boundary=boundary42 
436 | 
437 | 
438 | --boundary42 
439 | Content-Type: text/plain; charset=us-ascii 
440 | 
441 | ...plain text version of message goes here.... 
442 | 
443 | --boundary42 
444 | Content-Type: text/richtext 
445 | 
446 | .... richtext version of same message goes here ... 
447 | --boundary42 
448 | Content-Type: text/x-whatever 
449 | 
450 | .... fanciest formatted version of same  message  goes  here 
451 | ... 
452 | --boundary42-- 
453 | 
454 | </PRE>In this example, users  whose  mail
455 | system  understood  the  "text/x-whatever"
456 | format  would see only the fancy
457 | version,  while other users would
458 | see only the richtext or plain  text
459 |  version, depending on the capabilities
460 | of their system. <P>
461 | In general, user agents that  compose
462 | multipart/alternative  entities
463 | should place the body parts in increasing
464 | order of  preference, that is, with
465 | the  preferred  format  last.  For
466 |  fancy  text,  the sending user
467 | agent should put the plainest  format
468 | first and the richest format  last.
469 |  Receiving  user  agents  should
470 | pick  and  display  the last format
471 | they are  capable of  displaying.
472 |  In  the  case  where  one  of the
473 |  alternatives  is  itself  of type
474 |  "multipart" and contains  unrecognized
475 | sub-parts, the user agent may choose
476 | either  to  show that alternative,
477 | an earlier alternative, or both.
478 | <P>
479 | NOTE:  From an implementor's perspective,
480 | it might seem more  sensible  to
481 | reverse  this  ordering, and have
482 | the plainest  alternative last.
483 | However, placing the plainest alternative
484 |  first    is    the    friendliest
485 |  possible   option   when  multipart/alternative
486 | entities are viewed using a  non-MIME-
487 |  compliant mail reader.  While this
488 | approach does impose some  burden
489 | on  compliant  mail  readers,  interoperability
490 | with  older  mail  readers was deemed
491 | to be more important in this  case.
492 | <P>
493 | It may be the case  that  some  user
494 | agents,  if  they  can  recognize
495 | more than one of the formats, will
496 | prefer to offer  the user the choice
497 | of which format  to  view.   This
498 | makes  sense, for example, if mail
499 | includes both a nicely-formatted
500 |  image version and an easily-edited
501 | text  version.   What  is  most
502 | critical,  however, is that the user
503 | not automatically  be shown multiple
504 | versions of the  same  data.  Either
505 |  the  user  should  be shown the
506 | last recognized version or should
507 |  explicitly be given the choice.
508 | 
509 | <H2>7.2.4     The Multipart/digest subtype
510 | </H2>This document defines a "digest"
511 | subtype  of  the  multipart  Content-Type.
512 |  This  type  is  syntactically identical
513 |  to  multipart/mixed, but  the 
514 | semantics  are  different.   In 
515 | particular,  in a digest, the default
516 | Content-Type value for  a   body
517 |   part   is   changed   from    "text/plain"
518 |    to  "message/rfc822".  This 
519 | is  done  to allow a more readable
520 |  digest format that is largely 
521 | compatible  (except  for the  quoting
522 | convention) with RFC 934. <P>
523 | A digest in this format might,  then,
524 | look  something  like  this: 
525 | <PRE>
526 | From: Moderator-Address 
527 | MIME-Version: 1.0 
528 | Subject:  Internet Digest, volume 42 
529 | Content-Type: multipart/digest; 
530 |      boundary="---- next message ----" 
531 | 
532 | 
533 | ------ next message ---- 
534 | 
535 | From: someone-else 
536 | Subject: my opinion 
537 | 
538 | ...body goes here ... 
539 | 
540 | ------ next message ---- 
541 | 
542 | From: someone-else-again 
543 | Subject: my different opinion 
544 | 
545 | ... another body goes here... 
546 | 
547 | ------ next message ------
548 | 
549 | 
550 | </PRE>
551 | <H2><A
552 | NAME="z1">7.2.5     The Multipart/parallel
553 | subtype</A></H2>This document defines a "parallel"
554 | subtype of the  multipart  Content-Type.
555 |  This  type  is  syntactically identical
556 |  to  multipart/mixed, but  the 
557 | semantics  are  different.   In 
558 | particular,  in  a  parallel entity,
559 |  all  of the parts are  intended
560 | to be presented in parallel, i.e.,
561 |  simultaneously,  on  hardware and
562 |  software  that  are  capable of
563 | doing so.  Composing agents should
564 | be aware that many mail readers will
565 |  lack this capability and will show
566 | the parts serially in any  event.
567 | </BODY>
568 | </HTML>
569 | 


--------------------------------------------------------------------------------
/rfc/2047-message-header-extensions-for-non-ascii-text.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | Network Working Group                                           K. Moore
  8 | Request for Comments: 2047                       University of Tennessee
  9 | Obsoletes: 1521, 1522, 1590                                November 1996
 10 | Category: Standards Track
 11 | 
 12 | 
 13 |         MIME (Multipurpose Internet Mail Extensions) Part Three:
 14 |               Message Header Extensions for Non-ASCII Text
 15 | 
 16 | Status of this Memo
 17 | 
 18 |    This document specifies an Internet standards track protocol for the
 19 |    Internet community, and requests discussion and suggestions for
 20 |    improvements.  Please refer to the current edition of the "Internet
 21 |    Official Protocol Standards" (STD 1) for the standardization state
 22 |    and status of this protocol.  Distribution of this memo is unlimited.
 23 | 
 24 | Abstract
 25 | 
 26 |    STD 11, RFC 822, defines a message representation protocol specifying
 27 |    considerable detail about US-ASCII message headers, and leaves the
 28 |    message content, or message body, as flat US-ASCII text.  This set of
 29 |    documents, collectively called the Multipurpose Internet Mail
 30 |    Extensions, or MIME, redefines the format of messages to allow for
 31 | 
 32 |    (1) textual message bodies in character sets other than US-ASCII,
 33 | 
 34 |    (2) an extensible set of different formats for non-textual message
 35 |        bodies,
 36 | 
 37 |    (3) multi-part message bodies, and
 38 | 
 39 |    (4) textual header information in character sets other than US-ASCII.
 40 | 
 41 |    These documents are based on earlier work documented in RFC 934, STD
 42 |    11, and RFC 1049, but extend and revise them.  Because RFC 822 said
 43 |    so little about message bodies, these documents are largely
 44 |    orthogonal to (rather than a revision of) RFC 822.
 45 | 
 46 |    This particular document is the third document in the series.  It
 47 |    describes extensions to RFC 822 to allow non-US-ASCII text data in
 48 |    Internet mail header fields.
 49 | 
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | Moore                       Standards Track                     [Page 1]
 59 | 
 60 | RFC 2047               Message Header Extensions           November 1996
 61 | 
 62 | 
 63 |    Other documents in this series include:
 64 | 
 65 |    + RFC 2045, which specifies the various headers used to describe
 66 |      the structure of MIME messages.
 67 | 
 68 |    + RFC 2046, which defines the general structure of the MIME media
 69 |      typing system and defines an initial set of media types,
 70 | 
 71 |    + RFC 2048, which specifies various IANA registration procedures
 72 |      for MIME-related facilities, and
 73 | 
 74 |    + RFC 2049, which describes MIME conformance criteria and
 75 |      provides some illustrative examples of MIME message formats,
 76 |      acknowledgements, and the bibliography.
 77 | 
 78 |    These documents are revisions of RFCs 1521, 1522, and 1590, which
 79 |    themselves were revisions of RFCs 1341 and 1342.  An appendix in RFC
 80 |    2049 describes differences and changes from previous versions.
 81 | 
 82 | 1. Introduction
 83 | 
 84 |    RFC 2045 describes a mechanism for denoting textual body parts which
 85 |    are coded in various character sets, as well as methods for encoding
 86 |    such body parts as sequences of printable US-ASCII characters.  This
 87 |    memo describes similar techniques to allow the encoding of non-ASCII
 88 |    text in various portions of a RFC 822 [2] message header, in a manner
 89 |    which is unlikely to confuse existing message handling software.
 90 | 
 91 |    Like the encoding techniques described in RFC 2045, the techniques
 92 |    outlined here were designed to allow the use of non-ASCII characters
 93 |    in message headers in a way which is unlikely to be disturbed by the
 94 |    quirks of existing Internet mail handling programs.  In particular,
 95 |    some mail relaying programs are known to (a) delete some message
 96 |    header fields while retaining others, (b) rearrange the order of
 97 |    addresses in To or Cc fields, (c) rearrange the (vertical) order of
 98 |    header fields, and/or (d) "wrap" message headers at different places
 99 |    than those in the original message.  In addition, some mail reading
100 |    programs are known to have difficulty correctly parsing message
101 |    headers which, while legal according to RFC 822, make use of
102 |    backslash-quoting to "hide" special characters such as "<", ",", or
103 |    ":", or which exploit other infrequently-used features of that
104 |    specification.
105 | 
106 |    While it is unfortunate that these programs do not correctly
107 |    interpret RFC 822 headers, to "break" these programs would cause
108 |    severe operational problems for the Internet mail system.  The
109 |    extensions described in this memo therefore do not rely on little-
110 |    used features of RFC 822.
111 | 
112 | 
113 | 
114 | Moore                       Standards Track                     [Page 2]
115 | 
116 | RFC 2047               Message Header Extensions           November 1996
117 | 
118 | 
119 |    Instead, certain sequences of "ordinary" printable ASCII characters
120 |    (known as "encoded-words") are reserved for use as encoded data.  The
121 |    syntax of encoded-words is such that they are unlikely to
122 |    "accidentally" appear as normal text in message headers.
123 |    Furthermore, the characters used in encoded-words are restricted to
124 |    those which do not have special meanings in the context in which the
125 |    encoded-word appears.
126 | 
127 |    Generally, an "encoded-word" is a sequence of printable ASCII
128 |    characters that begins with "=?", ends with "?=", and has two "?"s in
129 |    between.  It specifies a character set and an encoding method, and
130 |    also includes the original text encoded as graphic ASCII characters,
131 |    according to the rules for that encoding method.
132 | 
133 |    A mail composer that implements this specification will provide a
134 |    means of inputting non-ASCII text in header fields, but will
135 |    translate these fields (or appropriate portions of these fields) into
136 |    encoded-words before inserting them into the message header.
137 | 
138 |    A mail reader that implements this specification will recognize
139 |    encoded-words when they appear in certain portions of the message
140 |    header.  Instead of displaying the encoded-word "as is", it will
141 |    reverse the encoding and display the original text in the designated
142 |    character set.
143 | 
144 | NOTES
145 | 
146 |    This memo relies heavily on notation and terms defined in RFC 822
147 |    and RFC 2045.  In particular, the syntax for the ABNF used in this
148 |    memo is defined in RFC 822, and many of the terminal or nonterminal
149 |    symbols from RFC 822 are used in the grammar for the header
150 |    extensions defined here.  Among the symbols defined in RFC 822 and
151 |    referenced in this memo are: 'addr-spec', 'atom', 'CHAR', 'comment',
152 |    'CTLs', 'ctext', 'linear-white-space', 'phrase', 'quoted-pair',
153 |    'quoted-string', 'SPACE', and 'word'.  Successful implementation of
154 |    this protocol extension requires careful attention to the RFC 822
155 |    definitions of these terms.
156 | 
157 |    When the term "ASCII" appears in this memo, it refers to the "7-Bit
158 |    American Standard Code for Information Interchange", ANSI X3.4-1986.
159 |    The MIME charset name for this character set is "US-ASCII".  When not
160 |    specifically referring to the MIME charset name, this document uses
161 |    the term "ASCII", both for brevity and for consistency with RFC 822.
162 |    However, implementors are warned that the character set name must be
163 |    spelled "US-ASCII" in MIME message and body part headers.
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | Moore                       Standards Track                     [Page 3]
171 | 
172 | RFC 2047               Message Header Extensions           November 1996
173 | 
174 | 
175 |    This memo specifies a protocol for the representation of non-ASCII
176 |    text in message headers.  It specifically DOES NOT define any
177 |    translation between "8-bit headers" and pure ASCII headers, nor is
178 |    any such translation assumed to be possible.
179 | 
180 | 2. Syntax of encoded-words
181 | 
182 |    An 'encoded-word' is defined by the following ABNF grammar.  The
183 |    notation of RFC 822 is used, with the exception that white space
184 |    characters MUST NOT appear between components of an 'encoded-word'.
185 | 
186 |    encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
187 | 
188 |    charset = token    ; see section 3
189 | 
190 |    encoding = token   ; see section 4
191 | 
192 |    token = 1*<Any CHAR except SPACE, CTLs, and especials>
193 | 
194 |    especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
195 |                <"> / "/" / "[" / "]" / "?" / "." / "="
196 | 
197 |    encoded-text = 1*<Any printable ASCII character other than "?"
198 |                      or SPACE>
199 |                   ; (but see "Use of encoded-words in message
200 |                   ; headers", section 5)
201 | 
202 |    Both 'encoding' and 'charset' names are case-independent.  Thus the
203 |    charset name "ISO-8859-1" is equivalent to "iso-8859-1", and the
204 |    encoding named "Q" may be spelled either "Q" or "q".
205 | 
206 |    An 'encoded-word' may not be more than 75 characters long, including
207 |    'charset', 'encoding', 'encoded-text', and delimiters.  If it is
208 |    desirable to encode more text than will fit in an 'encoded-word' of
209 |    75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may
210 |    be used.
211 | 
212 |    While there is no limit to the length of a multiple-line header
213 |    field, each line of a header field that contains one or more
214 |    'encoded-word's is limited to 76 characters.
215 | 
216 |    The length restrictions are included both to ease interoperability
217 |    through internetwork mail gateways, and to impose a limit on the
218 |    amount of lookahead a header parser must employ (while looking for a
219 |    final ?= delimiter) before it can decide whether a token is an
220 |    "encoded-word" or something else.
221 | 
222 | 
223 | 
224 | 
225 | 
226 | Moore                       Standards Track                     [Page 4]
227 | 
228 | RFC 2047               Message Header Extensions           November 1996
229 | 
230 | 
231 |    IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
232 |    by an RFC 822 parser.  As a consequence, unencoded white space
233 |    characters (such as SPACE and HTAB) are FORBIDDEN within an
234 |    'encoded-word'.  For example, the character sequence
235 | 
236 |       =?iso-8859-1?q?this is some text?=
237 | 
238 |    would be parsed as four 'atom's, rather than as a single 'atom' (by
239 |    an RFC 822 parser) or 'encoded-word' (by a parser which understands
240 |    'encoded-words').  The correct way to encode the string "this is some
241 |    text" is to encode the SPACE characters as well, e.g.
242 | 
243 |       =?iso-8859-1?q?this=20is=20some=20text?=
244 | 
245 |    The characters which may appear in 'encoded-text' are further
246 |    restricted by the rules in section 5.
247 | 
248 | 3. Character sets
249 | 
250 |    The 'charset' portion of an 'encoded-word' specifies the character
251 |    set associated with the unencoded text.  A 'charset' can be any of
252 |    the character set names allowed in a MIME "charset" parameter of a
253 |    "text/plain" body part, or any character set name registered with
254 |    IANA for use with the MIME text/plain content-type.
255 | 
256 |    Some character sets use code-switching techniques to switch between
257 |    "ASCII mode" and other modes.  If unencoded text in an 'encoded-word'
258 |    contains a sequence which causes the charset interpreter to switch
259 |    out of ASCII mode, it MUST contain additional control codes such that
260 |    ASCII mode is again selected at the end of the 'encoded-word'.  (This
261 |    rule applies separately to each 'encoded-word', including adjacent
262 |    'encoded-word's within a single header field.)
263 | 
264 |    When there is a possibility of using more than one character set to
265 |    represent the text in an 'encoded-word', and in the absence of
266 |    private agreements between sender and recipients of a message, it is
267 |    recommended that members of the ISO-8859-* series be used in
268 |    preference to other character sets.
269 | 
270 | 4. Encodings
271 | 
272 |    Initially, the legal values for "encoding" are "Q" and "B".  These
273 |    encodings are described below.  The "Q" encoding is recommended for
274 |    use when most of the characters to be encoded are in the ASCII
275 |    character set; otherwise, the "B" encoding should be used.
276 |    Nevertheless, a mail reader which claims to recognize 'encoded-word's
277 |    MUST be able to accept either encoding for any character set which it
278 |    supports.
279 | 
280 | 
281 | 
282 | Moore                       Standards Track                     [Page 5]
283 | 
284 | RFC 2047               Message Header Extensions           November 1996
285 | 
286 | 
287 |    Only a subset of the printable ASCII characters may be used in
288 |    'encoded-text'.  Space and tab characters are not allowed, so that
289 |    the beginning and end of an 'encoded-word' are obvious.  The "?"
290 |    character is used within an 'encoded-word' to separate the various
291 |    portions of the 'encoded-word' from one another, and thus cannot
292 |    appear in the 'encoded-text' portion.  Other characters are also
293 |    illegal in certain contexts.  For example, an 'encoded-word' in a
294 |    'phrase' preceding an address in a From header field may not contain
295 |    any of the "specials" defined in RFC 822.  Finally, certain other
296 |    characters are disallowed in some contexts, to ensure reliability for
297 |    messages that pass through internetwork mail gateways.
298 | 
299 |    The "B" encoding automatically meets these requirements.  The "Q"
300 |    encoding allows a wide range of printable characters to be used in
301 |    non-critical locations in the message header (e.g., Subject), with
302 |    fewer characters available for use in other locations.
303 | 
304 | 4.1. The "B" encoding
305 | 
306 |    The "B" encoding is identical to the "BASE64" encoding defined by RFC
307 |    2045.
308 | 
309 | 4.2. The "Q" encoding
310 | 
311 |    The "Q" encoding is similar to the "Quoted-Printable" content-
312 |    transfer-encoding defined in RFC 2045.  It is designed to allow text
313 |    containing mostly ASCII characters to be decipherable on an ASCII
314 |    terminal without decoding.
315 | 
316 |    (1) Any 8-bit value may be represented by a "=" followed by two
317 |        hexadecimal digits.  For example, if the character set in use
318 |        were ISO-8859-1, the "=" character would thus be encoded as
319 |        "=3D", and a SPACE by "=20".  (Upper case should be used for
320 |        hexadecimal digits "A" through "F".)
321 | 
322 |    (2) The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
323 |        represented as "_" (underscore, ASCII 95.).  (This character may
324 |        not pass through some internetwork mail gateways, but its use
325 |        will greatly enhance readability of "Q" encoded data with mail
326 |        readers that do not support this encoding.)  Note that the "_"
327 |        always represents hexadecimal 20, even if the SPACE character
328 |        occupies a different code position in the character set in use.
329 | 
330 |    (3) 8-bit values which correspond to printable ASCII characters other
331 |        than "=", "?", and "_" (underscore), MAY be represented as those
332 |        characters.  (But see section 5 for restrictions.)  In
333 |        particular, SPACE and TAB MUST NOT be represented as themselves
334 |        within encoded words.
335 | 
336 | 
337 | 
338 | Moore                       Standards Track                     [Page 6]
339 | 
340 | RFC 2047               Message Header Extensions           November 1996
341 | 
342 | 
343 | 5. Use of encoded-words in message headers
344 | 
345 |    An 'encoded-word' may appear in a message header or body part header
346 |    according to the following rules:
347 | 
348 | (1) An 'encoded-word' may replace a 'text' token (as defined by RFC 822)
349 |     in any Subject or Comments header field, any extension message
350 |     header field, or any MIME body part field for which the field body
351 |     is defined as '*text'.  An 'encoded-word' may also appear in any
352 |     user-defined ("X-") message or body part header field.
353 | 
354 |     Ordinary ASCII text and 'encoded-word's may appear together in the
355 |     same header field.  However, an 'encoded-word' that appears in a
356 |     header field defined as '*text' MUST be separated from any adjacent
357 |     'encoded-word' or 'text' by 'linear-white-space'.
358 | 
359 | (2) An 'encoded-word' may appear within a 'comment' delimited by "(" and
360 |     ")", i.e., wherever a 'ctext' is allowed.  More precisely, the RFC
361 |     822 ABNF definition for 'comment' is amended as follows:
362 | 
363 |     comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
364 | 
365 |     A "Q"-encoded 'encoded-word' which appears in a 'comment' MUST NOT
366 |     contain the characters "(", ")" or "\".  An
367 |     'encoded-word' that appears in a 'comment' MUST be separated from
368 |     any adjacent 'encoded-word' or 'ctext' by 'linear-white-space'.
369 | 
370 |     It is important to note that 'comment's are only recognized inside
371 |     "structured" field bodies.  In fields whose bodies are defined as
372 |     '*text', "(" and ")" are treated as ordinary characters rather than
373 |     comment delimiters, and rule (1) of this section applies.  (See RFC
374 |     822, sections 3.1.2 and 3.1.3)
375 | 
376 | (3) As a replacement for a 'word' entity within a 'phrase', for example,
377 |     one that precedes an address in a From, To, or Cc header.  The ABNF
378 |     definition for 'phrase' from RFC 822 thus becomes:
379 | 
380 |     phrase = 1*( encoded-word / word )
381 | 
382 |     In this case the set of characters that may be used in a "Q"-encoded
383 |     'encoded-word' is restricted to: <upper and lower case ASCII
384 |     letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
385 |     (underscore, ASCII 95.)>.  An 'encoded-word' that appears within a
386 |     'phrase' MUST be separated from any adjacent 'word', 'text' or
387 |     'special' by 'linear-white-space'.
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 
394 | Moore                       Standards Track                     [Page 7]
395 | 
396 | RFC 2047               Message Header Extensions           November 1996
397 | 
398 | 
399 |    These are the ONLY locations where an 'encoded-word' may appear.  In
400 |    particular:
401 | 
402 |    + An 'encoded-word' MUST NOT appear in any portion of an 'addr-spec'.
403 | 
404 |    + An 'encoded-word' MUST NOT appear within a 'quoted-string'.
405 | 
406 |    + An 'encoded-word' MUST NOT be used in a Received header field.
407 | 
408 |    + An 'encoded-word' MUST NOT be used in a parameter of a MIME
409 |      Content-Type or Content-Disposition field, or in any structured
410 |      field body except within a 'comment' or 'phrase'.
411 | 
412 |    The 'encoded-text' in an 'encoded-word' must be self-contained;
413 |    'encoded-text' MUST NOT be continued from one 'encoded-word' to
414 |    another.  This implies that the 'encoded-text' portion of a "B"
415 |    'encoded-word' will be a multiple of 4 characters long; for a "Q"
416 |    'encoded-word', any "=" character that appears in the 'encoded-text'
417 |    portion will be followed by two hexadecimal characters.
418 | 
419 |    Each 'encoded-word' MUST encode an integral number of octets.  The
420 |    'encoded-text' in each 'encoded-word' must be well-formed according
421 |    to the encoding specified; the 'encoded-text' may not be continued in
422 |    the next 'encoded-word'.  (For example, "=?charset?Q?=?=
423 |    =?charset?Q?AB?=" would be illegal, because the two hex digits "AB"
424 |    must follow the "=" in the same 'encoded-word'.)
425 | 
426 |    Each 'encoded-word' MUST represent an integral number of characters.
427 |    A multi-octet character may not be split across adjacent 'encoded-
428 |    word's.
429 | 
430 |    Only printable and white space character data should be encoded using
431 |    this scheme.  However, since these encoding schemes allow the
432 |    encoding of arbitrary octet values, mail readers that implement this
433 |    decoding should also ensure that display of the decoded data on the
434 |    recipient's terminal will not cause unwanted side-effects.
435 | 
436 |    Use of these methods to encode non-textual data (e.g., pictures or
437 |    sounds) is not defined by this memo.  Use of 'encoded-word's to
438 |    represent strings of purely ASCII characters is allowed, but
439 |    discouraged.  In rare cases it may be necessary to encode ordinary
440 |    text that looks like an 'encoded-word'.
441 | 
442 | 
443 | 
444 | 
445 | 
446 | 
447 | 
448 | 
449 | 
450 | Moore                       Standards Track                     [Page 8]
451 | 
452 | RFC 2047               Message Header Extensions           November 1996
453 | 
454 | 
455 | 6. Support of 'encoded-word's by mail readers
456 | 
457 | 6.1. Recognition of 'encoded-word's in message headers
458 | 
459 |    A mail reader must parse the message and body part headers according
460 |    to the rules in RFC 822 to correctly recognize 'encoded-word's.
461 | 
462 |    'encoded-word's are to be recognized as follows:
463 | 
464 |    (1) Any message or body part header field defined as '*text', or any
465 |        user-defined header field, should be parsed as follows: Beginning
466 |        at the start of the field-body and immediately following each
467 |        occurrence of 'linear-white-space', each sequence of up to 75
468 |        printable characters (not containing any 'linear-white-space')
469 |        should be examined to see if it is an 'encoded-word' according to
470 |        the syntax rules in section 2.  Any other sequence of printable
471 |        characters should be treated as ordinary ASCII text.
472 | 
473 |    (2) Any header field not defined as '*text' should be parsed
474 |        according to the syntax rules for that header field.  However,
475 |        any 'word' that appears within a 'phrase' should be treated as an
476 |        'encoded-word' if it meets the syntax rules in section 2.
477 |        Otherwise it should be treated as an ordinary 'word'.
478 | 
479 |    (3) Within a 'comment', any sequence of up to 75 printable characters
480 |        (not containing 'linear-white-space'), that meets the syntax
481 |        rules in section 2, should be treated as an 'encoded-word'.
482 |        Otherwise it should be treated as normal comment text.
483 | 
484 |    (4) A MIME-Version header field is NOT required to be present for
485 |        'encoded-word's to be interpreted according to this
486 |        specification.  One reason for this is that the mail reader is
487 |        not expected to parse the entire message header before displaying
488 |        lines that may contain 'encoded-word's.
489 | 
490 | 6.2. Display of 'encoded-word's
491 | 
492 |    Any 'encoded-word's so recognized are decoded, and if possible, the
493 |    resulting unencoded text is displayed in the original character set.
494 | 
495 |    NOTE: Decoding and display of encoded-words occurs *after* a
496 |    structured field body is parsed into tokens.  It is therefore
497 |    possible to hide 'special' characters in encoded-words which, when
498 |    displayed, will be indistinguishable from 'special' characters in the
499 |    surrounding text.  For this and other reasons, it is NOT generally
500 |    possible to translate a message header containing 'encoded-word's to
501 |    an unencoded form which can be parsed by an RFC 822 mail reader.
502 | 
503 | 
504 | 
505 | 
506 | Moore                       Standards Track                     [Page 9]
507 | 
508 | RFC 2047               Message Header Extensions           November 1996
509 | 
510 | 
511 |    When displaying a particular header field that contains multiple
512 |    'encoded-word's, any 'linear-white-space' that separates a pair of
513 |    adjacent 'encoded-word's is ignored.  (This is to allow the use of
514 |    multiple 'encoded-word's to represent long strings of unencoded text,
515 |    without having to separate 'encoded-word's where spaces occur in the
516 |    unencoded text.)
517 | 
518 |    In the event other encodings are defined in the future, and the mail
519 |    reader does not support the encoding used, it may either (a) display
520 |    the 'encoded-word' as ordinary text, or (b) substitute an appropriate
521 |    message indicating that the text could not be decoded.
522 | 
523 |    If the mail reader does not support the character set used, it may
524 |    (a) display the 'encoded-word' as ordinary text (i.e., as it appears
525 |    in the header), (b) make a "best effort" to display using such
526 |    characters as are available, or (c) substitute an appropriate message
527 |    indicating that the decoded text could not be displayed.
528 | 
529 |    If the character set being used employs code-switching techniques,
530 |    display of the encoded text implicitly begins in "ASCII mode".  In
531 |    addition, the mail reader must ensure that the output device is once
532 |    again in "ASCII mode" after the 'encoded-word' is displayed.
533 | 
534 | 6.3. Mail reader handling of incorrectly formed 'encoded-word's
535 | 
536 |    It is possible that an 'encoded-word' that is legal according to the
537 |    syntax defined in section 2, is incorrectly formed according to the
538 |    rules for the encoding being used.   For example:
539 | 
540 |    (1) An 'encoded-word' which contains characters which are not legal
541 |        for a particular encoding (for example, a "-" in the "B"
542 |        encoding, or a SPACE or HTAB in either the "B" or "Q" encoding),
543 |        is incorrectly formed.
544 | 
545 |    (2) Any 'encoded-word' which encodes a non-integral number of
546 |        characters or octets is incorrectly formed.
547 | 
548 |    A mail reader need not attempt to display the text associated with an
549 |    'encoded-word' that is incorrectly formed.  However, a mail reader
550 |    MUST NOT prevent the display or handling of a message because an
551 |    'encoded-word' is incorrectly formed.
552 | 
553 | 7. Conformance
554 | 
555 |    A mail composing program claiming compliance with this specification
556 |    MUST ensure that any string of non-white-space printable ASCII
557 |    characters within a '*text' or '*ctext' that begins with "=?" and
558 |    ends with "?=" be a valid 'encoded-word'.  ("begins" means: at the
559 | 
560 | 
561 | 
562 | Moore                       Standards Track                    [Page 10]
563 | 
564 | RFC 2047               Message Header Extensions           November 1996
565 | 
566 | 
567 |    start of the field-body, immediately following 'linear-white-space',
568 |    or immediately following a "(" for an 'encoded-word' within '*ctext';
569 |    "ends" means: at the end of the field-body, immediately preceding
570 |    'linear-white-space', or immediately preceding a ")" for an
571 |    'encoded-word' within '*ctext'.)  In addition, any 'word' within a
572 |    'phrase' that begins with "=?" and ends with "?=" must be a valid
573 |    'encoded-word'.
574 | 
575 |    A mail reading program claiming compliance with this specification
576 |    must be able to distinguish 'encoded-word's from 'text', 'ctext', or
577 |    'word's, according to the rules in section 6, anytime they appear in
578 |    appropriate places in message headers.  It must support both the "B"
579 |    and "Q" encodings for any character set which it supports.  The
580 |    program must be able to display the unencoded text if the character
581 |    set is "US-ASCII".  For the ISO-8859-* character sets, the mail
582 |    reading program must at least be able to display the characters which
583 |    are also in the ASCII set.
584 | 
585 | 8. Examples
586 | 
587 |    The following are examples of message headers containing 'encoded-
588 |    word's:
589 | 
590 |    From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
591 |    To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
592 |    CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
593 |    Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
594 |     =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
595 | 
596 |       Note: In the first 'encoded-word' of the Subject field above, the
597 |       last "=" at the end of the 'encoded-text' is necessary because each
598 |       'encoded-word' must be self-contained (the "=" character completes a
599 |       group of 4 base64 characters representing 2 octets).  An additional
600 |       octet could have been encoded in the first 'encoded-word' (so that
601 |       the encoded-word would contain an exact multiple of 3 encoded
602 |       octets), except that the second 'encoded-word' uses a different
603 |       'charset' than the first one.
604 | 
605 |    From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>
606 |    To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se
607 |    Subject: Time for ISO 10646?
608 | 
609 |    To: Dave Crocker <dcrocker@mordor.stanford.edu>
610 |    Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se
611 |    From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>
612 |    Subject: Re: RFC-HDR care and feeding
613 | 
614 | 
615 | 
616 | 
617 | 
618 | Moore                       Standards Track                    [Page 11]
619 | 
620 | RFC 2047               Message Header Extensions           November 1996
621 | 
622 | 
623 |    From: Nathaniel Borenstein <nsb@thumper.bellcore.com>
624 |          (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)
625 |    To: Greg Vaudreuil <gvaudre@NRI.Reston.VA.US>, Ned Freed
626 |       <ned@innosoft.com>, Keith Moore <moore@cs.utk.edu>
627 |    Subject: Test of new header generator
628 |    MIME-Version: 1.0
629 |    Content-type: text/plain; charset=ISO-8859-1
630 | 
631 |    The following examples illustrate the display of text containing
632 |    'encoded-word's in a structured field body.  The rules are slightly
633 |    different for fields defined as '*text' because "(" and ")" are not
634 |    recognized as 'comment' delimiters.  [Section 5, paragraph (1)].
635 | 
636 |    In each of the following examples, if the same sequence were to occur
637 |    in a '*text' field, the "displayed as" form would NOT be treated as
638 |    encoded words, but be identical to the "encoded form".  This is
639 |    because each of the encoded-words in the following examples is
640 |    adjacent to a "(" or ")" character.
641 | 
642 |    encoded form                                displayed as
643 |    ---------------------------------------------------------------------
644 |    (=?ISO-8859-1?Q?a?=)                        (a)
645 | 
646 |    (=?ISO-8859-1?Q?a?= b)                      (a b)
647 | 
648 |            Within a 'comment', white space MUST appear between an
649 |            'encoded-word' and surrounding text.  [Section 5,
650 |            paragraph (2)].  However, white space is not needed between
651 |            the initial "(" that begins the 'comment', and the
652 |            'encoded-word'.
653 | 
654 | 
655 |    (=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)     (ab)
656 | 
657 |            White space between adjacent 'encoded-word's is not
658 |            displayed.
659 | 
660 |    (=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)    (ab)
661 | 
662 |         Even multiple SPACEs between 'encoded-word's are ignored
663 |         for the purpose of display.
664 | 
665 |    (=?ISO-8859-1?Q?a?=                         (ab)
666 |        =?ISO-8859-1?Q?b?=)
667 | 
668 |            Any amount of linear-white-space between 'encoded-word's,
669 |            even if it includes a CRLF followed by one or more SPACEs,
670 |            is ignored for the purposes of display.
671 | 
672 | 
673 | 
674 | Moore                       Standards Track                    [Page 12]
675 | 
676 | RFC 2047               Message Header Extensions           November 1996
677 | 
678 | 
679 |    (=?ISO-8859-1?Q?a_b?=)                      (a b)
680 | 
681 |            In order to cause a SPACE to be displayed within a portion
682 |            of encoded text, the SPACE MUST be encoded as part of the
683 |            'encoded-word'.
684 | 
685 |    (=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)    (a b)
686 | 
687 |            In order to cause a SPACE to be displayed between two strings
688 |            of encoded text, the SPACE MAY be encoded as part of one of
689 |            the 'encoded-word's.
690 | 
691 | 9. References
692 | 
693 |    [RFC 822] Crocker, D., "Standard for the Format of ARPA Internet Text
694 |        Messages", STD 11, RFC 822, UDEL, August 1982.
695 | 
696 |    [RFC 2049] Borenstein, N., and N. Freed, "Multipurpose Internet Mail
697 |        Extensions (MIME) Part Five: Conformance Criteria and Examples",
698 |        RFC 2049, November 1996.
699 | 
700 |    [RFC 2045] Borenstein, N., and N. Freed, "Multipurpose Internet Mail
701 |        Extensions (MIME) Part One: Format of Internet Message Bodies",
702 |        RFC 2045, November 1996.
703 | 
704 |    [RFC 2046] Borenstein N., and N. Freed, "Multipurpose Internet Mail
705 |        Extensions (MIME) Part Two: Media Types", RFC 2046,
706 |        November 1996.
707 | 
708 |    [RFC 2048] Freed, N., Klensin, J., and J. Postel, "Multipurpose
709 |        Internet Mail Extensions (MIME) Part Four: Registration
710 |        Procedures", RFC 2048, November 1996.
711 | 
712 | 
713 | 
714 | 
715 | 
716 | 
717 | 
718 | 
719 | 
720 | 
721 | 
722 | 
723 | 
724 | 
725 | 
726 | 
727 | 
728 | 
729 | 
730 | Moore                       Standards Track                    [Page 13]
731 | 
732 | RFC 2047               Message Header Extensions           November 1996
733 | 
734 | 
735 | 10. Security Considerations
736 | 
737 |    Security issues are not discussed in this memo.
738 | 
739 | 11. Acknowledgements
740 | 
741 |    The author wishes to thank Nathaniel Borenstein, Issac Chan, Lutz
742 |    Donnerhacke, Paul Eggert, Ned Freed, Andreas M. Kirchwitz, Olle
743 |    Jarnefors, Mike Rosin, Yutaka Sato, Bart Schaefer, and Kazuhiko
744 |    Yamamoto, for their helpful advice, insightful comments, and
745 |    illuminating questions in response to earlier versions of this
746 |    specification.
747 | 
748 | 12. Author's Address
749 | 
750 |    Keith Moore
751 |    University of Tennessee
752 |    107 Ayres Hall
753 |    Knoxville TN 37996-1301
754 | 
755 |    EMail: moore@cs.utk.edu
756 | 
757 | 
758 | 
759 | 
760 | 
761 | 
762 | 
763 | 
764 | 
765 | 
766 | 
767 | 
768 | 
769 | 
770 | 
771 | 
772 | 
773 | 
774 | 
775 | 
776 | 
777 | 
778 | 
779 | 
780 | 
781 | 
782 | 
783 | 
784 | 
785 | 
786 | Moore                       Standards Track                    [Page 14]
787 | 
788 | RFC 2047               Message Header Extensions           November 1996
789 | 
790 | 
791 | Appendix - changes since RFC 1522 (in no particular order)
792 | 
793 |    + explicitly state that the MIME-Version is not required to use
794 |      'encoded-word's.
795 | 
796 |    + add explicit note that SPACEs and TABs are not allowed within
797 |      'encoded-word's, explaining that an 'encoded-word' must look like an
798 |      'atom' to an RFC822 parser.
799 | 
800 |    + add examples from Olle Jarnefors (thanks!) which illustrate how
801 |      encoded-words with adjacent linear-white-space are displayed.
802 | 
803 |    + explicitly list terms defined in RFC822 and referenced in this memo
804 | 
805 |    + fix transcription typos that caused one or two lines and a couple of
806 |      characters to disappear in the resulting text, due to nroff quirks.
807 | 
808 |    + clarify that encoded-words are allowed in '*text' fields in both
809 |      RFC822 headers and MIME body part headers, but NOT as parameter
810 |      values.
811 | 
812 |    + clarify the requirement to switch back to ASCII within the encoded
813 |      portion of an 'encoded-word', for any charset that uses code switching
814 |      sequences.
815 | 
816 |    + add a note about 'encoded-word's being delimited by "(" and ")"
817 |      within a comment, but not in a *text (how bizarre!).
818 | 
819 |    + fix the Andre Pirard example to get rid of the trailing "_" after
820 |      the =E9.  (no longer needed post-1342).
821 | 
822 |    + clarification: an 'encoded-word' may appear immediately following
823 |      the initial "(" or immediately before the final ")" that delimits a
824 |      comment, not just adjacent to "(" and ")" *within* *ctext.
825 | 
826 |    + add a note to explain that a "B" 'encoded-word' will always have a
827 |      multiple of 4 characters in the 'encoded-text' portion.
828 | 
829 |    + add note about the "=" in the examples
830 | 
831 |    + note that processing of 'encoded-word's occurs *after* parsing, and
832 |      some of the implications thereof.
833 | 
834 |    + explicitly state that you can't expect to translate between
835 |      1522 and either vanilla 822 or so-called "8-bit headers".
836 | 
837 |    + explicitly state that 'encoded-word's are not valid within a
838 |      'quoted-string'.
839 | 
840 | 
841 | 
842 | Moore                       Standards Track                    [Page 15]
843 | 
844 | 


--------------------------------------------------------------------------------
/rfc/2048-registration-procedures.txt:
--------------------------------------------------------------------------------
   1 | 
   2 | 
   3 | 
   4 | 
   5 | 
   6 | 
   7 | Network Working Group                                           N. Freed
   8 | Request for Comments: 2048                                      Innosoft
   9 | BCP: 13                                                       J. Klensin
  10 | Obsoletes: 1521, 1522, 1590                                          MCI
  11 | Category: Best Current Practice                                J. Postel
  12 |                                                                      ISI
  13 |                                                            November 1996
  14 | 
  15 | 
  16 |                  Multipurpose Internet Mail Extensions
  17 |                            (MIME) Part Four:
  18 |                         Registration Procedures
  19 | 
  20 | Status of this Memo
  21 | 
  22 |    This document specifies an Internet Best Current Practices for the
  23 |    Internet Community, and requests discussion and suggestions for
  24 |    improvements.  Distribution of this memo is unlimited.
  25 | 
  26 | Abstract
  27 | 
  28 |    STD 11, RFC 822, defines a message representation protocol specifying
  29 |    considerable detail about US-ASCII message headers, and leaves the
  30 |    message content, or message body, as flat US-ASCII text.  This set of
  31 |    documents, collectively called the Multipurpose Internet Mail
  32 |    Extensions, or MIME, redefines the format of messages to allow for
  33 | 
  34 |     (1)   textual message bodies in character sets other than
  35 |           US-ASCII,
  36 | 
  37 |     (2)   an extensible set of different formats for non-textual
  38 |           message bodies,
  39 | 
  40 |     (3)   multi-part message bodies, and
  41 | 
  42 |     (4)   textual header information in character sets other than
  43 |           US-ASCII.
  44 | 
  45 |    These documents are based on earlier work documented in RFC 934, STD
  46 |    11, and RFC 1049, but extend and revise them.  Because RFC 822 said
  47 |    so little about message bodies, these documents are largely
  48 |    orthogonal to (rather than a revision of) RFC 822.
  49 | 
  50 | 
  51 | 
  52 | 
  53 | 
  54 | 
  55 | 
  56 | 
  57 | 
  58 | Freed, et. al.           Best Current Practice                  [Page 1]
  59 | 
  60 | RFC 2048              MIME Registration Procedures         November 1996
  61 | 
  62 | 
  63 |    This fourth document, RFC 2048, specifies various IANA registration
  64 |    procedures for the following MIME facilities:
  65 | 
  66 |     (1)   media types,
  67 | 
  68 |     (2)   external body access types,
  69 | 
  70 |     (3)   content-transfer-encodings.
  71 | 
  72 |    Registration of character sets for use in MIME is covered elsewhere
  73 |    and is no longer addressed by this document.
  74 | 
  75 |    These documents are revisions of RFCs 1521 and 1522, which themselves
  76 |    were revisions of RFCs 1341 and 1342.  An appendix in RFC 2049
  77 |    describes differences and changes from previous versions.
  78 | 
  79 | Table of Contents
  80 | 
  81 |    1. Introduction .........................................    3
  82 |    2. Media Type Registration ..............................    4
  83 |    2.1 Registration Trees and Subtype Names ................    4
  84 |    2.1.1 IETF Tree .........................................    4
  85 |    2.1.2 Vendor Tree .......................................    4
  86 |    2.1.3 Personal or Vanity Tree ...........................    5
  87 |    2.1.4 Special `x.' Tree .................................    5
  88 |    2.1.5 Additional Registration Trees .....................    6
  89 |    2.2 Registration Requirements ...........................    6
  90 |    2.2.1 Functionality Requirement .........................    6
  91 |    2.2.2 Naming Requirements ...............................    6
  92 |    2.2.3 Parameter Requirements ............................    7
  93 |    2.2.4 Canonicalization and Format Requirements ..........    7
  94 |    2.2.5 Interchange Recommendations .......................    8
  95 |    2.2.6 Security Requirements .............................    8
  96 |    2.2.7 Usage and Implementation Non-requirements .........    9
  97 |    2.2.8 Publication Requirements ..........................   10
  98 |    2.2.9 Additional Information ............................   10
  99 |    2.3 Registration Procedure ..............................   11
 100 |    2.3.1 Present the Media Type to the Community for  Review   11
 101 |    2.3.2 IESG Approval .....................................   12
 102 |    2.3.3 IANA Registration .................................   12
 103 |    2.4 Comments on Media Type Registrations ................   12
 104 |    2.5 Location of Registered Media Type List ..............   12
 105 |    2.6 IANA Procedures for Registering Media Types .........   12
 106 |    2.7 Change Control ......................................   13
 107 |    2.8 Registration Template ...............................   14
 108 |    3. External Body Access Types ...........................   14
 109 |    3.1 Registration Requirements ...........................   15
 110 |    3.1.1 Naming Requirements ...............................   15
 111 | 
 112 | 
 113 | 
 114 | Freed, et. al.           Best Current Practice                  [Page 2]
 115 | 
 116 | RFC 2048              MIME Registration Procedures         November 1996
 117 | 
 118 | 
 119 |    3.1.2 Mechanism Specification Requirements ..............   15
 120 |    3.1.3 Publication Requirements ..........................   15
 121 |    3.1.4 Security Requirements .............................   15
 122 |    3.2 Registration Procedure ..............................   15
 123 |    3.2.1 Present the Access Type to the Community ..........   16
 124 |    3.2.2 Access Type Reviewer ..............................   16
 125 |    3.2.3 IANA Registration .................................   16
 126 |    3.3 Location of Registered Access Type List .............   16
 127 |    3.4 IANA Procedures for Registering Access Types ........   16
 128 |    4. Transfer Encodings ...................................   17
 129 |    4.1 Transfer Encoding Requirements ......................   17
 130 |    4.1.1 Naming Requirements ...............................   17
 131 |    4.1.2 Algorithm Specification Requirements ..............   18
 132 |    4.1.3 Input Domain Requirements .........................   18
 133 |    4.1.4 Output Range Requirements .........................   18
 134 |    4.1.5 Data Integrity and Generality Requirements ........   18
 135 |    4.1.6 New Functionality Requirements ....................   18
 136 |    4.2 Transfer Encoding Definition Procedure ..............   19
 137 |    4.3 IANA Procedures for Transfer Encoding Registration...   19
 138 |    4.4 Location of Registered Transfer Encodings List ......   19
 139 |    5. Authors' Addresses ...................................   20
 140 |    A. Grandfathered Media Types ............................   21
 141 | 
 142 | 1.  Introduction
 143 | 
 144 |    Recent Internet protocols have been carefully designed to be easily
 145 |    extensible in certain areas.  In particular, MIME [RFC 2045] is an
 146 |    open-ended framework and can accommodate additional object types,
 147 |    character sets, and access methods without any changes to the basic
 148 |    protocol.  A registration process is needed, however, to ensure that
 149 |    the set of such values is developed in an orderly, well-specified,
 150 |    and public manner.
 151 | 
 152 |    This document defines registration procedures which use the Internet
 153 |    Assigned Numbers Authority (IANA) as a central registry for such
 154 |    values.
 155 | 
 156 |    Historical Note: The registration process for media types was
 157 |    initially defined in the context of the asynchronous Internet mail
 158 |    environment.  In this mail environment there is a need to limit the
 159 |    number of possible media types to increase the likelihood of
 160 |    interoperability when the capabilities of the remote mail system are
 161 |    not known.  As media types are used in new environments, where the
 162 |    proliferation of media types is not a hindrance to interoperability,
 163 |    the original procedure was excessively restrictive and had to be
 164 |    generalized.
 165 | 
 166 | 
 167 | 
 168 | 
 169 | 
 170 | Freed, et. al.           Best Current Practice                  [Page 3]
 171 | 
 172 | RFC 2048              MIME Registration Procedures         November 1996
 173 | 
 174 | 
 175 | 2.  Media Type Registration
 176 | 
 177 |    Registration of a new media type or types starts with the
 178 |    construction of a registration proposal.  Registration may occur in
 179 |    several different registration trees, which have different
 180 |    requirements as discussed below.  In general, the new registration
 181 |    proposal is circulated and reviewed in a fashion appropriate to the
 182 |    tree involved.  The media type is then registered if the proposal is
 183 |    acceptable.  The following sections describe the requirements and
 184 |    procedures used for each of the different registration trees.
 185 | 
 186 | 2.1.  Registration Trees and Subtype Names
 187 | 
 188 |    In order to increase the efficiency and flexibility of the
 189 |    registration process, different structures of subtype names may be
 190 |    registered to accommodate the different natural requirements for,
 191 |    e.g., a subtype that will be recommended for wide support and
 192 |    implementation by the Internet Community or a subtype that is used to
 193 |    move files associated with proprietary software.  The following
 194 |    subsections define registration "trees", distinguished by the use of
 195 |    faceted names (e.g., names of the form "tree.subtree...type").  Note
 196 |    that some media types defined prior to this document do not conform
 197 |    to the naming conventions described below.  See Appendix A for a
 198 |    discussion of them.
 199 | 
 200 | 2.1.1.  IETF Tree
 201 | 
 202 |    The IETF tree is intended for types of general interest to the
 203 |    Internet Community. Registration in the IETF tree requires approval
 204 |    by the IESG and publication of the media type registration as some
 205 |    form of RFC.
 206 | 
 207 |    Media types in the IETF tree are normally denoted by names that are
 208 |    not explicitly faceted, i.e., do not contain period (".", full stop)
 209 |    characters.
 210 | 
 211 |    The "owner" of a media type registration in the IETF tree is assumed
 212 |    to be the IETF itself.  Modification or alteration of the
 213 |    specification requires the same level of processing (e.g.  standards
 214 |    track) required for the initial registration.
 215 | 
 216 | 2.1.2.  Vendor Tree
 217 | 
 218 |    The vendor tree is used for media types associated with commercially
 219 |    available products.  "Vendor" or "producer" are construed as
 220 |    equivalent and very broadly in this context.
 221 | 
 222 | 
 223 | 
 224 | 
 225 | 
 226 | Freed, et. al.           Best Current Practice                  [Page 4]
 227 | 
 228 | RFC 2048              MIME Registration Procedures         November 1996
 229 | 
 230 | 
 231 |    A registration may be placed in the vendor tree by anyone who has
 232 |    need to interchange files associated with the particular product.
 233 |    However, the registration formally belongs to the vendor or
 234 |    organization producing the software or file format.  Changes to the
 235 |    specification will be made at their request, as discussed in
 236 |    subsequent sections.
 237 | 
 238 |    Registrations in the vendor tree will be distinguished by the leading
 239 |    facet "vnd.".  That may be followed, at the discretion of the
 240 |    registration, by either a media type name from a well-known producer
 241 |    (e.g., "vnd.mudpie") or by an IANA-approved designation of the
 242 |    producer's name which is then followed by a media type or product
 243 |    designation (e.g., vnd.bigcompany.funnypictures).
 244 | 
 245 |    While public exposure and review of media types to be registered in
 246 |    the vendor tree is not required, using the ietf-types list for review
 247 |    is strongly encouraged to improve the quality of those
 248 |    specifications. Registrations in the vendor tree may be submitted
 249 |    directly to the IANA.
 250 | 
 251 | 2.1.3.  Personal or Vanity Tree
 252 | 
 253 |    Registrations for media types created experimentally or as part of
 254 |    products that are not distributed commercially may be registered in
 255 |    the personal or vanity tree.  The registrations are distinguished by
 256 |    the leading facet "prs.".
 257 | 
 258 |    The owner of "personal" registrations and associated specifications
 259 |    is the person or entity making the registration, or one to whom
 260 |    responsibility has been transferred as described below.
 261 | 
 262 |    While public exposure and review of media types to be registered in
 263 |    the personal tree is not required, using the ietf-types list for
 264 |    review is strongly encouraged to improve the quality of those
 265 |    specifications.  Registrations in the personal tree may be submitted
 266 |    directly to the IANA.
 267 | 
 268 | 2.1.4.  Special `x.' Tree
 269 | 
 270 |    For convenience and symmetry with this registration scheme, media
 271 |    type names with "x." as the first facet may be used for the same
 272 |    purposes for which names starting in "x-" are normally used.  These
 273 |    types are unregistered, experimental, and should be used only with
 274 |    the active agreement of the parties exchanging them.
 275 | 
 276 | 
 277 | 
 278 | 
 279 | 
 280 | 
 281 | 
 282 | Freed, et. al.           Best Current Practice                  [Page 5]
 283 | 
 284 | RFC 2048              MIME Registration Procedures         November 1996
 285 | 
 286 | 
 287 |    However, with the simplified registration procedures described above
 288 |    for vendor and personal trees, it should rarely, if ever, be
 289 |    necessary to use unregistered experimental types, and as such use of
 290 |    both "x-" and "x." forms is discouraged.
 291 | 
 292 | 2.1.5.  Additional Registration Trees
 293 | 
 294 |    From time to time and as required by the community, the IANA may,
 295 |    with the advice and consent of the IESG, create new top-level
 296 |    registration trees.  It is explicitly assumed that these trees may be
 297 |    created for external registration and management by well-known
 298 |    permanent bodies, such as scientific societies for media types
 299 |    specific to the sciences they cover.  In general, the quality of
 300 |    review of specifications for one of these additional registration
 301 |    trees is expected to be equivalent to that which IETF would give to
 302 |    registrations in its own tree. Establishment of these new trees will
 303 |    be announced through RFC publication approved by the IESG.
 304 | 
 305 | 2.2.  Registration Requirements
 306 | 
 307 |    Media type registration proposals are all expected to conform to
 308 |    various requirements laid out in the following sections.  Note that
 309 |    requirement specifics sometimes vary depending on the registration
 310 |    tree, again as detailed in the following sections.
 311 | 
 312 | 2.2.1.  Functionality Requirement
 313 | 
 314 |    Media types must function as an actual media format: Registration of
 315 |    things that are better thought of as a transfer encoding, as a
 316 |    character set, or as a collection of separate entities of another
 317 |    type, is not allowed.  For example, although applications exist to
 318 |    decode the base64 transfer encoding [RFC 2045], base64 cannot be
 319 |    registered as a media type.
 320 | 
 321 |    This requirement applies regardless of the registration tree
 322 |    involved.
 323 | 
 324 | 2.2.2.  Naming Requirements
 325 | 
 326 |    All registered media types must be assigned MIME type and subtype
 327 |    names. The combination of these names then serves to uniquely
 328 |    identify the media type and the format of the subtype name identifies
 329 |    the registration tree.
 330 | 
 331 |    The choice of top-level type name must take the nature of media type
 332 |    involved into account. For example, media normally used for
 333 |    representing still images should be a subtype of the image content
 334 |    type, whereas media capable of representing audio information belongs
 335 | 
 336 | 
 337 | 
 338 | Freed, et. al.           Best Current Practice                  [Page 6]
 339 | 
 340 | RFC 2048              MIME Registration Procedures         November 1996
 341 | 
 342 | 
 343 |    under the audio content type. See RFC 2046 for additional information
 344 |    on the basic set of top-level types and their characteristics.
 345 | 
 346 |    New subtypes of top-level types must conform to the restrictions of
 347 |    the top-level type, if any. For example, all subtypes of the
 348 |    multipart content type must use the same encapsulation syntax.
 349 | 
 350 |    In some cases a new media type may not "fit" under any currently
 351 |    defined top-level content type. Such cases are expected to be quite
 352 |    rare. However, if such a case arises a new top-level type can be
 353 |    defined to accommodate it. Such a definition must be done via
 354 |    standards-track RFC; no other mechanism can be used to define
 355 |    additional top-level content types.
 356 | 
 357 |    These requirements apply regardless of the registration tree
 358 |    involved.
 359 | 
 360 | 2.2.3.  Parameter Requirements
 361 | 
 362 |    Media types may elect to use one or more MIME content type
 363 |    parameters, or some parameters may be automatically made available to
 364 |    the media type by virtue of being a subtype of a content type that
 365 |    defines a set of parameters applicable to any of its subtypes.  In
 366 |    either case, the names, values, and meanings of any parameters must
 367 |    be fully specified when a media type is registered in the IETF tree,
 368 |    and should be specified as completely as possible when media types
 369 |    are registered in the vendor or personal trees.
 370 | 
 371 |    New parameters must not be defined as a way to introduce new
 372 |    functionality in types registered in the IETF tree, although new
 373 |    parameters may be added to convey additional information that does
 374 |    not otherwise change existing functionality.  An example of this
 375 |    would be a "revision" parameter to indicate a revision level of an
 376 |    external specification such as JPEG.  Similar behavior is encouraged
 377 |    for media types registered in the vendor or personal trees but is not
 378 |    required.
 379 | 
 380 | 2.2.4.  Canonicalization and Format Requirements
 381 | 
 382 |    All registered media types must employ a single, canonical data
 383 |    format, regardless of registration tree.
 384 | 
 385 |    A precise and openly available specification of the format of each
 386 |    media type is required for all types registered in the IETF tree and
 387 |    must at a minimum be referenced by, if it isn't actually included in,
 388 |    the media type registration proposal itself.
 389 | 
 390 | 
 391 | 
 392 | 
 393 | 
 394 | Freed, et. al.           Best Current Practice                  [Page 7]
 395 | 
 396 | RFC 2048              MIME Registration Procedures         November 1996
 397 | 
 398 | 
 399 |    The specifications of format and processing particulars may or may
 400 |    not be publicly available for media types registered in the vendor
 401 |    tree, and such registration proposals are explicitly permitted to
 402 |    include only a specification of which software and version produce or
 403 |    process such media types.  References to or inclusion of format
 404 |    specifications in registration proposals is encouraged but not
 405 |    required.
 406 | 
 407 |    Format specifications are still required for registration in the
 408 |    personal tree, but may be either published as RFCs or otherwise
 409 |    deposited with IANA. The deposited specifications will meet the same
 410 |    criteria as those required to register a well-known TCP port and, in
 411 |    particular, need not be made public.
 412 | 
 413 |    Some media types involve the use of patented technology.  The
 414 |    registration of media types involving patented technology is
 415 |    specifically permitted.  However, the restrictions set forth in RFC
 416 |    1602 on the use of patented technology in standards-track protocols
 417 |    must be respected when the specification of a media type is part of a
 418 |    standards-track protocol.
 419 | 
 420 | 2.2.5.  Interchange Recommendations
 421 | 
 422 |    Media types should, whenever possible, interoperate across as many
 423 |    systems and applications as possible. However, some media types will
 424 |    inevitably have problems interoperating across different platforms.
 425 |    Problems with different versions, byte ordering, and specifics of
 426 |    gateway handling can and will arise.
 427 | 
 428 |    Universal interoperability of media types is not required, but known
 429 |    interoperability issues should be identified whenever possible.
 430 |    Publication of a media type does not require an exhaustive review of
 431 |    interoperability, and the interoperability considerations section is
 432 |    subject to continuing evaluation.
 433 | 
 434 |    These recommendations apply regardless of the registration tree
 435 |    involved.
 436 | 
 437 | 2.2.6.  Security Requirements
 438 | 
 439 |    An analysis of security issues is required for all types
 440 |    registered in the IETF Tree.  (This is in accordance with the basic
 441 |    requirements for all IETF protocols.) A similar analysis for media
 442 |    types registered in the vendor or personal trees is encouraged but
 443 |    not required.  However, regardless of what security analysis has or
 444 |    has not been done, all descriptions of security issues must be as
 445 |    accurate as possible regardless of registration tree.  In particular,
 446 |    a statement that there are "no security issues associated with this
 447 | 
 448 | 
 449 | 
 450 | Freed, et. al.           Best Current Practice                  [Page 8]
 451 | 
 452 | RFC 2048              MIME Registration Procedures         November 1996
 453 | 
 454 | 
 455 |    type" must not be confused with "the security issues associated with
 456 |    this type have not been assessed".
 457 | 
 458 |    There is absolutely no requirement that media types registered in any
 459 |    tree be secure or completely free from risks.  Nevertheless, all
 460 |    known security risks must be identified in the registration of a
 461 |    media type, again regardless of registration tree.
 462 | 
 463 |    The security considerations section of all registrations is subject
 464 |    to continuing evaluation and modification, and in particular may be
 465 |    extended by use of the "comments on media types" mechanism described
 466 |    in subsequent sections.
 467 | 
 468 |    Some of the issues that should be looked at in a security analysis of
 469 |    a media type are:
 470 | 
 471 |     (1)   Complex media types may include provisions for
 472 |           directives that institute actions on a recipient's
 473 |           files or other resources.  In many cases provision is
 474 |           made for originators to specify arbitrary actions in an
 475 |           unrestricted fashion which may then have devastating
 476 |           effects.  See the registration of the
 477 |           application/postscript media type in RFC 2046 for
 478 |           an example of such directives and how to handle them.
 479 | 
 480 |     (2)   Complex media types may include provisions for
 481 |           directives that institute actions which, while not
 482 |           directly harmful to the recipient, may result in
 483 |           disclosure of information that either facilitates a
 484 |           subsequent attack or else violates a recipient's
 485 |           privacy in some way.  Again, the registration of the
 486 |           application/postscript media type illustrates how such
 487 |           directives can be handled.
 488 | 
 489 |     (3)   A media type might be targeted for applications that
 490 |           require some sort of security assurance but not provide
 491 |           the necessary security mechanisms themselves. For
 492 |           example, a media type could be defined for storage of
 493 |           confidential medical information which in turn requires
 494 |           an external confidentiality service.
 495 | 
 496 | 2.2.7.  Usage and Implementation Non-requirements
 497 | 
 498 |    In the asynchronous mail environment, where information on the
 499 |    capabilities of the remote mail agent is frequently not available to
 500 |    the sender, maximum interoperability is attained by restricting the
 501 |    number of media types used to those "common" formats expected to be
 502 |    widely implemented.  This was asserted in the past as a reason to
 503 | 
 504 | 
 505 | 
 506 | Freed, et. al.           Best Current Practice                  [Page 9]
 507 | 
 508 | RFC 2048              MIME Registration Procedures         November 1996
 509 | 
 510 | 
 511 |    limit the number of possible media types and resulted in a
 512 |    registration process with a significant hurdle and delay for those
 513 |    registering media types.
 514 | 
 515 |    However, the need for "common" media types does not require limiting
 516 |    the registration of new media types. If a limited set of media types
 517 |    is recommended for a particular application, that should be asserted
 518 |    by a separate applicability statement specific for the application
 519 |    and/or environment.
 520 | 
 521 |    As such, universal support and implementation of a media type is NOT
 522 |    a requirement for registration.  If, however, a media type is
 523 |    explicitly intended for limited use, this should be noted in its
 524 |    registration.
 525 | 
 526 | 2.2.8.  Publication Requirements
 527 | 
 528 |    Proposals for media types registered in the IETF tree must be
 529 |    published as RFCs. RFC publication of vendor and personal media type
 530 |    proposals is encouraged but not required. In all cases IANA will
 531 |    retain copies of all media type proposals and "publish" them as part
 532 |    of the media types registration tree itself.
 533 | 
 534 |    Other than in the IETF tree, the registration of a data type does not
 535 |    imply endorsement, approval, or recommendation by IANA or IETF or
 536 |    even certification that the specification is adequate.  To become
 537 |    Internet Standards, protocol, data objects, or whatever must go
 538 |    through the IETF standards process.  This is too difficult and too
 539 |    lengthy a process for the convenient registration of media types.
 540 | 
 541 |    The IETF tree exists for media types that do require a
 542 |    substantive review and approval process, while the vendor and personal
 543 |    trees exist for those that do not. It is expected that applicability
 544 |    statements for particular applications will be published from time to
 545 |    time that recommend implementation of, and support for, media types
 546 |    that have proven particularly useful in those contexts.
 547 | 
 548 |    As discussed above, registration of a top-level type requires
 549 |    standards-track processing and, hence, RFC publication.
 550 | 
 551 | 2.2.9.  Additional Information
 552 | 
 553 |    Various sorts of optional information may be included in the
 554 |    specification of a media type if it is available:
 555 | 
 556 |     (1)   Magic number(s) (length, octet values). Magic numbers
 557 |           are byte sequences that are always present and thus can
 558 |           be used to identify entities as being of a given media
 559 | 
 560 | 
 561 | 
 562 | Freed, et. al.           Best Current Practice                 [Page 10]
 563 | 
 564 | RFC 2048              MIME Registration Procedures         November 1996
 565 | 
 566 | 
 567 |           type.
 568 | 
 569 |     (2)   File extension(s) commonly used on one or more
 570 |           platforms to indicate that some file contains a given
 571 |           type of media.
 572 | 
 573 |     (3)   Macintosh File Type code(s) (4 octets) used to label
 574 |           files containing a given type of media.
 575 | 
 576 |    Such information is often quite useful to implementors and if
 577 |    available should be provided.
 578 | 
 579 | 2.3.  Registration Procedure
 580 | 
 581 |    The following procedure has been implemented by the IANA for review
 582 |    and approval of new media types.  This is not a formal standards
 583 |    process, but rather an administrative procedure intended to allow
 584 |    community comment and sanity checking without excessive time delay.
 585 |    For registration in the IETF tree, the normal IETF processes should
 586 |    be followed, treating posting of an internet-draft and announcement
 587 |    on the ietf-types list (as described in the next subsection) as a
 588 |    first step.  For registrations in the vendor or personal tree, the
 589 |    initial review step described below may be omitted and the type
 590 |    registered directly by submitting the template and an explanation
 591 |    directly to IANA (at iana@iana.org).  However, authors of vendor or
 592 |    personal media type specifications are encouraged to seek community
 593 |    review and comment whenever that is feasible.
 594 | 
 595 | 2.3.1.  Present the Media Type to the Community for Review
 596 | 
 597 |    Send a proposed media type registration to the "ietf-types@iana.org"
 598 |    mailing list for a two week review period.  This mailing list has
 599 |    been established for the purpose of reviewing proposed media and
 600 |    access types. Proposed media types are not formally registered and
 601 |    must not be used; the "x-" prefix specified in RFC 2045 can be used
 602 |    until registration is complete.
 603 | 
 604 |    The intent of the public posting is to solicit comments and feedback
 605 |    on the choice of type/subtype name, the unambiguity of the references
 606 |    with respect to versions and external profiling information, and a
 607 |    review of any interoperability or security considerations. The
 608 |    submitter may submit a revised registration, or withdraw the
 609 |    registration completely, at any time.
 610 | 
 611 | 
 612 | 
 613 | 
 614 | 
 615 | 
 616 | 
 617 | 
 618 | Freed, et. al.           Best Current Practice                 [Page 11]
 619 | 
 620 | RFC 2048              MIME Registration Procedures         November 1996
 621 | 
 622 | 
 623 | 2.3.2.  IESG Approval
 624 | 
 625 |    Media types registered in the IETF tree must be submitted to the IESG
 626 |    for approval.
 627 | 
 628 | 2.3.3.  IANA Registration
 629 | 
 630 |    Provided that the media type meets the requirements for media types
 631 |    and has obtained approval that is necessary, the author may submit
 632 |    the registration request to the IANA, which will register the media
 633 |    type and make the media type registration available to the community.
 634 | 
 635 | 2.4.  Comments on Media Type Registrations
 636 | 
 637 |    Comments on registered media types may be submitted by members of the
 638 |    community to IANA.  These comments will be passed on to the "owner"
 639 |    of the media type if possible.  Submitters of comments may request
 640 |    that their comment be attached to the media type registration itself,
 641 |    and if IANA approves of this the comment will be made accessible in
 642 |    conjunction with the type registration itself.
 643 | 
 644 | 2.5.  Location of Registered Media Type List
 645 | 
 646 |    Media type registrations will be posted in the anonymous FTP
 647 |    directory "ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/"
 648 |    and all registered media types will be listed in the periodically
 649 |    issued "Assigned Numbers" RFC [currently STD 2, RFC 1700].  The media
 650 |    type description and other supporting material may also be published
 651 |    as an Informational RFC by sending it to "rfc-editor@isi.edu" (please
 652 |    follow the instructions to RFC authors [RFC-1543]).
 653 | 
 654 | 2.6.  IANA Procedures for Registering Media Types
 655 | 
 656 |    The IANA will only register media types in the IETF tree in response
 657 |    to a communication from the IESG stating that a given registration
 658 |    has been approved. Vendor and personal types will be registered by
 659 |    the IANA automatically and without any formal review as long as the
 660 |    following minimal conditions are met:
 661 | 
 662 |     (1)   Media types must function as an actual media format.
 663 |           In particular, character sets and transfer encodings
 664 |           may not be registered as media types.
 665 | 
 666 |     (2)   All media types must have properly formed type and
 667 |           subtype names. All type names must be defined by a
 668 |           standards-track RFC. All subtype names must be unique,
 669 |           must conform to the MIME grammar for such names, and
 670 |           must contain the proper tree prefix.
 671 | 
 672 | 
 673 | 
 674 | Freed, et. al.           Best Current Practice                 [Page 12]
 675 | 
 676 | RFC 2048              MIME Registration Procedures         November 1996
 677 | 
 678 | 
 679 |     (3)   Types registered in the personal tree must either
 680 |           provide a format specification or a pointer to one.
 681 | 
 682 |     (4)   Any security considerations given must not be obviously
 683 |           bogus. (It is neither possible nor necessary for the
 684 |           IANA to conduct a comprehensive security review of
 685 |           media type registrations.  Nevertheless, IANA has the
 686 |           authority to identify obviously incompetent material
 687 |           and exclude it.)
 688 | 
 689 | 2.7.  Change Control
 690 | 
 691 |    Once a media type has been published by IANA, the author may request
 692 |    a change to its definition. The descriptions of the different
 693 |    registration trees above designate the "owners" of each type of
 694 |    registration. The change request follows the same procedure as the
 695 |    registration request:
 696 | 
 697 |     (1)   Publish the revised template on the ietf-types list.
 698 | 
 699 |     (2)   Leave at least two weeks for comments.
 700 | 
 701 |     (3)   Publish using IANA after formal review if required.
 702 | 
 703 |    Changes should be requested only when there are serious omissions or
 704 |    errors in the published specification. When review is required, a
 705 |    change request may be denied if it renders entities that were valid
 706 |    under the previous definition invalid under the new definition.
 707 | 
 708 |    The owner of a content type may pass responsibility for the content
 709 |    type to another person or agency by informing IANA and the ietf-types
 710 |    list; this can be done without discussion or review.
 711 | 
 712 |    The IESG may reassign responsibility for a media type. The most
 713 |    common case of this will be to enable changes to be made to types
 714 |    where the author of the registration has died, moved out of contact
 715 |    or is otherwise unable to make changes that are important to the
 716 |    community.
 717 | 
 718 |    Media type registrations may not be deleted; media types which are no
 719 |    longer believed appropriate for use can be declared OBSOLETE by a
 720 |    change to their "intended use" field; such media types will be
 721 |    clearly marked in the lists published by IANA.
 722 | 
 723 | 
 724 | 
 725 | 
 726 | 
 727 | 
 728 | 
 729 | 
 730 | Freed, et. al.           Best Current Practice                 [Page 13]
 731 | 
 732 | RFC 2048              MIME Registration Procedures         November 1996
 733 | 
 734 | 
 735 | 2.8.  Registration Template
 736 | 
 737 |      To: ietf-types@iana.org
 738 |      Subject: Registration of MIME media type XXX/YYY
 739 | 
 740 |      MIME media type name:
 741 | 
 742 |      MIME subtype name:
 743 | 
 744 |      Required parameters:
 745 | 
 746 |      Optional parameters:
 747 | 
 748 |      Encoding considerations:
 749 | 
 750 |      Security considerations:
 751 | 
 752 |      Interoperability considerations:
 753 | 
 754 |      Published specification:
 755 | 
 756 |      Applications which use this media type:
 757 | 
 758 |      Additional information:
 759 | 
 760 |        Magic number(s):
 761 |        File extension(s):
 762 |        Macintosh File Type Code(s):
 763 | 
 764 |      Person & email address to contact for further information:
 765 | 
 766 |      Intended usage:
 767 | 
 768 |      (One of COMMON, LIMITED USE or OBSOLETE)
 769 | 
 770 |      Author/Change controller:
 771 | 
 772 |      (Any other information that the author deems interesting may be
 773 |      added below this line.)
 774 | 
 775 | 3.  External Body Access Types
 776 | 
 777 |    RFC 2046 defines the message/external-body media type, whereby a MIME
 778 |    entity can act as a pointer to the actual body data in lieu of
 779 |    including the data directly in the entity body. Each
 780 |    message/external-body reference specifies an access type, which
 781 |    determines the mechanism used to retrieve the actual body data. RFC
 782 |    2046 defines an initial set of access types, but allows for the
 783 | 
 784 | 
 785 | 
 786 | Freed, et. al.           Best Current Practice                 [Page 14]
 787 | 
 788 | RFC 2048              MIME Registration Procedures         November 1996
 789 | 
 790 | 
 791 |    registration of additional access types to accommodate new retrieval
 792 |    mechanisms.
 793 | 
 794 | 3.1.  Registration Requirements
 795 | 
 796 |    New access type specifications must conform to a number of
 797 |    requirements as described below.
 798 | 
 799 | 3.1.1.  Naming Requirements
 800 | 
 801 |    Each access type must have a unique name.  This name appears in the
 802 |    access-type parameter in the message/external-body content-type
 803 |    header field, and must conform to MIME content type parameter syntax.
 804 | 
 805 | 3.1.2.  Mechanism Specification Requirements
 806 | 
 807 |    All of the protocols, transports, and procedures used by a given
 808 |    access type must be described, either in the specification of the
 809 |    access type itself or in some other publicly available specification,
 810 |    in sufficient detail for the access type to be implemented by any
 811 |    competent implementor.  Use of secret and/or proprietary methods in
 812 |    access types is expressly prohibited. The restrictions imposed by
 813 |    RFC 1602 on the standardization of patented algorithms must be
 814 |    respected as well.
 815 | 
 816 | 3.1.3.  Publication Requirements
 817 | 
 818 |    All access types must be described by an RFC. The RFC may be
 819 |    informational rather than standards-track, although standard-track
 820 |    review and approval are encouraged for all access types.
 821 | 
 822 | 3.1.4.  Security Requirements
 823 | 
 824 |    Any known security issues that arise from the use of the access type
 825 |    must be completely and fully described. It is not required that the
 826 |    access type be secure or that it be free from risks, but that the
 827 |    known risks be identified.  Publication of a new access type does not
 828 |    require an exhaustive security review, and the security
 829 |    considerations section is subject to continuing evaluation.
 830 |    Additional security considerations should be addressed by publishing
 831 |    revised versions of the access type specification.
 832 | 
 833 | 3.2.  Registration Procedure
 834 | 
 835 |    Registration of a new access type starts with the construction of a
 836 |    draft of an RFC.
 837 | 
 838 | 
 839 | 
 840 | 
 841 | 
 842 | Freed, et. al.           Best Current Practice                 [Page 15]
 843 | 
 844 | RFC 2048              MIME Registration Procedures         November 1996
 845 | 
 846 | 
 847 | 3.2.1.  Present the Access Type to the Community
 848 | 
 849 |    Send a proposed access type specification to the "ietf-
 850 |    types@iana.org" mailing list for a two week review period.  This
 851 |    mailing list has been established for the purpose of reviewing
 852 |    proposed access and media types.  Proposed access types are not
 853 |    formally registered and must not be used.
 854 | 
 855 |    The intent of the public posting is to solicit comments and feedback
 856 |    on the access type specification and a review of any security
 857 |    considerations.
 858 | 
 859 | 3.2.2.  Access Type Reviewer
 860 | 
 861 |    When the two week period has passed, the access type reviewer, who is
 862 |    appointed by the IETF Applications Area Director, either forwards the
 863 |    request to iana@isi.edu, or rejects it because of significant
 864 |    objections raised on the list.
 865 | 
 866 |    Decisions made by the reviewer must be posted to the ietf-types
 867 |    mailing list within 14 days. Decisions made by the reviewer may be
 868 |    appealed to the IESG.
 869 | 
 870 | 3.2.3.  IANA Registration
 871 | 
 872 |    Provided that the access type has either passed review or has been
 873 |    successfully appealed to the IESG, the IANA will register the access
 874 |    type and make the registration available to the community. The
 875 |    specification of the access type must also be published as an RFC.
 876 |    Informational RFCs are published by sending them to "rfc-
 877 |    editor@isi.edu" (please follow the instructions to RFC authors [RFC-
 878 |    1543]).
 879 | 
 880 | 3.3.  Location of Registered Access Type List
 881 | 
 882 |    Access type registrations will be posted in the anonymous FTP
 883 |    directory "ftp://ftp.isi.edu/in-notes/iana/assignments/access-types/"
 884 |    and all registered access types will be listed in the periodically
 885 |    issued "Assigned Numbers" RFC [currently RFC-1700].
 886 | 
 887 | 3.4.  IANA Procedures for Registering Access Types
 888 | 
 889 |    The identity of the access type reviewer is communicated to the IANA
 890 |    by the IESG.  The IANA then only acts in response to access type
 891 |    definitions that either are approved by the access type reviewer and
 892 |    forwarded by the reviewer to the IANA for registration, or in
 893 |    response to a communication from the IESG that an access type
 894 |    definition appeal has overturned the access type reviewer's ruling.
 895 | 
 896 | 
 897 | 
 898 | Freed, et. al.           Best Current Practice                 [Page 16]
 899 | 
 900 | RFC 2048              MIME Registration Procedures         November 1996
 901 | 
 902 | 
 903 | 4.  Transfer Encodings
 904 | 
 905 |    Transfer encodings are transformations applied to MIME media types
 906 |    after conversion to the media type's canonical form.  Transfer
 907 |    encodings are used for several purposes:
 908 | 
 909 |     (1)   Many transports, especially message transports, can
 910 |           only handle data consisting of relatively short lines
 911 |           of text. There can also be severe restrictions on what
 912 |           characters can be used in these lines of text -- some
 913 |           transports are restricted to a small subset of US-ASCII
 914 |           and others cannot handle certain character sequences.
 915 |           Transfer encodings are used to transform binary data
 916 |           into textual form that can survive such transports.
 917 |           Examples of this sort of transfer encoding include the
 918 |           base64 and quoted-printable transfer encodings defined
 919 |           in RFC 2045.
 920 | 
 921 |     (2)   Image, audio, video, and even application entities are
 922 |           sometimes quite large. Compression algorithms are often
 923 |           quite effective in reducing the size of large entities.
 924 |           Transfer encodings can be used to apply general-purpose
 925 |           non-lossy compression algorithms to MIME entities.
 926 | 
 927 |     (3)   Transport encodings can be defined as a means of
 928 |           representing existing encoding formats in a MIME
 929 |           context.
 930 | 
 931 |    IMPORTANT:  The standardization of a large number of different
 932 |    transfer encodings is seen as a significant barrier to widespread
 933 |    interoperability and is expressly discouraged.  Nevertheless, the
 934 |    following procedure has been defined to provide a means of defining
 935 |    additional transfer encodings, should standardization actually be
 936 |    justified.
 937 | 
 938 | 4.1.  Transfer Encoding Requirements
 939 | 
 940 |    Transfer encoding specifications must conform to a number of
 941 |    requirements as described below.
 942 | 
 943 | 4.1.1.  Naming Requirements
 944 | 
 945 |    Each transfer encoding must have a unique name.  This name appears in
 946 |    the Content-Transfer-Encoding header field and must conform to the
 947 |    syntax of that field.
 948 | 
 949 | 
 950 | 
 951 | 
 952 | 
 953 | 
 954 | Freed, et. al.           Best Current Practice                 [Page 17]
 955 | 
 956 | RFC 2048              MIME Registration Procedures         November 1996
 957 | 
 958 | 
 959 | 4.1.2.  Algorithm Specification Requirements
 960 | 
 961 |    All of the algorithms used in a transfer encoding (e.g.  conversion
 962 |    to printable form, compression) must be described in their entirety
 963 |    in the transfer encoding specification.  Use of secret and/or
 964 |    proprietary algorithms in standardized transfer encodings is
 965 |    expressly prohibited. The restrictions imposed by RFC 1602 on the
 966 |    standardization of patented algorithms must be respected as well.
 967 | 
 968 | 4.1.3.  Input Domain Requirements
 969 | 
 970 |    All transfer encodings must be applicable to an arbitrary sequence of
 971 |    octets of any length.  Dependence on particular input forms is not
 972 |    allowed.
 973 | 
 974 |    It should be noted that the 7bit and 8bit encodings do not conform to
 975 |    this requirement. Aside from the undesirability of having
 976 |    specialized encodings, the intent here is to forbid the addition of
 977 |    additional encodings along the lines of 7bit and 8bit.
 978 | 
 979 | 4.1.4.  Output Range Requirements
 980 | 
 981 |    There is no requirement that a particular transfer encoding produce a
 982 |    particular form of encoded output.  However, the output format for
 983 |    each transfer encoding must be fully and completely documented.  In
 984 |    particular, each specification must clearly state whether the output
 985 |    format always lies within the confines of 7bit data, 8bit data, or is
 986 |    simply pure binary data.
 987 | 
 988 | 4.1.5.  Data Integrity and Generality Requirements
 989 | 
 990 |    All transfer encodings must be fully invertible on any platform; it
 991 |    must be possible for anyone to recover the original data by
 992 |    performing the corresponding decoding operation.  Note that this
 993 |    requirement effectively excludes all forms of lossy compression as
 994 |    well as all forms of encryption from use as a transfer encoding.
 995 | 
 996 | 4.1.6.  New Functionality Requirements
 997 | 
 998 |    All transfer encodings must provide some sort of new functionality.
 999 |    Some degree of functionality overlap with previously defined transfer
1000 |    encodings is acceptable, but any new transfer encoding must also
1001 |    offer something no other transfer encoding provides.
1002 | 
1003 | 
1004 | 
1005 | 
1006 | 
1007 | 
1008 | 
1009 | 
1010 | Freed, et. al.           Best Current Practice                 [Page 18]
1011 | 
1012 | RFC 2048              MIME Registration Procedures         November 1996
1013 | 
1014 | 
1015 | 4.2.  Transfer Encoding Definition Procedure
1016 | 
1017 |    Definition of a new transfer encoding starts with the construction of
1018 |    a draft of a standards-track RFC.  The RFC must define the transfer
1019 |    encoding precisely and completely, and must also provide substantial
1020 |    justification for defining and standardizing a new transfer encoding.
1021 |    This specification must then be presented to the IESG for
1022 |    consideration.  The IESG can
1023 | 
1024 |     (1)   reject the specification outright as being
1025 |           inappropriate for standardization,
1026 | 
1027 |     (2)   approve the formation of an IETF working group to work
1028 |           on the specification in accordance with IETF
1029 |           procedures, or,
1030 | 
1031 |     (3)   accept the specification as-is and put it directly on
1032 |           the standards track.
1033 | 
1034 |    Transfer encoding specifications on the standards track follow normal
1035 |    IETF rules for standards track documents.  A transfer encoding is
1036 |    considered to be defined and available for use once it is on the
1037 |    standards track.
1038 | 
1039 | 4.3.  IANA Procedures for Transfer Encoding Registration
1040 | 
1041 |    There is no need for a special procedure for registering Transfer
1042 |    Encodings with the IANA. All legitimate transfer encoding
1043 |    registrations must appear as a standards-track RFC, so it is the
1044 |    IESG's responsibility to notify the IANA when a new transfer encoding
1045 |    has been approved.
1046 | 
1047 | 4.4.  Location of Registered Transfer Encodings List
1048 | 
1049 |    Transfer encoding registrations will be posted in the anonymous FTP
1050 |    directory "ftp://ftp.isi.edu/in-notes/iana/assignments/transfer-
1051 |    encodings/" and all registered transfer encodings will be listed in
1052 |    the periodically issued "Assigned Numbers" RFC [currently RFC-1700].
1053 | 
1054 | 
1055 | 
1056 | 
1057 | 
1058 | 
1059 | 
1060 | 
1061 | 
1062 | 
1063 | 
1064 | 
1065 | 
1066 | Freed, et. al.           Best Current Practice                 [Page 19]
1067 | 
1068 | RFC 2048              MIME Registration Procedures         November 1996
1069 | 
1070 | 
1071 | 5.  Authors' Addresses
1072 | 
1073 |    For more information, the authors of this document are best
1074 |    contacted via Internet mail:
1075 | 
1076 |    Ned Freed
1077 |    Innosoft International, Inc.
1078 |    1050 East Garvey Avenue South
1079 |    West Covina, CA 91790
1080 |    USA
1081 | 
1082 |    Phone: +1 818 919 3600
1083 |    Fax:   +1 818 919 3614
1084 |    EMail: ned@innosoft.com
1085 | 
1086 | 
1087 |    John Klensin
1088 |    MCI
1089 |    2100 Reston Parkway
1090 |    Reston, VA 22091
1091 | 
1092 |    Phone: +1 703 715-7361
1093 |    Fax:   +1 703 715-7436
1094 |    EMail: klensin@mci.net
1095 | 
1096 | 
1097 |    Jon Postel
1098 |    USC/Information Sciences Institute
1099 |    4676 Admiralty Way
1100 |    Marina del Rey, CA  90292
1101 |    USA
1102 | 
1103 | 
1104 |    Phone: +1 310 822 1511
1105 |    Fax:   +1 310 823 6714
1106 |    EMail: Postel@ISI.EDU
1107 | 
1108 | 
1109 | 
1110 | 
1111 | 
1112 | 
1113 | 
1114 | 
1115 | 
1116 | 
1117 | 
1118 | 
1119 | 
1120 | 
1121 | 
1122 | Freed, et. al.           Best Current Practice                 [Page 20]
1123 | 
1124 | RFC 2048              MIME Registration Procedures         November 1996
1125 | 
1126 | 
1127 | Appendix A -- Grandfathered Media Types
1128 | 
1129 |    A number of media types, registered prior to 1996, would, if
1130 |    registered under the guidelines in this document, be placed into
1131 |    either the vendor or personal trees.  Reregistration of those types
1132 |    to reflect the appropriate trees is encouraged, but not required.
1133 |    Ownership and change control principles outlined in this document
1134 |    apply to those types as if they had been registered in the trees
1135 |    described above.
1136 | 
1137 | 
1138 | 
1139 | 
1140 | 
1141 | 
1142 | 
1143 | 
1144 | 
1145 | 
1146 | 
1147 | 
1148 | 
1149 | 
1150 | 
1151 | 
1152 | 
1153 | 
1154 | 
1155 | 
1156 | 
1157 | 
1158 | 
1159 | 
1160 | 
1161 | 
1162 | 
1163 | 
1164 | 
1165 | 
1166 | 
1167 | 
1168 | 
1169 | 
1170 | 
1171 | 
1172 | 
1173 | 
1174 | 
1175 | 
1176 | 
1177 | 
1178 | 
1179 | Freed, et. al.           Best Current Practice                 [Page 21]
1180 | 
1181 | 


--------------------------------------------------------------------------------
/rfc/2387-mime-multipart-content-type.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | Network Working Group                                       E. Levinson
  8 | Request for Comments: 2387                                  August 1998
  9 | Obsoletes: 2112
 10 | Category: Standards Track
 11 | 
 12 | 
 13 |                 The MIME Multipart/Related Content-type
 14 | 
 15 | Status of this Memo
 16 | 
 17 |    This document specifies an Internet standards track protocol for the
 18 |    Internet community, and requests discussion and suggestions for
 19 |    improvements.  Please refer to the current edition of the "Internet
 20 |    Official Protocol Standards" (STD 1) for the standardization state
 21 |    and status of this protocol.  Distribution of this memo is unlimited.
 22 | 
 23 | Copyright Notice
 24 | 
 25 |    Copyright (C) The Internet Society (1998).  All Rights Reserved.
 26 | 
 27 | Abstract
 28 | 
 29 |    The Multipart/Related content-type provides a common mechanism for
 30 |    representing objects that are aggregates of related MIME body parts.
 31 |    This document defines the Multipart/Related content-type and provides
 32 |    examples of its use.
 33 | 
 34 | 1.  Introduction
 35 | 
 36 |    Several applications of MIME, including MIME-PEM, and MIME-Macintosh
 37 |    and other proposals, require multiple body parts that make sense only
 38 |    in the aggregate.  The present approach to these compound objects has
 39 |    been to define specific multipart subtypes for each new object.  In
 40 |    keeping with the MIME philosophy of having one mechanism to achieve
 41 |    the same goal for different purposes, this document describes a
 42 |    single mechanism for such aggregate or compound objects.
 43 | 
 44 |    The Multipart/Related content-type addresses the MIME representation
 45 |    of compound objects.  The object is categorized by a "type"
 46 |    parameter.  Additional parameters are provided to indicate a specific
 47 |    starting body part or root and auxiliary information which may be
 48 |    required when unpacking or processing the object.
 49 | 
 50 |    Multipart/Related MIME entities may contain Content-Disposition
 51 |    headers that provide suggestions for the storage and display of a
 52 |    body part.  Multipart/Related processing takes precedence over
 53 |    Content-Disposition; the interaction between them is discussed in
 54 |    section 4.
 55 | 
 56 | 
 57 | 
 58 | Levinson                    Standards Track                     [Page 1]
 59 | 
 60 | RFC 2387                   Multipart/Related                 August 1998
 61 | 
 62 | 
 63 |    Responsibility for the display or processing of a Multipart/Related's
 64 |    constituent entities rests with the application that handles the
 65 |    compound object.
 66 | 
 67 | 2.  Multipart/Related Registration Information
 68 | 
 69 |    The following form is copied from RFC 1590, Appendix A.
 70 | 
 71 |      To:  IANA@isi.edu
 72 |      Subject:  Registration of new Media Type content-type/subtype
 73 | 
 74 |      Media Type name:           Multipart
 75 | 
 76 |      Media subtype name:        Related
 77 | 
 78 |      Required parameters:       Type, a media type/subtype.
 79 | 
 80 |      Optional parameters:       Start
 81 |                                 Start-info
 82 | 
 83 |      Encoding considerations:   Multipart content-types cannot have
 84 |                                 encodings.
 85 | 
 86 |      Security considerations:   Depends solely on the referenced type.
 87 | 
 88 |      Published specification:   RFC-REL (this document).
 89 | 
 90 |      Person & email address to contact for further information:
 91 |                                 Edward Levinson
 92 |                                 47 Clive Street
 93 |                                 Metuchen, NJ  08840-1060
 94 |                                 +1 908 494 1606
 95 |                                 XIson@cnj.digex.net
 96 | 
 97 | 3.  Intended usage
 98 | 
 99 |    The Multipart/Related media type is intended for compound objects
100 |    consisting of several inter-related body parts.  For a
101 |    Multipart/Related object, proper display cannot be achieved by
102 |    individually displaying the constituent body parts.  The content-type
103 |    of the Multipart/Related object is specified by the type parameter.
104 |    The "start" parameter, if given, points, via a content-ID, to the
105 |    body part that contains the object root.  The default root is the
106 |    first body part within the Multipart/Related body.
107 | 
108 |    The relationships among the body parts of a compound object
 109 |    distinguish it from other object types.  These relationships are
110 |    often represented by links internal to the object's components that
111 | 
112 | 
113 | 
114 | Levinson                    Standards Track                     [Page 2]
115 | 
116 | RFC 2387                   Multipart/Related                 August 1998
117 | 
118 | 
119 |    reference the other components.  Within a single operating
120 |    environment the links are often file names, such links may be
121 |    represented within a MIME message using content-IDs or the value of
122 |    some other "Content-" headers.
123 | 
124 | 3.1.  The Type Parameter
125 | 
126 |    The type parameter must be specified and its value is the MIME media
127 |    type of the "root" body part.  It permits a MIME user agent to
128 |    determine the content-type without reference to the enclosed body
129 |    part.  If the value of the type parameter and the root body part's
130 |    content-type differ then the User Agent's behavior is undefined.
131 | 
132 | 3.2.  The Start Parameter
133 | 
134 |    The start parameter, if given, is the content-ID of the compound
135 |    object's "root".  If not present the "root" is the first body part in
136 |    the Multipart/Related entity.  The "root" is the element the
 137 |    application processes first.
138 | 
139 | 3.3.  The Start-Info Parameter
140 | 
141 |    Additional information can be provided to an application by the
142 |    start-info parameter.  It contains either a string or points, via a
143 |    content-ID, to another MIME entity in the message.  A typical use
144 |    might be to provide additional command line parameters or a MIME
145 |    entity giving auxiliary information for processing the compound
146 |    object.
147 | 
148 |    Applications that use Multipart/Related must specify the
149 |    interpretation of start-info.  User Agents shall provide the
150 |    parameter's value to the processing application.  Processes can
151 |    distinguish a start-info reference from a token or quoted-string by
152 |    examining the first non-white-space character, "<" indicates a
153 |    reference.
154 | 
155 | 3.4.  Syntax
156 | 
157 |      related-param   := [ ";" "start" "=" cid ]
158 |                         [ ";" "start-info"  "="
159 |                            ( cid-list / value ) ]
160 |                         [ ";" "type"  "=" type "/" subtype ]
161 |                         ; order independent
162 | 
163 |      cid-list        := cid cid-list
164 | 
165 |      cid             := msg-id     ; c.f. [822]
166 | 
167 | 
168 | 
169 | 
170 | Levinson                    Standards Track                     [Page 3]
171 | 
172 | RFC 2387                   Multipart/Related                 August 1998
173 | 
174 | 
175 |      value           := token / quoted-string    ; c.f. [MIME]
176 |                            ; value cannot begin with "<"
177 | 
178 |    Note that the parameter values will usually require quoting.  Msg-id
179 |    contains the special characters "<", ">", "@", and perhaps other
180 |    special characters.  If msg-id contains quoted-strings, those quote
181 |    marks must be escaped.  Similarly, the type parameter contains the
182 |    special character "/".
183 | 
184 | 4.  Handling Content-Disposition Headers
185 | 
186 |    Content-Disposition Headers [DISP] suggest presentation styles for
187 |    MIME body parts.  [DISP] describes two presentation styles, called
188 |    the disposition type, INLINE and ATTACHMENT.  These, used within a
189 |    multipart entity, allow the sender to suggest presentation
190 |    information.  [DISP] also provides for an optional storage (file)
191 |    name.  Content-Disposition headers could appear in one or more body
192 |    parts contained within a Multipart/Related entity.
193 | 
194 |    Using Content-Disposition headers in addition to Multipart/Related
195 |    provides presentation information to User Agents that do not
196 |    recognize Multipart/Related.  They will treat the multipart as
197 |    Multipart/Mixed and they may find the Content-Disposition information
198 |    useful.
199 | 
200 |    With Multipart/Related however, the application processing the
201 |    compound object determines the presentation style for all the
202 |    contained parts.  In that context the Content-Disposition header
203 |    information is redundant or even misleading.  Hence, User Agents that
204 |    understand Multipart/Related shall ignore the disposition type within
205 |    a Multipart/Related body part.
206 | 
207 |    It may be possible for a User Agent capable of handling both
208 |    Multipart/Related and Content-Disposition headers to provide the
209 |    invoked application the Content-Disposition header's optional
210 |    filename parameter to the Multipart/Related.  The use of that
211 |    information will depend on the specific application and should be
212 |    specified when describing the handling of the corresponding compound
213 |    object.  Such descriptions would be appropriate in an RFC registering
214 |    that object's media type.
215 | 
216 | 5.  Examples
217 | 
218 | 5.1 Application/X-FixedRecord
219 | 
220 |    The X-FixedRecord content-type consists of one or more octet-streams
 221 |    and a list of the lengths of each record.  The root lists the
 222 |    record lengths of each record within the streams.  The record length
223 | 
224 | 
225 | 
226 | Levinson                    Standards Track                     [Page 4]
227 | 
228 | RFC 2387                   Multipart/Related                 August 1998
229 | 
230 | 
231 |    list, type Application/X-FixedRecord, consists of a set of INTEGERs
232 |    in ASCII format, one per line.  Each INTEGER gives the number of
233 |    octets from the octet-stream body part that constitute the next
234 |    "record".
235 | 
236 |    The example below, uses a single data block.
237 | 
 238 |      Content-Type: Multipart/Related; boundary=example-1;
 239 |              start="<950120.aaCC@XIson.com>";
 240 |              type="Application/X-FixedRecord";
 241 |              start-info="-o ps"
242 | 
243 |      --example-1
244 |      Content-Type: Application/X-FixedRecord
245 |      Content-ID: <950120.aaCC@XIson.com>
246 | 
247 |      25
248 |      10
249 |      34
250 |      10
251 |      25
252 |      21
253 |      26
254 |      10
255 |      --example-1
256 |      Content-Type: Application/octet-stream
257 |      Content-Description: The fixed length records
258 |      Content-Transfer-Encoding: base64
259 |      Content-ID: <950120.aaCB@XIson.com>
260 | 
261 |      T2xkIE1hY0RvbmFsZCBoYWQgYSBmYXJtCkUgSS
262 |      BFIEkgTwpBbmQgb24gaGlzIGZhcm0gaGUgaGFk
263 |      IHNvbWUgZHVja3MKRSBJIEUgSSBPCldpdGggYS
264 |      BxdWFjayBxdWFjayBoZXJlLAphIHF1YWNrIHF1
265 |      YWNrIHRoZXJlLApldmVyeSB3aGVyZSBhIHF1YW
266 |      NrIHF1YWNrCkUgSSBFIEkgTwo=
267 | 
268 |      --example-1--
269 | 
270 | 
271 | 
272 | 
273 | 
274 | 
275 | 
276 | 
277 | 
278 | 
279 | 
280 | 
281 | 
282 | Levinson                    Standards Track                     [Page 5]
283 | 
284 | RFC 2387                   Multipart/Related                 August 1998
285 | 
286 | 
287 | 5.2 Text/X-Okie
288 | 
289 |    The Text/X-Okie is an invented markup language permitting the
290 |    inclusion of images with text.  A feature of this example is the
 291 |    inclusion of two additional body parts, both pictures. They are
292 |    referred to internally by the encapsulated document via each
293 |    picture's body part content-ID.  Usage of "cid:", as in this example,
294 |    may be useful for a variety of compound objects.  It is not, however,
295 |    a part of the Multipart/Related specification.
296 | 
297 |      Content-Type: Multipart/Related; boundary=example-2;
 298 |              start="<950118.AEBH@XIson.com>";
299 |              type="Text/x-Okie"
300 | 
301 |      --example-2
302 |      Content-Type: Text/x-Okie; charset=iso-8859-1;
303 |              declaration="<950118.AEB0@XIson.com>"
304 |      Content-ID: <950118.AEBH@XIson.com>
305 |      Content-Description: Document
306 | 
307 |      {doc}
308 |      This picture was taken by an automatic camera mounted ...
309 |      {image file=cid:950118.AECB@XIson.com}
310 |      {para}
311 |      Now this is an enlargement of the area ...
 312 |      {image file=cid:950118.AFDH@XIson.com}
313 |      {/doc}
314 |      --example-2
315 |      Content-Type: image/jpeg
316 |      Content-ID: <950118.AFDH@XIson.com>
317 |      Content-Transfer-Encoding: BASE64
318 |      Content-Description: Picture A
319 | 
320 |      [encoded jpeg image]
321 |      --example-2
322 |      Content-Type: image/jpeg
323 |      Content-ID: <950118.AECB@XIson.com>
324 |      Content-Transfer-Encoding: BASE64
325 |      Content-Description: Picture B
326 | 
327 |      [encoded jpeg image]
328 |      --example-2--
329 | 
330 | 5.3 Content-Disposition
331 | 
332 |    In the above example each image body part could also have a Content-
333 |    Disposition header.  For example,
334 | 
335 | 
336 | 
337 | 
338 | Levinson                    Standards Track                     [Page 6]
339 | 
340 | RFC 2387                   Multipart/Related                 August 1998
341 | 
342 | 
343 |      --example-2
344 |      Content-Type: image/jpeg
345 |      Content-ID: <950118.AECB@XIson.com>
346 |      Content-Transfer-Encoding: BASE64
347 |      Content-Description: Picture B
348 |      Content-Disposition: INLINE
349 | 
350 |      [encoded jpeg image]
351 |      --example-2--
352 | 
353 |    User Agents that recognize Multipart/Related will ignore the
354 |    Content-Disposition header's disposition type.  Other User Agents
355 |    will process the Multipart/Related as Multipart/Mixed and may make
356 |    use of that header's information.
357 | 
358 | 6.  User Agent Requirements
359 | 
360 |    User agents that do not recognize Multipart/Related shall, in
361 |    accordance with [MIME], treat the entire entity as Multipart/Mixed.
362 |    MIME User Agents that do recognize Multipart/Related entities but are
363 |    unable to process the given type should give the user the option of
 364 |    suppressing the entire Multipart/Related body part.
365 | 
366 |    Existing MIME-capable mail user agents (MUAs) handle the existing
367 |    media types in a straightforward manner.  For discrete media types
368 |    (e.g. text, image, etc.) the body of the entity can be directly
369 |    passed to a display process.  Similarly the existing composite
 370 |    subtypes can be reduced to handling one or more discrete types.
371 |    Handling Multipart/Related differs in that processing cannot be
372 |    reduced to handling the individual entities.
373 | 
374 |    The following sections discuss what information the processing
375 |    application requires.
376 | 
377 |    It is possible that an application specific "receiving agent" will
378 |    manipulate the entities for display prior to invoking actual
379 |    application process.  Okie, above, is an example of this; it may need
380 |    a receiving agent to parse the document and substitute local file
381 |    names for the originator's file names.  Other applications may just
382 |    require a table showing the correspondence between the local file
383 |    names and the originator's.  The receiving agent takes responsibility
384 |    for such processing.
385 | 
386 | 6.1 Data Requirements
387 | 
388 |    MIME-capable mail user agents (MUAs) are required to provide the
389 |    application:
390 | 
391 | 
392 | 
393 | 
394 | Levinson                    Standards Track                     [Page 7]
395 | 
396 | RFC 2387                   Multipart/Related                 August 1998
397 | 
398 | 
399 |    (a) the bodies of the MIME entities and the entity Content-* headers,
400 | 
401 |    (b) the parameters of the Multipart/Related Content-type header, and
402 | 
403 |    (c) the correspondence between each body's local file name, that
404 |        body's header data, and, if present, the body part's content-ID.
405 | 
406 | 6.2 Storing Multipart/Related Entities
407 | 
408 |    The Multipart/Related media type will be used for objects that have
409 |    internal linkages between the body parts.  When the objects are
410 |    stored the linkages may require processing by the application or its
411 |    receiving agent.
412 | 
413 | 6.3 Recursion
414 | 
415 |    MIME is a recursive structure.  Hence one must expect a
416 |    Multipart/Related entity to contain other Multipart/Related entities.
417 |    When a Multipart/Related entity is being processed for display or
418 |    storage, any enclosed Multipart/Related entities shall be processed
419 |    as though they were being stored.
420 | 
421 | 6.4 Configuration Considerations
422 | 
423 |    It is suggested that MUAs that use configuration mechanisms, see
424 |    [CFG] for an example, refer to Multipart/Related as Multi-
 425 |    part/Related/<type>, where <type> is the value of the "type"
426 |    parameter.
427 | 
428 | 7.  Security Considerations
429 | 
430 |    Security considerations relevant to Multipart/Related are identical
431 |    to those of the underlying content-type.
432 | 
433 | 8.  Acknowledgments
434 | 
435 |    This proposal is the result of conversations the author has had with
436 |    many people.  In particular, Harald A. Alvestrand, James Clark,
437 |    Charles Goldfarb, Gary Houston, Ned Freed, Ray Moody, and Don
438 |    Stinchfield, provided both encouragement and invaluable help.  The
439 |    author, however, takes full responsibility for all errors contained in
440 |    this document.
441 | 
442 | 
443 | 
444 | 
445 | 
446 | 
447 | 
448 | 
449 | 
450 | Levinson                    Standards Track                     [Page 8]
451 | 
452 | RFC 2387                   Multipart/Related                 August 1998
453 | 
454 | 
455 | 9.  References
456 | 
457 |    [822]       Crocker, D., "Standard for the Format of ARPA Internet
458 |                Text Messages", STD 11, RFC 822, August 1982.
459 | 
460 |    [CID]       Levinson, E., and J. Clark, "Message/External-Body
461 |                Content-ID Access Type",  RFC 1873, December 1995,
462 |                Levinson, E., "Message/External-Body Content-ID Access
463 |                Type", Work in Progress.
464 | 
465 |    [CFG]       Borenstein, N., "A User Agent Configuration Mechanism For
466 |                Multimedia Mail Format Information", RFC 1524, September
467 |                1993.
468 | 
469 |    [DISP]      Troost, R., and S. Dorner, "Communicating Presentation
470 |                Information in Internet Messages:  The Content-
471 |                Disposition Header", RFC 1806, June 1995.
472 | 
473 |    [MIME]      Borenstein, N., and Freed, N., "Multipurpose Internet
474 |                Mail Extensions (MIME) Part One: Format of Internet
475 |                Message Bodies", RFC 2045, November 1996.
476 | 
477 | 9.  Author's Address
478 | 
479 |    Edward Levinson
480 |    47 Clive Street
481 |    Metuchen, NJ  08840-1060
482 |    USA
483 | 
484 |    Phone: +1 908 494 1606
485 |    EMail: XIson@cnj.digex.com
486 | 
487 | 10.  Changes from previous draft (RFC 2112)
488 | 
489 |    Corrected cid urls to conform to RFC 2111; the angle brackets were
490 |    removed.
491 | 
492 | 
493 | 
494 | 
495 | 
496 | 
497 | 
498 | 
499 | 
500 | 
501 | 
502 | 
503 | 
504 | 
505 | 
506 | Levinson                    Standards Track                     [Page 9]
507 | 
508 | RFC 2387                   Multipart/Related                 August 1998
509 | 
510 | 
511 | 11.  Full Copyright Statement
512 | 
513 |    Copyright (C) The Internet Society (1998).  All Rights Reserved.
514 | 
515 |    This document and translations of it may be copied and furnished to
516 |    others, and derivative works that comment on or otherwise explain it
517 |    or assist in its implementation may be prepared, copied, published
518 |    and distributed, in whole or in part, without restriction of any
519 |    kind, provided that the above copyright notice and this paragraph are
520 |    included on all such copies and derivative works.  However, this
521 |    document itself may not be modified in any way, such as by removing
522 |    the copyright notice or references to the Internet Society or other
523 |    Internet organizations, except as needed for the purpose of
524 |    developing Internet standards in which case the procedures for
525 |    copyrights defined in the Internet Standards process must be
526 |    followed, or as required to translate it into languages other than
527 |    English.
528 | 
529 |    The limited permissions granted above are perpetual and will not be
530 |    revoked by the Internet Society or its successors or assigns.
531 | 
532 |    This document and the information contained herein is provided on an
533 |    "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
534 |    TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
535 |    BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
536 |    HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
537 |    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
538 | 
539 | 
540 | 
541 | 
542 | 
543 | 
544 | 
545 | 
546 | 
547 | 
548 | 
549 | 
550 | 
551 | 
552 | 
553 | 
554 | 
555 | 
556 | 
557 | 
558 | 
559 | 
560 | 
561 | 
562 | Levinson                    Standards Track                    [Page 10]
563 | 
564 | 


--------------------------------------------------------------------------------
/rfc/2388-returning-values-from-forms-multipart-form-data.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 | Network Working Group                                         L. Masinter
  8 | Request for Comments: 2388                              Xerox Corporation
  9 | Category: Standards Track                                     August 1998
 10 | 
 11 | 
 12 |            Returning Values from Forms:  multipart/form-data
 13 | 
 14 | Status of this Memo
 15 | 
 16 |    This document specifies an Internet standards track protocol for the
 17 |    Internet community, and requests discussion and suggestions for
 18 |    improvements.  Please refer to the current edition of the "Internet
 19 |    Official Protocol Standards" (STD 1) for the standardization state
 20 |    and status of this protocol.  Distribution of this memo is unlimited.
 21 | 
 22 | Copyright Notice
 23 | 
 24 |    Copyright (C) The Internet Society (1998).  All Rights Reserved.
 25 | 
 26 | 1. Abstract
 27 | 
 28 |    This specification defines an Internet Media Type, multipart/form-
 29 |    data, which can be used by a wide variety of applications and
 30 |    transported by a wide variety of protocols as a way of returning a
 31 |    set of values as the result of a user filling out a form.
 32 | 
 33 | 2. Introduction
 34 | 
 35 |    In many applications, it is possible for a user to be presented with
 36 |    a form. The user will fill out the form, including information that
 37 |    is typed, generated by user input, or included from files that the
 38 |    user has selected. When the form is filled out, the data from the
 39 |    form is sent from the user to the receiving application.
 40 | 
 41 |    The definition of MultiPart/Form-Data is derived from one of those
 42 |    applications, originally set out in [RFC1867] and subsequently
 43 |    incorporated into [HTML40], where forms are expressed in HTML, and in
 44 |    which the form values are sent via HTTP or electronic mail. This
 45 |    representation is widely implemented in numerous web browsers and web
 46 |    servers.
 47 | 
 48 |    However, multipart/form-data can be used for forms that are presented
 49 |    using representations other than HTML (spreadsheets, Portable
 50 |    Document Format, etc), and for transport using other means than
 51 |    electronic mail or HTTP. This document defines the representation of
 52 |    form values independently of the application for which it is used.
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | Masinter                    Standards Track                     [Page 1]
 59 | 
 60 | RFC 2388                  multipart/form-data                August 1998
 61 | 
 62 | 
 63 | 3. Definition of multipart/form-data
 64 | 
 65 |    The media-type multipart/form-data follows the rules of all multipart
 66 |    MIME data streams as outlined in [RFC 2046].  In forms, there are a
 67 |    series of fields to be supplied by the user who fills out the form.
 68 |    Each field has a name. Within a given form, the names are unique.
 69 | 
 70 |    "multipart/form-data" contains a series of parts. Each part is
 71 |    expected to contain a content-disposition header [RFC 2183] where the
 72 |    disposition type is "form-data", and where the disposition contains
 73 |    an (additional) parameter of "name", where the value of that
 74 |    parameter is the original field name in the form. For example, a part
 75 |    might contain a header:
 76 | 
 77 |         Content-Disposition: form-data; name="user"
 78 | 
 79 |    with the value corresponding to the entry of the "user" field.
 80 | 
 81 |    Field names originally in non-ASCII character sets may be encoded
 82 |    within the value of the "name" parameter using the standard method
 83 |    described in RFC 2047.
 84 | 
 85 |    As with all multipart MIME types, each part has an optional
 86 |    "Content-Type", which defaults to text/plain.  If the contents of a
 87 |    file are returned via filling out a form, then the file input is
 88 |    identified as the appropriate media type, if known, or
 89 |    "application/octet-stream".  If multiple files are to be returned as
 90 |    the result of a single form entry, they should be represented as a
 91 |    "multipart/mixed" part embedded within the "multipart/form-data".
 92 | 
 93 |    Each part may be encoded and the "content-transfer-encoding" header
 94 |    supplied if the value of that part does not conform to the default
 95 |    encoding.
 96 | 
 97 | 4. Use of multipart/form-data
 98 | 
 99 | 4.1 Boundary
100 | 
101 |    As with other multipart types, a boundary is selected that does not
102 |    occur in any of the data. Each field of the form is sent, in the
103 |    order defined by the sending application and form, as a part of the
104 |    multipart stream.  Each part identifies the INPUT name within the
105 |    original form. Each part should be labelled with an appropriate
106 |    content-type if the media type is known (e.g., inferred from the file
107 |    extension or operating system typing information) or as
108 |    "application/octet-stream".
109 | 
110 | 
111 | 
112 | 
113 | 
114 | Masinter                    Standards Track                     [Page 2]
115 | 
116 | RFC 2388                  multipart/form-data                August 1998
117 | 
118 | 
119 | 4.2 Sets of files
120 | 
121 |    If the value of a form field is a set of files rather than a single
122 |    file, that value can be transferred together using the
123 |    "multipart/mixed" format.
124 | 
125 | 4.3 Encoding
126 | 
127 |    While the HTTP protocol can transport arbitrary binary data, the
128 |    default for mail transport is the 7BIT encoding.  The value supplied
129 |    for a part may need to be encoded and the "content-transfer-encoding"
130 |    header supplied if the value does not conform to the default
131 |    encoding.  [See section 5 of RFC 2046 for more details.]
132 | 
133 | 4.4 Other attributes
134 | 
135 |    Forms may request file inputs from the user; the form software may
136 |    include the file name and other file attributes, as specified in [RFC
137 |    2184].
138 | 
139 |    The original local file name may be supplied as well, either as a
140 |    "filename" parameter either of the "content-disposition: form-data"
141 |    header or, in the case of multiple files, in a "content-disposition:
142 |    file" header of the subpart. The sending application MAY supply a
143 |    file name; if the file name of the sender's operating system is not
144 |    in US-ASCII, the file name might be approximated, or encoded using
145 |    the method of RFC 2231.
146 | 
147 |    This is a convenience for those cases where the files supplied by the
148 |    form might contain references to each other, e.g., a TeX file and its
149 |    .sty auxiliary style description.
150 | 
151 | 4.5 Charset of text in form data
152 | 
153 |    Each part of a multipart/form-data is supposed to have a content-
154 |    type.  In the case where a field element is text, the charset
155 |    parameter for the text indicates the character encoding used.
156 | 
157 |    For example, a form with a text field in which a user typed 'Joe owes
158 |    <eu>100' where <eu> is the Euro symbol might have form data returned
159 |    as:
160 | 
161 |     --AaB03x
162 |     content-disposition: form-data; name="field1"
163 |     content-type: text/plain;charset=windows-1250
164 |     content-transfer-encoding: quoted-printable
165 | 
166 | 
167 | 
168 | 
169 | 
170 | Masinter                    Standards Track                     [Page 3]
171 | 
172 | RFC 2388                  multipart/form-data                August 1998
173 | 
174 | 
175 |     Joe owes =80100.
176 |     --AaB03x
177 | 
178 | 5. Operability considerations
179 | 
180 | 5.1 Compression, encryption
181 | 
182 |    Some of the data in forms may be compressed or encrypted, using other
183 |    MIME mechanisms. This is a function of the application that is
184 |    generating the form-data.
185 | 
186 | 5.2 Other data encodings rather than multipart
187 | 
188 |    Various people have suggested using new mime top-level type
189 |    "aggregate", e.g., aggregate/mixed or a content-transfer-encoding of
190 |    "packet" to express indeterminate-length binary data, rather than
191 |    relying on the multipart-style boundaries. While this would be
192 |    useful, the "multipart" mechanisms are well established, simple to
193 |    implement on both the sending client and receiving server, and as
194 |    efficient as other methods of dealing with multiple combinations of
195 |    binary data.
196 | 
197 |    The multipart/form-data encoding has a high overhead and performance
198 |    impact if there are many fields with short values. However, in
199 |    practice, for the forms in use, for example, in HTML, the average
200 |    overhead is not significant.
201 | 
202 | 5.3 Remote files with third-party transfer
203 | 
204 |    In some scenarios, the user operating the form software might want to
205 |    specify a URL for remote data rather than a local file. In this case,
206 |    is there a way to allow the browser to send to the client a pointer
207 |    to the external data rather than the entire contents? This capability
208 |    could be implemented, for example, by having the client send to the
209 |    server data of type "message/external-body" with "access-type" set
210 |    to, say, "uri", and the URL of the remote data in the body of the
211 |    message.
212 | 
213 | 5.4 Non-ASCII field names
214 | 
215 |    Note that MIME headers are generally required to consist only of 7-
216 |    bit data in the US-ASCII character set. Hence field names should be
217 |    encoded according to the method in RFC 2047 if they contain
218 |    characters outside of that set.
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 
225 | 
226 | Masinter                    Standards Track                     [Page 4]
227 | 
228 | RFC 2388                  multipart/form-data                August 1998
229 | 
230 | 
231 | 5.5 Ordered fields and duplicated field names
232 | 
233 |    The relationship of the ordering of fields within a form and the
234 |    ordering of returned values within "multipart/form-data" is not
235 |    defined by this specification, nor is the handling of the case where
236 |    a form has multiple fields with the same name. While HTML-based forms
237 |    may send back results in the order received, and intermediaries
238 |    should not reorder the results, there are some systems which might
239 |    not define a natural order for form fields.
240 | 
241 | 5.6 Interoperability with web applications
242 | 
243 |    Many web applications use the "application/x-url-encoded" method for
244 |    returning data from forms. This format is quite compact, e.g.:
245 | 
246 |    name=Xavier+Xantico&verdict=Yes&colour=Blue&happy=sad&Utf%F6r=Send
247 | 
248 |    however, there is no opportunity to label the enclosed data with
249 |    content type, apply a charset, or use other encoding mechanisms.
250 | 
251 |    Many form-interpreting programs (primarily web browsers) now implement
252 |    and generate multipart/form-data, but an existing application might
253 |    need to optionally support the application/x-url-encoded format
254 |    as well.
255 | 
256 | 5.7 Correlating form data with the original form
257 | 
258 |    This specification provides no specific mechanism by which
259 |    multipart/form-data can be associated with the form that caused it to
260 |    be transmitted. This separation is intentional; many different forms
261 |    might be used for transmitting the same data. In practice,
262 |    applications may supply a specific form processing resource (in HTML,
263 |    the ACTION attribute in a FORM tag) for each different form.
264 |    Alternatively, data about the form might be encoded in a "hidden
265 |    field" (a field which is part of the form but which has a fixed value
266 |    to be transmitted back to the form-data processor.)
267 | 
268 | 6. Security Considerations
269 | 
270 |    The data format described in this document introduces no new security
271 |    considerations outside of those introduced by the protocols that use
272 |    it and of the component elements. It is important when interpreting
273 |    content-disposition to not overwrite files in the recipient's address
274 |    space inadvertently.
275 | 
276 |    User applications that request form information from users must be
277 |    careful not to cause a user to send information to the requestor or a
278 |    third party unwillingly or unwittingly. For example, a form might
279 | 
280 | 
281 | 
282 | Masinter                    Standards Track                     [Page 5]
283 | 
284 | RFC 2388                  multipart/form-data                August 1998
285 | 
286 | 
287 |    request 'spam' information to be sent to an unintended third party,
288 |    or private information to be sent to someone that the user might not
289 |    actually intend. While this is primarily an issue for the
290 |    representation and interpretation of forms themselves, rather than
291 |    the data representation of the result of form transmission, the
292 |    transportation of private information must be done in a way that does
293 |    not expose it to unwanted prying.
294 | 
295 |    With the introduction of form-data that can reasonably send back the
296 |    content of files from user's file space, the possibility that a user
297 |    might be sent an automated script that fills out a form and then
298 |    sends the user's local file to another address arises. Thus,
299 |    additional caution is required when executing automated scripting
300 |    where form-data might include user's files.
301 | 
302 | 7. Author's Address
303 | 
304 |    Larry Masinter
305 |    Xerox Palo Alto Research Center
306 |    3333 Coyote Hill Road
307 |    Palo Alto, CA 94304
308 | 
309 |    Fax:    +1 650 812 4333
310 |    EMail:   masinter@parc.xerox.com
311 | 
312 | 
313 | 
314 | 
315 | 
316 | 
317 | 
318 | 
319 | 
320 | 
321 | 
322 | 
323 | 
324 | 
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | Masinter                    Standards Track                     [Page 6]
339 | 
340 | RFC 2388                  multipart/form-data                August 1998
341 | 
342 | 
343 | Appendix A. Media type registration for multipart/form-data
344 | 
345 |    Media Type name:
346 |      multipart
347 | 
348 |    Media subtype name:
349 |      form-data
350 | 
351 |    Required parameters:
352 |      none
353 | 
354 |    Optional parameters:
355 |      none
356 | 
357 |    Encoding considerations:
358 |      No additional considerations other than as for other multipart
359 |      types.
360 | 
361 |    Security Considerations
362 |      Applications which receive forms and process them must be careful
363 |      not to supply data back to the requesting form processing site that
364 |      was not intended to be sent by the recipient. This is a
365 |      consideration for any application that generates a multipart/form-
366 |      data.
367 | 
368 |      The multipart/form-data type introduces no new security
369 |      considerations for recipients beyond what might occur with any of
370 |      the enclosed parts.
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 
387 | 
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 
394 | Masinter                    Standards Track                     [Page 7]
395 | 
396 | RFC 2388                  multipart/form-data                August 1998
397 | 
398 | 
399 | References
400 | 
401 |    [RFC 2046] Freed, N., and N. Borenstein, "Multipurpose Internet Mail
402 |               Extensions (MIME) Part Two: Media Types", RFC 2046,
403 |               November 1996.
404 | 
405 |    [RFC 2047] Moore, K., "MIME (Multipurpose Internet Mail Extensions)
406 |               Part Three: Message Header Extensions for Non-ASCII Text",
407 |               RFC 2047, November 1996.
408 | 
409 |    [RFC 2231] Freed, N., and K. Moore, "MIME Parameter Value and Encoded
410 |               Word Extensions: Character Sets, Languages, and
411 |               Continuations", RFC 2231, November 1997.
412 | 
413 |    [RFC 1806] Troost, R., and S. Dorner, "Communicating Presentation
414 |               Information in Internet Messages: The Content-Disposition
415 |               Header", RFC 1806, June 1995.
416 | 
417 |    [RFC 1867] Nebel, E., and L. Masinter, "Form-based File Upload in
418 |               HTML", RFC 1867, November 1995.
419 | 
420 |    [RFC 2183] Troost, R., Dorner, S., and K. Moore, "Communicating
421 |               Presentation Information in Internet Messages: The
422 |               Content-Disposition Header Field", RFC 2183, August 1997.
423 | 
424 |    [RFC 2184] Freed, N., and K. Moore, "MIME Parameter Value and Encoded
425 |               Word Extensions: Character Sets, Languages, and
426 |               Continuations", RFC 2184, August 1997.
427 | 
428 |    [HTML40]   D. Raggett, A. Le Hors, I. Jacobs. "HTML 4.0
429 |               Specification", World Wide Web Consortium Technical Report
430 |               "REC-html40", December, 1997. <http://www.w3.org/TR/REC-
431 |               html40/>
432 | 
433 | 
434 | 
435 | 
436 | 
437 | 
438 | 
439 | 
440 | 
441 | 
442 | 
443 | 
444 | 
445 | 
446 | 
447 | 
448 | 
449 | 
450 | Masinter                    Standards Track                     [Page 8]
451 | 
452 | RFC 2388                  multipart/form-data                August 1998
453 | 
454 | 
455 | Full Copyright Statement
456 | 
457 |    Copyright (C) The Internet Society (1998).  All Rights Reserved.
458 | 
459 |    This document and translations of it may be copied and furnished to
460 |    others, and derivative works that comment on or otherwise explain it
461 |    or assist in its implementation may be prepared, copied, published
462 |    and distributed, in whole or in part, without restriction of any
463 |    kind, provided that the above copyright notice and this paragraph are
464 |    included on all such copies and derivative works.  However, this
465 |    document itself may not be modified in any way, such as by removing
466 |    the copyright notice or references to the Internet Society or other
467 |    Internet organizations, except as needed for the purpose of
468 |    developing Internet standards in which case the procedures for
469 |    copyrights defined in the Internet Standards process must be
470 |    followed, or as required to translate it into languages other than
471 |    English.
472 | 
473 |    The limited permissions granted above are perpetual and will not be
474 |    revoked by the Internet Society or its successors or assigns.
475 | 
476 |    This document and the information contained herein is provided on an
477 |    "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
478 |    TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
479 |    BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
480 |    HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
481 |    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
482 | 
483 | 
484 | 
485 | 
486 | 
487 | 
488 | 
489 | 
490 | 
491 | 
492 | 
493 | 
494 | 
495 | 
496 | 
497 | 
498 | 
499 | 
500 | 
501 | 
502 | 
503 | 
504 | 
505 | 
506 | Masinter                    Standards Track                     [Page 9]
507 | 
508 | 


--------------------------------------------------------------------------------
/test/common.js:
--------------------------------------------------------------------------------
 1 | var path = require('path');
 2 | 
 3 | var root = path.join(__dirname, '../');
 4 | exports.dir = {
 5 |   root: root,
 6 |   lib: root + '/lib',
 7 |   fixture: root + '/test/fixture',
 8 | };
 9 | 
10 | exports.assert = require('assert');
11 | exports.fastOrSlow = require('fast-or-slow');
12 | 
13 | exports.require = function(lib) {
14 |   return require(exports.dir.lib + '/' + lib);
15 | };
16 | 


--------------------------------------------------------------------------------
/test/fast/test-fixtures.js:
--------------------------------------------------------------------------------
 1 | return console.log('disabled'); // module-level return: logs 'disabled' and stops here, so nothing below executes
 2 | 
 3 | var common = require('../common');
 4 | var assert = common.assert;
 5 | var fs = require('fs');
 6 | var path = require('path');
 7 | var MultipartParser = common.require('multipart_parser');
 8 | 
 9 | findFixtures(common.dir.fixture + '/js'); // entry point: scan the js fixture directory
10 | 
11 | function findFixtures(dir) {
12 |   fs.readdir(dir, function(err, files) {
13 |     if (err) throw err;
14 |     files
15 |       .map(function(file) {
16 |         return dir + '/' + file;
17 |       })
18 |       .forEach(load);
19 |   });
20 | }
21 | 
22 | 
23 | function load(jsFixture) {
24 |   if (!/\.js$/.test(jsFixture)) return;
25 | 
26 |   var tests = require(jsFixture);
27 |   var dir = common.dir.fixture + '/http/' + path.basename(jsFixture, '.js');
28 | 
29 |   for (var name in tests) {
30 |     var httpFixture = fs.createReadStream(dir + '/' + name);
31 |     verify(httpFixture, tests[name]);
32 |   }
33 | }
34 | 
35 | function verify(http, expected) {
36 |   var ended = false;
37 |   var parser = new MultipartParser();
38 |   var parts = [];
39 |   var shortPath = http.path.substr(common.dir.fixture.length + '/http/'.length);
40 | 
41 |   http.pipe(parser);
42 | 
43 |   parser
44 |     .on('part', function(part) {
45 |       parts.push(part);
46 |     })
47 |     .on('end', function() {
48 |       ended = true;
49 | 
50 |       assert.equal(
51 |         parts.length,
52 |         expected.length,
53 |         'Expected ' + expected.length + ' part(s), got: ' + parts.length + ': ' +
54 |         shortPath
55 |       );
56 |     });
57 | 
58 |   http
59 |     .on('end', function() {
60 |       assert.ok(ended, 'Parser did not end: ' + shortPath);
61 |     });
62 | }
63 | 


--------------------------------------------------------------------------------
/test/fast/test-multipart-parser.js:
--------------------------------------------------------------------------------
  1 | var common          = require('../common');
  2 | var test            = common.fastOrSlow.fast();
  3 | var assert          = common.assert;
  4 | var MultipartParser = common.require('multipart_parser');
  5 | var Part            = common.require('part');
  6 | var boundary        = '------WebKitFormBoundarytyE4wkKlZ5CQJVTG';
  7 | // Shared parser under test; the before hook replaces it with a new parser created for `boundary`.
  8 | var parser;
  9 | test.before(function() { // presumably run ahead of every test — confirm against fast-or-slow
 10 |   parser = MultipartParser.create(boundary);
 11 | });
 12 | 
 13 | function assertEmitsError(buffer, expectedError) {
 14 |   var hadError = false;
 15 |   parser.on('error', function(err) {
 16 |     hadError = true;
 17 |     assert.equal(err.message.substr(0, expectedError.length), expectedError);
 18 |   });
 19 | 
 20 |   parser.write(buffer);
 21 |   assert.ok(hadError, 'no error was emitted');
 22 | }
 23 | 
 24 | test('#write: error: invalid parser state', function() {
 25 |   parser._state = 'SOMETHING'; // presumably not a state write() recognizes — the expected error below suggests so
 26 |   assertEmitsError(new Buffer('123'), 'MultipartParser.InvalidParserState');
 27 | });
 28 | 
 29 | test('#write: error: write without boundary', function() {
 30 |   var buffer = new Buffer('a');
 31 |   parser     = new MultipartParser();
 32 | 
 33 |   assert.throws(function() {
 34 |     parser.write(buffer);
 35 |   }, /Bad state: NO_BOUNDARY/);
 36 | });
 37 | 
 38 | test('#write: tolerate missing CRLF on first boundary', function() {
 39 |   var buffer = new Buffer('--' + boundary + '\r\n');
 40 |   parser.write(buffer);
 41 | 
 42 |   assert.equal(parser._state, 'HEADER_FIELD');
 43 | });
 44 | 
 45 | test('#write: leading preamble', function() {
 46 |   parser.write(new Buffer(boundary.substr(0, 4) + 'HAHA'));
 47 |   assert.equal(parser._state, 'PREAMBLE');
 48 | 
 49 |   parser.write(new Buffer('--' + boundary + '\r\n'));
 50 |   assert.equal(parser._state, 'HEADER_FIELD');
 51 | });
 52 | 
 53 | test('#write: error: Invalid header token', function() {
 54 |     // ',' is an example for an invalid token for header fields (see RFC 2616)
 55 |   var buffer = new Buffer('Invalid,Header: ');
 56 |   parser._state = 'HEADER_FIELD';
 57 |   assertEmitsError(buffer, 'MultipartParser.InvalidHeaderFieldToken');
 58 | });
 59 | 
 60 | test('#write: Emit part object with lowercased headers', function() {
 61 |   var buffer = new Buffer('Header-1:value-1\r\nHeader-2:value-2\r\n\r\n');
 62 |   parser._state = 'HEADER_FIELD';
 63 |   parser._part  = new Part();
 64 | 
 65 |   parser.write(buffer);
 66 | 
 67 |   assert.deepEqual(parser._part.headers, {
 68 |     'header-1': 'value-1',
 69 |     'header-2': 'value-2',
 70 |   });
 71 | });
 72 | 
 73 | test('#write: Trim leading and trailing header value whitespace', function() {
 74 |   var buffer = new Buffer('header: value \r\n\r\n');
 75 |   parser._state = 'HEADER_FIELD';
 76 |   parser._part  = new Part();
 77 | 
 78 |   parser.write(buffer);
 79 | 
 80 |   assert.deepEqual(parser._part.headers, {'header': 'value'});
 81 | });
 82 | 
 83 | test('#write: error: CR on non-empty _headerField', function() {
 84 |   var buffer = new Buffer('head\r');
 85 |   parser._state = 'HEADER_FIELD';
 86 |   assertEmitsError(buffer, 'MultipartParser.InvalidHeaderFieldToken');
 87 | });
 88 | 
 89 | test('#write: no part headers', function() {
 90 |   var buffer = new Buffer('\r\n');
 91 |   parser._state = 'HEADER_FIELD';
 92 |   parser._part  = new Part();
 93 | 
 94 |   parser.write(buffer);
 95 | 
 96 |   assert.deepEqual(parser._part.headers, {});
 97 | });
 98 | 
 99 | test('#write: header buffer overflow in field', function() {
100 |   parser._headerBufferLimit = 2;
101 | 
102 |   var buffer = new Buffer('ab');
103 |   parser._state = 'HEADER_FIELD';
104 |   parser.write(buffer);
105 |   assertEmitsError(new Buffer('c'), 'MultipartParser.HeaderBufferOverflow');
106 | });
107 | 
108 | test('#write: emit part data', function() {
109 |   parser._part  = new Part();
110 |   parser._state = 'PART_BODY';
111 | 
112 |   var expected = [
113 |     new Buffer('abc'),
114 |     new Buffer('def'),
115 |   ];
116 | 
117 |   parser._part.on('data', function(buffer) {
118 |     assert.equal(''+buffer, ''+expected.shift());
119 |   });
120 | 
121 |   parser.write(expected[0]);
122 |   parser.write(expected[0]);
123 | 
124 |   assert.strictEqual(expected.length, 0);
125 |   assert.equal(parser._offset, 6);
126 | });
127 | 
128 | test('#write: hit partial boundary in part data', function() {
129 |   parser.boundary('end');
130 |   parser._preamble = false;
131 |   parser._part     = new Part();
132 |   parser._state    = 'PART_BODY';
133 | 
134 |   var buffers =[]; // string form of every chunk the part emits, in order
135 |   parser._part.on('data', function(buffer) {
136 |     buffers.push(''+buffer);
137 |   });
138 | 
139 |   parser.write(new Buffer('ab\r\n--enc')); // '\r\n--en' could begin a delimiter for boundary 'end'; the 'c' does not continue it
140 |   assert.deepEqual(buffers, ['ab', '\r\n--en', 'c']); // the partial match is expected as a chunk of its own
141 | });
142 | 
143 | test('#write: hit partial boundary in part data spread over 2 buffers', function() {
144 |   parser.boundary('end');
145 |   parser._preamble = false;
146 |   parser._part     = new Part();
147 |   parser._state    = 'PART_BODY';
148 | 
149 |   var buffers =[]; // string form of every chunk the part emits, in order
150 |   parser._part.on('data', function(buffer) {
151 |     buffers.push(''+buffer);
152 |   });
153 | 
154 |   var first = new Buffer('ab\r\n--e'); // ends in the middle of what could be a delimiter for boundary 'end'
155 |   var second = new Buffer('haha'); // does not continue that delimiter
156 | 
157 |   parser.write(first);
158 |   assert.equal(buffers.length, 1); // only 'ab' so far: '\r\n--e' is not emitted until the next write
159 | 
160 |   parser.write(second);
161 |   assert.deepEqual(buffers, ['ab', '\r\n--e', 'haha']);
162 | });
163 | 
164 | test('#write: hit partial boundary in part data spread over 3 buffers', function() {
165 |   parser.boundary('end');
166 |   parser._preamble = false;
167 |   parser._part     = new Part();
168 |   parser._state    = 'PART_BODY';
169 | 
170 |   var buffers =[];
171 |   parser._part.on('data', function(buffer) {
172 |     buffers.push(''+buffer);
173 |   });
174 | 
175 |   var first = new Buffer('ab\r\n--e');
176 |   var second = new Buffer('n');
177 |   var third = new Buffer('haha');
178 | 
179 |   parser.write(first);
180 |   assert.equal(buffers.length, 1);
181 | 
182 |   parser.write(second);
183 |   assert.equal(buffers.length, 1);
184 | 
185 |   parser.write(third);
186 |   assert.deepEqual(buffers, ['ab', '\r\n--en', 'haha']);
187 | });
188 | 
189 | function testRfc1341Entity(chunkSize) {
190 |   parser.boundary('simple boundary');
191 | 
192 |   var part1 =
193 |     'This is implicitly typed plain ASCII text.\r\n' +
194 |     'It does NOT end with a linebreak.';
195 |   var part2 =
196 |     'This is explicitly typed plain ASCII text.\r\n' +
197 |     'It DOES end with a linebreak.\r\n';
198 | 
199 |   var rfc1341Entity =
200 |     'This is the preamble.  It is to be ignored, though it\r\n' +
201 |     'is a handy place for mail composers to include an\r\n' +
202 |     'explanatory note to non-MIME compliant readers.\r\n' +
203 |     '--simple boundary\r\n' +
204 |     '\r\n' +
205 |     part1 +
206 |     '\r\n' +
207 |     '--simple boundary\r\n' +
208 |     'Content-type: text/plain; charset=us-ascii\r\n' +
209 |     '\r\n' +
210 |     part2 +
211 |     '\r\n' +
212 |     '--simple boundary--\r\n' +
213 |     'This is the epilogue.  It is also to be ignored.\r\n';
214 | 
215 |   var parts = [];
216 |   var ended = false;
217 |   parser
218 |     .on('error', function(error) {
219 |       throw error;
220 |     })
221 |     .on('part', function(part) {
222 |       parts.push(part);
223 | 
224 |       part.data = '';
225 |       part
226 |         .on('data', function(chunk) {
227 |           part.data += chunk;
228 |         })
229 |         .on('end', function() {
230 |           part.ended = true;
231 |         });
232 |     })
233 |     .on('end', function() {
234 |       ended = true;
235 |     });
236 | 
237 |   var buffer = new Buffer(rfc1341Entity);
238 |   if (!chunkSize) {
239 |     parser.write(buffer);
240 |   } else {
241 |     for (var i = 0; i < buffer.length; i += chunkSize) {
242 |       var end = (i + chunkSize < buffer.length)
243 |         ? i + chunkSize
244 |         : buffer.length;
245 | 
246 |       var chunk = new Buffer(buffer.slice(i, end));
247 |       parser.write(chunk);
248 |     }
249 |   }
250 | 
251 |   assert.equal(parts.length, 2);
252 |   assert.equal(parts[0].data, part1);
253 |   assert.equal(parts[1].data, part2);
254 |   parts.forEach(function(part, i) {
255 |     assert.ok(part.ended, 'Part ' + (i + 1) + ' did not end.');
256 |   });
257 |   assert.ok(ended);
258 | }
259 | 
260 | test('#write: full rfc1341 entity', function() {
261 |   testRfc1341Entity();
262 | });
263 | 
264 | // What can I say, my ability to visualize this state machine has its limits :)
265 | for (var i = 1; i <= 10; i++) {
266 |   test('#write: full rfc1341 entity with chunk size: ' + i, function() {
267 |     var chunkSize = parseInt(this.name.match(/\d+$/), 10);
268 |     testRfc1341Entity(chunkSize);
269 |   });
270 | }
271 | 


--------------------------------------------------------------------------------
/test/run.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | var far = require('far').create();
3 | 
4 | far.add(__dirname);
5 | far.include(/test-.*\.js$/);
6 | 
7 | far.execute();
8 | 


--------------------------------------------------------------------------------