├── BufferView.js
├── README
└── tests
    ├── ListZip.html
    └── ListZipContents.js


/BufferView.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * BufferView.js
  3 |  * Wrap an ArrayBuffer (using a DataView internally) and allow
  4 |  * reading and writing of values from it.  Differs from (and improves upon)
  5 |  * the DataView API in these ways:
  6 |  * 
  7 |  * - allows a default endianness to be specified
  8 |  * - keeps track of current buffer position, so you can read
  9 |  *   and write sequential values without tracking it yourself
 10 |  * - has methods for reading and writing UTF-8 strings
 11 |  */
 12 | "use strict";
 13 | 
 14 | var BufferView = (function() {
 15 | 
 16 |     function fail(msg) { throw new Error(msg); }
 17 | 
 18 |     /*
 19 |      * This constructor is like the DataView constructor, but requires you to
 20 |      * specify a default byte order.  First arg is always the buffer. Last arg
 21 |      * is the order.  Optional arguments between can specify buffer offset
 22 |      * and length.  Invoke in one of these 3 ways:
 23 |      * 
 24 |      *   new BufferView(buffer, order)
 25 |      *   new BufferView(buffer, offset, order)
 26 |      *   new BufferView(buffer, offset, length, order)
 27 |      */
 28 |     function BufferView(buffer, offset, length, byteorder) {
 29 |         if (arguments.length < 2 || arguments.length > 4) 
 30 |             fail("Wrong number of argments");
 31 |         if (arguments.length === 2) {
 32 |             byteorder = offset;
 33 |             offset = 0;
 34 |             length = buffer.byteLength;
 35 |         }
 36 |         else if (arguments.length === 3) {
 37 |             byteorder = length;
 38 |             length = buffer.byteLength - offset;
 39 |         }
 40 |         
 41 |         // XXX Should I support binary strings as well as Array buffers?
 42 |         // jDataView does that
 43 |         if (!(buffer instanceof ArrayBuffer))
 44 |             fail("Bad ArrayBuffer");
 45 | 
 46 |         // XXX Should negative offsets be measured from the end of the buffer?
 47 |         if (offset < 0 || offset > buffer.byteLength)
 48 |             fail("Illegal offset");
 49 |         if (length < 0 || offset+length > buffer.byteLength)
 50 |             fail("Illegal length");
 51 |         if (byteorder !== BufferView.LE && byteorder !== BufferView.BE)
 52 |             fail("Bad byte order");
 53 | 
 54 |         // Note that most of these properties are read-only
 55 |         Object.defineProperties(this, {
 56 |             buffer: {        // ArrayBufferView defines this property
 57 |                 value: buffer,
 58 |                 enumerable:false, writable: false, configurable: false
 59 |             },
 60 |             byteOffset: {   // ArrayBufferView defines this property
 61 |                 value: offset,
 62 |                 enumerable:false, writable: false, configurable: false
 63 |             },
 64 |             byteLength: {   // ArrayBufferView defines this property
 65 |                 value: length,
 66 |                 enumerable:false, writable: false, configurable: false
 67 |             },
 68 |             byteOrder: {    // New public read-only property of this type
 69 |                 value: byteorder,
 70 |                 enumerable:true, writable: false, configurable: false
 71 |             },
 72 |             index: {        // Public getter/setter for the buffer offset
 73 |                 get: function() { return this._index; },
 74 |                 set: function(x) {
 75 |                     if (x < 0) fail("negative index");
 76 |                     if (x > this.byteLength) 
 77 |                         fail("buffer overflow: index too large");
 78 |                     this._index = x;
 79 |                 },
 80 |                 enumerable: true, configurable: false
 81 |             },
 82 |             _index: {       // non-public property holds actual offset value
 83 |                 value: 0,
 84 |                 enumerable: false, writable: true, configurable: true
 85 |             },
 86 |             _bytes: {       // Raw bytes, non-public
 87 |                 value: new Uint8Array(buffer, offset, length),
 88 |                 enumerable:false, writable: false, configurable: false
 89 |             },
 90 |             _view: {       // non-public DataView for getting/setting numbers
 91 |                 value: new DataView(buffer, offset, length),
 92 |                 enumerable:false, writable: false, configurable: false
 93 |             }
 94 |         });
 95 |     }
 96 | 
 97 |     BufferView.prototype = {
 98 |         constructor: BufferView,
 99 |         isLE: function(order) {
100 |             switch(order) {
101 |             case BufferView.LE: return true;
102 |             case BufferView.BE: return false;
103 |             case undefined: return this.byteOrder == BufferView.LE;
104 |             default: fail("Invalid byte order");
105 |             }
106 |         },
107 | 
108 |         // Should I test the offset and raise my own exception if at EOF?
109 |         // Or can I just rely on the DataView exception?
110 |         // Or: should I return some kind of EOF indicator?  Tricky with
111 |         // 0 being a falsy value.  Can't just test with if.
112 |         readByte: function() {
113 |             return this._view.getInt8(this.index++);
114 |         },
115 |         readUnsignedByte: function() {
116 |             return this._view.getUint8(this.index++);
117 |         },
118 |         readShort: function(order) {
119 |             var val = this._view.getInt16(this.index, this.isLE(order));
120 |             this.index += 2;
121 |             return val;
122 |         },
123 |         readUnsignedShort: function(order) {
124 |             var val = this._view.getUint16(this.index, this.isLE(order));
125 |             this.index += 2;
126 |             return val;
127 |         },
128 |         readInt: function(order) {
129 |             var val = this._view.getInt32(this.index, this.isLE(order));
130 |             this.index += 4;
131 |             return val;
132 |         },
133 |         readUnsignedInt: function(order) {
134 |             var val = this._view.getUint32(this.index, this.isLE(order));
135 |             this.index += 4;
136 |             return val;
137 |         },
138 |         readFloat: function(order) {
139 |             var val = this._view.getFloat32(this.index, this.isLE(order));
140 |             this.index += 4;
141 |             return val;
142 |         },
143 |         readDouble: function(order) {
144 |             var val = this._view.getFloat64(this.index, this.isLE(order));
145 |             this.index += 8;
146 |             return val;
147 |         },
148 | 
149 |         // Do I need to do any error checking to ensure that the argument value
150 |         // is in the appropriate range?
151 |         writeByte: function(val) {
152 |             this._view.setInt8(this.index++, val);
153 |             return this;
154 |         },
155 |         writeUnsignedByte: function(val) {
156 |             this._view.setUint8(this.index++, val);
157 |             return this;
158 |         },
159 |         writeShort: function(val,order) {
160 |             var val = this._view.setInt16(this.index, val, this.isLE(order));
161 |             this.index += 2;
162 |             return this;
163 |         },
164 |         writeUnsignedShort: function(val,order) {
165 |             var val = this._view.setUint16(this.index, val, this.isLE(order));
166 |             this.index += 2;
167 |             return this;
168 |         },
169 |         writeInt: function(val,order) {
170 |             var val = this._view.setInt32(this.index, val, this.isLE(order));
171 |             this.index += 4;
172 |             return this;
173 |         },
174 |         writeUnsignedInt: function(val,order) {
175 |             var val = this._view.setUint32(this.index, val, this.isLE(order));
176 |             this.index += 4;
177 |             return this;
178 |         },
179 |         writeFloat: function(val,order) {
180 |             var val = this._view.setFloat32(this.index, val, this.isLE(order));
181 |             this.index += 4;
182 |             return this;
183 |         },
184 |         writeDouble: function(val,order) {
185 |             var val = this._view.setFloat64(this.index, val, this.isLE(order));
186 |             this.index += 8;
187 |             return this;
188 |         },
189 | 
190 |         skip: function(bytes) {
191 |             var newidx = this.index + bytes;
192 |             if (newidx < 0 || newidx > this.byteLength) fail("bad offset");
193 |             this.index = newidx;
194 |         },
195 | 
196 |         // Read n UTF-8 encoded characters and return them as a string.
197 |         // A UTF-16 surrogate pair counts as two characters.
198 |         readUTF8Chars: function(n) {
199 |             var bytes = this._bytes;  // The bytes we're decoding
200 |             var b = this.index;       // Index into bytes[]
201 |             var codepoints = [];      // Holds decoded characters
202 |             var c = 0;                // Index into codepoints[]
203 |             var b1, b2, b3, b4;       // Up to 4 bytes
204 | 
205 |             while(c < n) {
206 |                 b1 = bytes[b];
207 |                 if (b1 < 128) {
208 |                     codepoints[c++] = b1;
209 |                     b++;
210 |                 }
211 |                 else if (b1 < 194) {
212 |                     fail("unexpected continuation byte");
213 |                 }
214 |                 else if (b1 < 224) {
215 |                     // 2-byte sequence
216 |                     if (b+1 >= bytes.length) fail("unexepected end-of-buffer");
217 |                     b2 = bytes[b+1];
218 |                     if (b2 < 128 || b2 > 191) fail("bad continuation byte");
219 |                     codepoints[c++] = ((b1 & 0x1f) << 6) + (b2 & 0x3f);
220 |                     b+=2;
221 |                 }
222 |                 else if (b1 < 240) {
223 |                     // 3-byte sequence
224 |                     if (b+2 >= bytes.length) fail("unexepected end-of-buffer");
225 |                     b2 = bytes[b+1];
226 |                     if (b2 < 128 || b2 > 191) fail("bad continuation byte");
227 |                     b3 = bytes[b+2];
228 |                     if (b3 < 128 || b3 > 191) ffail("bad continuation byte");
229 |                     codepoints[c++] = ((b1 & 0x0f) << 12) +
230 |                         ((b2 & 0x3f) << 6) + (b3 & 0x3f);
231 |                     b+=3;
232 |                 }
233 |                 else if (b1 < 245) {
234 |                     // 4-byte sequence
235 |                     if (b+3 >= bytes.length) fail("unexepected end-of-buffer");
236 |                     b2 = bytes[b+1];
237 |                     if (b2 < 128 || b2 > 191) fail("bad continuation byte");
238 |                     b3 = bytes[b+2];
239 |                     if (b3 < 128 || b3 > 191) fail("bad continuation byte");
240 |                     b4 = bytes[b+3];
241 |                     if (b4 < 128 || b4 > 191) fail("bad continuation byte");
242 |                     var cp = ((b1 & 0x07) << 18) + ((b2 & 0x3f) << 12) +
243 |                         ((b3 & 0x3f) << 6) + (b4 & 0x3f);
244 |                     cp -= 0x10000;
245 | 
246 |                     // If there isn't room for two UTF-16 pairs
247 |                     if (c == n-1) fail("Unexpected surrogate pair");
248 | 
249 |                     // Now turn this code point into two surrogate pairs
250 |                     codepoints[c++] = 0xd800 + ((cp & 0x0FFC00)>>>10);
251 |                     codepoints[c++] = 0xdc00 + (cp & 0x0003FF);
252 | 
253 |                     b+=4;
254 |                 }
255 |                 else {
256 |                     // Illegal byte
257 |                     fail();
258 |                 }
259 |             }
260 | 
261 |             this.index = b;
262 |             return stringFromCodepoints(codepoints);
263 |         },
264 | 
265 |         // Encode the characters of s as UTF-8 and write them.
266 |         // Return the number of bytes written.
267 |         // This method is named "writeUTF8Chars" instead of "writeUTF8String"
268 |         // because it does not record the length of the string or write a
269 |         // terminating byte to mark the end of the string, so some higher-level
270 |         // mechanism of recording the number of characters is necessary.
271 |         writeUTF8Chars: function(s) {
272 |             var bytes = this._bytes;
273 |             var b = this.index;  // byte index in bytes array
274 |             var i=0;             // character index in the string s;
275 |             
276 |             for(i = 0; i < s.length; i++) {
277 |                 var c = s.charCodeAt(i);
278 |                 
279 |                 if (c <= 0x7F) {       // One byte of UTF-8
280 |                     if (b >= bytes.length) fail("ArrayBuffer overflow");
281 |                     bytes[b++] = c;
282 |                 }
283 |                 else if (c <= 0x7FF) { // Two bytes of UTF-8
284 |                     if (b+1 >= bytes.length) fail("ArrayBuffer overflow");
285 |                     bytes[b++] = 0xC0 | ((c & 0x7C0)>>>6);
286 |                     bytes[b++] = 0x80 | (c & 0x3F);
287 |                 }
288 |                 else if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF)) {
289 |                     // Three bytes of UTF-8.  
290 |                     // Source character is not a UTF-16 surrogate.
291 |                     if (b+2 >= bytes.length) fail("ArrayBuffer overflow");
292 |                     bytes[b++] = 0xE0 | ((c & 0xF000) >>> 12);
293 |                     bytes[b++] = 0x80 | ((c & 0x0FC0) >>> 6);
294 |                     bytes[b++] = 0x80 | (c & 0x3f);
295 |                 }
296 |                 else {
297 |                     if (b+3 >= bytes.length) fail("ArrayBuffer overflow");
298 |                     if (i == s.length-1) fail("Unpaired surrogate");
299 |                     var d = s.charCodeAt(++i);
300 |                     if (c < 0xD800 || c > 0xDBFF || d < 0xDC00 || d > 0xDFFF) {
301 |                         console.log(i-2, c.toString(16), d.toString(16))
302 |                         fail("Unpaired surrogate");
303 |                     }
304 |                     
305 |                     var cp = ((c & 0x03FF) << 10) + (d & 0x03FF) + 0x10000;
306 | 
307 |                     bytes[b++] = 0xF0 | ((cp & 0x1C0000) >>> 18);
308 |                     bytes[b++] = 0x80 | ((cp & 0x03F000) >>> 12);
309 |                     bytes[b++] = 0x80 | ((cp & 0x000FC0) >>> 6);
310 |                     bytes[b++] = 0x80 | (cp & 0x3f);
311 |                 }
312 |             }
313 |             var numbytes = b - this.index;  // How many bytes written
314 |             this.index = b;
315 |             return numbytes;
316 |         },
317 | 
318 |         // Also methods for reading and writing binary strings?
319 |     };
320 | 
321 |     // The following are constants for specifying endianness and can also be
322 |     // used as factory functions or constructors.
323 |     BufferView.LE = function(buffer, offset, length) {
324 |         return new BufferView(buffer, offset, length, BufferView.LE);
325 |     }
326 | 
327 |     BufferView.BE = function(buffer, offset, length) {
328 |         return new BufferView(buffer, offset, length, BufferView.BE);
329 |     }
330 | 
331 |     return BufferView;
332 | 
333 |     function stringFromCodepoints(codepoints) {
334 |         // Not all browsers allow you to call Function.apply() 
335 |         // with arbitrarily long arrays.
336 |         if (codepoints.length < 65536) 
337 |             return String.fromCharCode.apply(String, codepoints);
338 |         else {
339 |             var chunks = [];
340 |             var start = 0, end = 65536;
341 |             while(start < codepoints.length) {
342 |                 var slice = codepoints.slice(start, end);
343 |                 chunks.push(String.fromCharCode.apply(String, slice));
344 |                 start = end;
345 |                 end = end + 65536;
346 |                 if (end > codepoints.length) end = codepoints.length;
347 |             }
348 |             return chunks.join("");
349 |         }
350 |     }
351 | }());
352 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | BufferView is a wrapper around the DataView class from the Typed Arrays
 2 | specification: https://www.khronos.org/registry/typedarray/specs/1.0/
 3 | 
 4 | DataView has a minimal and awkward API. BufferView improves the API by
 5 | keeping track of the current offset and allowing the specification of a
 6 | default endianness.  BufferView also adds UTF-8 encoding and decoding
 7 | support.
 8 | 
 9 | I've been thinking about ways to read and write multiple values at a
10 | time, and am considering implementing methods like Python's (and Perl's)
11 | pack() and unpack(). 
12 | 
13 | If I get more ambitious, I may try to make BufferView work with binary
14 | strings or with Node's Buffer class.
15 | 
16 | Right now this code is mostly untested.  But see the example in the tests
17 | directory: it lists the contents of a zip file.
18 | 
19 | Note that Firefox 4 does not support DataView.  But you can emulate it
20 | with https://github.com/davidflanagan/DataView.js
21 | 


--------------------------------------------------------------------------------
/tests/ListZip.html:
--------------------------------------------------------------------------------
 1 | <script src="../BufferView.js"></script>
 2 | <script src="ListZipContents.js"></script>
 3 | <script>
 4 | function list(f) {
 5 |     var reader = new FileReader();
 6 |     reader.readAsArrayBuffer(f);
 7 |     reader.onload = function() {
 8 |         var buffer = reader.result;
 9 |         var contents = listZipContents(buffer);
10 |         console.log(contents);
11 |     }
12 |     reader.onerror = function(e) {
13 |         console.log("File reader error", e);
14 |     }
15 | }
16 | </script>
17 | <body>
18 | Select a zip file: <input type="file" onchange="list(this.files[0])">
19 | </body>
20 | 


--------------------------------------------------------------------------------
/tests/ListZipContents.js:
--------------------------------------------------------------------------------
 1 | // Given an ArrayBuffer that holds a zip archive,
 2 | // return an array filenames
 3 | function listZipContents(buffer) {
 4 |     var bv = new BufferView(buffer, BufferView.LE);
 5 |     
 6 |     // Verify that this is actually a 
 7 |     var magic = bv.readUnsignedShort();
 8 |     if (magic !== 0x4b50) throw new Error("Not a zip file");
 9 | 
10 |     // Skip to the end of the file and search backwards for the
11 |     // end-of-central-directory record
12 |     bv.index = bv.byteLength - 22;
13 |     while((magic = bv.readUnsignedInt()) !== 0x06054b50) bv.index -= 5;
14 | 
15 |     // Skip preliminary bytes in the e-o-c-d record
16 |     bv.skip(12);
17 |     
18 |     // Read the offset of the start of the central directory, and go there.
19 |     bv.index = bv.readUnsignedInt();
20 |     
21 |     var filenames = [];  // where we'll store our results
22 | 
23 |     // While the central directory contains another file entry
24 |     while(bv.readUnsignedInt() === 0x02014b50) {
25 |         bv.skip(24);
26 |         var namelen = bv.readUnsignedShort();
27 |         var extralen = bv.readUnsignedShort();
28 |         var commentlen = bv.readUnsignedShort();
29 |         bv.skip(12);
30 |         // We're assuming all names are ASCII and the length in bytes
31 |         // equals the length in characters.  Using readUTF8Chars instead
32 |         // of a simpler but non-existent readASCIIChars method.
33 |         filenames.push(bv.readUTF8Chars(namelen));
34 |         bv.skip(extralen + commentlen);
35 |     }
36 | 
37 |     return filenames;
38 | }
39 | 


--------------------------------------------------------------------------------