// Repository layout:
//
//   ├── BufferView.js
//   ├── README
//   └── tests
//       ├── ListZip.html
//       └── ListZipContents.js
//
// ---------------------------------------------------------------------------
// /BufferView.js
// ---------------------------------------------------------------------------
/*
 * BufferView.js
 * Wrap an ArrayBuffer (using a DataView internally) and allow
 * reading and writing of values from it. Differs from (and improves upon)
 * the DataView API in these ways:
 *
 * - allows a default endianness to be specified
 * - keeps track of current buffer position, so you can read
 *   and write sequential values without tracking it yourself
 * - has methods for reading and writing UTF-8 strings
 */
"use strict";

var BufferView = (function() {

    // All errors raised by this module are plain Error objects.
    function fail(msg) { throw new Error(msg); }

    // Convert an array of UTF-16 code units to a string.
    // Not all engines allow Function.apply() with arbitrarily long
    // argument arrays, so long inputs are converted in fixed-size chunks
    // and the pieces are joined.
    function stringFromCodepoints(codepoints) {
        var CHUNK = 0x8000;
        if (codepoints.length <= CHUNK)
            return String.fromCharCode.apply(String, codepoints);

        var chunks = [];
        for (var start = 0; start < codepoints.length; start += CHUNK) {
            chunks.push(String.fromCharCode.apply(
                String, codepoints.slice(start, start + CHUNK)));
        }
        return chunks.join("");
    }

    // Return true if byte is a UTF-8 continuation byte (10xxxxxx).
    function isContinuation(byte) {
        return byte >= 128 && byte <= 191;
    }

    /*
     * This constructor is like the DataView constructor, but requires you to
     * specify a default byte order. First arg is always the buffer. Last arg
     * is the order. Optional arguments between can specify buffer offset
     * and length. Invoke in one of these 3 ways:
     *
     *   new BufferView(buffer, order)
     *   new BufferView(buffer, offset, order)
     *   new BufferView(buffer, offset, length, order)
     *
     * In the 4-argument form an offset or length of undefined/null selects
     * the default (offset 0, length "to the end of the buffer"); this is
     * what the BufferView.LE() and BufferView.BE() factories rely on.
     *
     * Throws an Error if the argument count is wrong, if buffer is not an
     * ArrayBuffer, if offset/length do not describe a range inside the
     * buffer, or if the byte order is not BufferView.LE or BufferView.BE.
     */
    function BufferView(buffer, offset, length, byteorder) {
        if (arguments.length < 2 || arguments.length > 4)
            fail("Wrong number of argments");

        // XXX Should I support binary strings as well as Array buffers?
        // jDataView does that
        // Validate the buffer before touching buffer.byteLength below, so
        // that null/undefined produce this error and not a TypeError.
        if (!(buffer instanceof ArrayBuffer))
            fail("Bad ArrayBuffer");

        // Shuffle the optional arguments into place.
        if (arguments.length === 2) {
            byteorder = offset;
            offset = undefined;
            length = undefined;
        }
        else if (arguments.length === 3) {
            byteorder = length;
            length = undefined;
        }

        if (offset === undefined || offset === null)
            offset = 0;
        if (length === undefined || length === null)
            length = buffer.byteLength - offset;

        // XXX Should negative offsets be measured from the end of the buffer?
        // The checks are written in negated form so that NaN is rejected too.
        if (!(offset >= 0 && offset <= buffer.byteLength))
            fail("Illegal offset");
        if (!(length >= 0 && offset + length <= buffer.byteLength))
            fail("Illegal length");
        if (byteorder !== BufferView.LE && byteorder !== BufferView.BE)
            fail("Bad byte order");

        // Note that most of these properties are read-only
        Object.defineProperties(this, {
            buffer: {      // ArrayBufferView defines this property
                value: buffer,
                enumerable: false, writable: false, configurable: false
            },
            byteOffset: {  // ArrayBufferView defines this property
                value: offset,
                enumerable: false, writable: false, configurable: false
            },
            byteLength: {  // ArrayBufferView defines this property
                value: length,
                enumerable: false, writable: false, configurable: false
            },
            byteOrder: {   // New public read-only property of this type
                value: byteorder,
                enumerable: true, writable: false, configurable: false
            },
            index: {       // Public getter/setter for the buffer offset
                get: function() { return this._index; },
                set: function(x) {
                    if (x < 0) fail("negative index");
                    // index === byteLength is legal: it means "at the end"
                    if (x > this.byteLength)
                        fail("buffer overflow: index too large");
                    this._index = x;
                },
                enumerable: true, configurable: false
            },
            _index: {      // non-public property holds actual offset value
                value: 0,
                enumerable: false, writable: true, configurable: true
            },
            _bytes: {      // Raw bytes, non-public
                value: new Uint8Array(buffer, offset, length),
                enumerable: false, writable: false, configurable: false
            },
            _view: {       // non-public DataView for getting/setting numbers
                value: new DataView(buffer, offset, length),
                enumerable: false, writable: false, configurable: false
            }
        });
    }

    BufferView.prototype = {
        constructor: BufferView,

        // Resolve an optional per-call byte order to the boolean that the
        // DataView get/set methods expect as their "littleEndian" argument.
        // With no argument, the view's default byte order is used.
        // Throws if order is neither BufferView.LE, BufferView.BE nor
        // undefined.
        isLE: function(order) {
            switch(order) {
            case BufferView.LE: return true;
            case BufferView.BE: return false;
            case undefined: return this.byteOrder === BufferView.LE;
            default: fail("Invalid byte order");
            }
        },

        // Should I test the offset and raise my own exception if at EOF?
        // Or can I just rely on the DataView exception?
        // Or: should I return some kind of EOF indicator?  Tricky with
        // 0 being a falsy value.  Can't just test with if.
        //
        // Each read method returns the value at the current index and
        // advances the index by the size of the value. The multi-byte
        // methods accept an optional byte order that overrides the default.
        readByte: function() {
            return this._view.getInt8(this.index++);
        },
        readUnsignedByte: function() {
            return this._view.getUint8(this.index++);
        },
        readShort: function(order) {
            var val = this._view.getInt16(this.index, this.isLE(order));
            this.index += 2;
            return val;
        },
        readUnsignedShort: function(order) {
            var val = this._view.getUint16(this.index, this.isLE(order));
            this.index += 2;
            return val;
        },
        readInt: function(order) {
            var val = this._view.getInt32(this.index, this.isLE(order));
            this.index += 4;
            return val;
        },
        readUnsignedInt: function(order) {
            var val = this._view.getUint32(this.index, this.isLE(order));
            this.index += 4;
            return val;
        },
        readFloat: function(order) {
            var val = this._view.getFloat32(this.index, this.isLE(order));
            this.index += 4;
            return val;
        },
        readDouble: function(order) {
            var val = this._view.getFloat64(this.index, this.isLE(order));
            this.index += 8;
            return val;
        },

        // Do I need to do any error checking to ensure that the argument value
        // is in the appropriate range?
        //
        // Each write method stores val at the current index, advances the
        // index by the size of the value and returns this, so that writes
        // can be chained.
        writeByte: function(val) {
            this._view.setInt8(this.index++, val);
            return this;
        },
        writeUnsignedByte: function(val) {
            this._view.setUint8(this.index++, val);
            return this;
        },
        writeShort: function(val, order) {
            this._view.setInt16(this.index, val, this.isLE(order));
            this.index += 2;
            return this;
        },
        writeUnsignedShort: function(val, order) {
            this._view.setUint16(this.index, val, this.isLE(order));
            this.index += 2;
            return this;
        },
        writeInt: function(val, order) {
            this._view.setInt32(this.index, val, this.isLE(order));
            this.index += 4;
            return this;
        },
        writeUnsignedInt: function(val, order) {
            this._view.setUint32(this.index, val, this.isLE(order));
            this.index += 4;
            return this;
        },
        writeFloat: function(val, order) {
            this._view.setFloat32(this.index, val, this.isLE(order));
            this.index += 4;
            return this;
        },
        writeDouble: function(val, order) {
            this._view.setFloat64(this.index, val, this.isLE(order));
            this.index += 8;
            return this;
        },

        // Move the index forward (or backward, for negative values) by the
        // given number of bytes. Throws if the new index would fall outside
        // the view. Returns this for chaining, like the write methods.
        skip: function(bytes) {
            var newidx = this.index + bytes;
            // Negated form so that a NaN result is rejected as well.
            if (!(newidx >= 0 && newidx <= this.byteLength)) fail("bad offset");
            this.index = newidx;
            return this;
        },

        // Read n UTF-8 encoded characters and return them as a string.
        // A UTF-16 surrogate pair counts as two characters.
        // The index is advanced past the bytes consumed; it is left
        // unchanged if decoding fails. Throws on truncated input, on
        // malformed sequences, and if a surrogate pair would have to be
        // split to return exactly n characters.
        readUTF8Chars: function(n) {
            var bytes = this._bytes;  // The bytes we're decoding
            var b = this.index;       // Index into bytes[]
            var codepoints = [];      // Holds decoded UTF-16 code units
            var c = 0;                // Index into codepoints[]
            var b1, b2, b3, b4;       // Up to 4 bytes
            var cp;                   // Code point of a 4-byte sequence

            while(c < n) {
                // Without this check bytes[b] would be undefined at the end
                // of the buffer and fall through to the illegal-byte case.
                if (b >= bytes.length) fail("unexepected end-of-buffer");
                b1 = bytes[b];
                if (b1 < 128) {
                    codepoints[c++] = b1;
                    b++;
                }
                else if (b1 < 194) {
                    fail("unexpected continuation byte");
                }
                else if (b1 < 224) {
                    // 2-byte sequence
                    if (b+1 >= bytes.length) fail("unexepected end-of-buffer");
                    b2 = bytes[b+1];
                    if (!isContinuation(b2)) fail("bad continuation byte");
                    codepoints[c++] = ((b1 & 0x1f) << 6) + (b2 & 0x3f);
                    b += 2;
                }
                else if (b1 < 240) {
                    // 3-byte sequence
                    if (b+2 >= bytes.length) fail("unexepected end-of-buffer");
                    b2 = bytes[b+1];
                    if (!isContinuation(b2)) fail("bad continuation byte");
                    b3 = bytes[b+2];
                    if (!isContinuation(b3)) fail("bad continuation byte");
                    codepoints[c++] = ((b1 & 0x0f) << 12) +
                        ((b2 & 0x3f) << 6) + (b3 & 0x3f);
                    b += 3;
                }
                else if (b1 < 245) {
                    // 4-byte sequence
                    if (b+3 >= bytes.length) fail("unexepected end-of-buffer");
                    b2 = bytes[b+1];
                    if (!isContinuation(b2)) fail("bad continuation byte");
                    b3 = bytes[b+2];
                    if (!isContinuation(b3)) fail("bad continuation byte");
                    b4 = bytes[b+3];
                    if (!isContinuation(b4)) fail("bad continuation byte");
                    cp = ((b1 & 0x07) << 18) + ((b2 & 0x3f) << 12) +
                        ((b3 & 0x3f) << 6) + (b4 & 0x3f);

                    // Only U+10000..U+10FFFF can be expressed as a
                    // surrogate pair; anything else would silently decode
                    // to the wrong characters.
                    if (cp < 0x10000 || cp > 0x10FFFF)
                        fail("illegal code point");
                    cp -= 0x10000;

                    // If there isn't room for both halves of the pair
                    if (c === n-1) fail("Unexpected surrogate pair");

                    // Now turn this code point into a surrogate pair
                    codepoints[c++] = 0xd800 + (cp >>> 10);
                    codepoints[c++] = 0xdc00 + (cp & 0x0003FF);

                    b += 4;
                }
                else {
                    // Bytes 0xF5..0xFF never appear in UTF-8
                    fail("illegal UTF-8 byte");
                }
            }

            this.index = b;
            return stringFromCodepoints(codepoints);
        },

        // Encode the characters of s as UTF-8 and write them.
        // Return the number of bytes written.
        // This method is named "writeUTF8Chars" instead of "writeUTF8String"
        // because it does not record the length of the string or write a
        // terminating byte to mark the end of the string, so some higher-level
        // mechanism of recording the number of characters is necessary.
        // Throws if s contains an unpaired surrogate or does not fit in the
        // remaining space; the index is not advanced in that case (bytes
        // encoded before the failure have already been stored).
        writeUTF8Chars: function(s) {
            var bytes = this._bytes;
            var b = this.index;    // byte index in bytes array
            var i;                 // character index in the string s
            var c, d;              // UTF-16 code units from s
            var cp;                // code point built from a surrogate pair

            for(i = 0; i < s.length; i++) {
                c = s.charCodeAt(i);

                if (c <= 0x7F) {        // One byte of UTF-8
                    if (b >= bytes.length) fail("ArrayBuffer overflow");
                    bytes[b++] = c;
                }
                else if (c <= 0x7FF) {  // Two bytes of UTF-8
                    if (b+1 >= bytes.length) fail("ArrayBuffer overflow");
                    bytes[b++] = 0xC0 | ((c & 0x7C0) >>> 6);
                    bytes[b++] = 0x80 | (c & 0x3F);
                }
                else if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF)) {
                    // Three bytes of UTF-8.
                    // Source character is not a UTF-16 surrogate.
                    if (b+2 >= bytes.length) fail("ArrayBuffer overflow");
                    bytes[b++] = 0xE0 | ((c & 0xF000) >>> 12);
                    bytes[b++] = 0x80 | ((c & 0x0FC0) >>> 6);
                    bytes[b++] = 0x80 | (c & 0x3f);
                }
                else {
                    // c is a surrogate: it must be a high surrogate that is
                    // immediately followed by a low surrogate.
                    if (c > 0xDBFF || i === s.length-1)
                        fail("Unpaired surrogate");
                    d = s.charCodeAt(i+1);
                    if (d < 0xDC00 || d > 0xDFFF) fail("Unpaired surrogate");
                    if (b+3 >= bytes.length) fail("ArrayBuffer overflow");
                    i++;   // the low surrogate has been consumed

                    cp = ((c & 0x03FF) << 10) + (d & 0x03FF) + 0x10000;

                    bytes[b++] = 0xF0 | ((cp & 0x1C0000) >>> 18);
                    bytes[b++] = 0x80 | ((cp & 0x03F000) >>> 12);
                    bytes[b++] = 0x80 | ((cp & 0x000FC0) >>> 6);
                    bytes[b++] = 0x80 | (cp & 0x3f);
                }
            }
            var numbytes = b - this.index;  // How many bytes written
            this.index = b;
            return numbytes;
        }

        // Also methods for reading and writing binary strings?
    };

    // The following are constants for specifying endianness and can also be
    // used as factory functions or constructors. offset and length are
    // optional and default to the whole buffer.
    BufferView.LE = function(buffer, offset, length) {
        return new BufferView(buffer, offset, length, BufferView.LE);
    };

    BufferView.BE = function(buffer, offset, length) {
        return new BufferView(buffer, offset, length, BufferView.BE);
    };

    return BufferView;
}());

// ---------------------------------------------------------------------------
// /README
// ---------------------------------------------------------------------------
// BufferView is a wrapper around the DataView class from the Typed Arrays
// specification: https://www.khronos.org/registry/typedarray/specs/1.0/
//
// DataView has a minimal and awkward API. BufferView improves the API by
// keeping track of the current offset and allowing the specification of a
// default endianness. BufferView also adds UTF-8 encoding and decoding
// support.
//
// I've been thinking about ways to read and write multiple values at a
// time, and am considering implementing methods like Python's (and Perl's)
// pack() and unpack().
//
// If I get more ambitious, I may try to make BufferView work with binary
// strings or with Node's Buffer class.
//
// Right now this code is mostly untested. But see the example in the test
// directory: it lists the contents of a zip file.
//
// Note that Firefox 4 does not support DataView. But you can emulate it
// with https://github.com/davidflanagan/DataView.js
//
// ---------------------------------------------------------------------------
// /tests/ListZip.html
// ---------------------------------------------------------------------------
// (No text from the first 17 lines of this page is present in this listing.)
// (tests/ListZip.html, continued; the only text present in this listing is:)
//   Select a zip file:
//
// ---------------------------------------------------------------------------
// /tests/ListZipContents.js
// ---------------------------------------------------------------------------
// Given an ArrayBuffer that holds a zip archive,
// return an array of the filenames listed in its central directory.
//
// Throws an Error if the buffer does not start with the "PK" magic number,
// is too small to hold an end-of-central-directory record, or does not
// contain one. Errors raised by BufferView (for example for a file name
// that is not valid UTF-8) propagate unchanged.
function listZipContents(buffer) {
    var EOCD_SIGNATURE = 0x06054b50;     // end-of-central-directory record
    var ENTRY_SIGNATURE = 0x02014b50;    // central directory file header
    var EOCD_MIN_SIZE = 22;              // size of an e-o-c-d with no comment
    var MAX_COMMENT_SIZE = 0xFFFF;       // comment length is a 16-bit field

    // Read a file name that occupies exactly nbytes bytes of UTF-8.
    // BufferView.readUTF8Chars() counts characters, not bytes, so decode
    // one encoded sequence at a time until the byte count is used up.
    function readName(bv, nbytes) {
        var end = bv.index + nbytes;
        var name = "";
        while (bv.index < end) {
            // Peek at the lead byte: a 4-byte sequence decodes to a
            // surrogate pair, which readUTF8Chars counts as 2 characters.
            var lead = bv.readUnsignedByte();
            bv.index -= 1;
            name += bv.readUTF8Chars(lead >= 0xF0 ? 2 : 1);
        }
        if (bv.index !== end) throw new Error("Malformed file name");
        return name;
    }

    var bv = new BufferView(buffer, BufferView.LE);

    // Verify that this is actually a zip file: it must be large enough to
    // hold an e-o-c-d record and must start with the bytes "PK".
    if (bv.byteLength < EOCD_MIN_SIZE) throw new Error("Not a zip file");
    var magic = bv.readUnsignedShort();
    if (magic !== 0x4b50) throw new Error("Not a zip file");

    // Start where the end-of-central-directory record sits when the archive
    // has no trailing comment, and search backwards one byte at a time.
    // The comment is at most 65535 bytes long, so stop looking after that.
    var pos = bv.byteLength - EOCD_MIN_SIZE;
    var lowest = Math.max(0, pos - MAX_COMMENT_SIZE);
    var found = false;
    for (; pos >= lowest; pos--) {
        bv.index = pos;
        if (bv.readUnsignedInt() === EOCD_SIGNATURE) {
            found = true;
            break;
        }
    }
    if (!found) throw new Error("End of central directory record not found");

    // Skip preliminary bytes in the e-o-c-d record
    bv.skip(12);

    // Read the offset of the start of the central directory, and go there.
    bv.index = bv.readUnsignedInt();

    var filenames = [];  // where we'll store our results

    // While the central directory contains another file entry
    while (bv.index + 4 <= bv.byteLength &&
           bv.readUnsignedInt() === ENTRY_SIGNATURE) {
        bv.skip(24);
        var namelen = bv.readUnsignedShort();
        var extralen = bv.readUnsignedShort();
        var commentlen = bv.readUnsignedShort();
        bv.skip(12);
        // namelen is a length in bytes; the name is decoded as UTF-8
        // (of which ASCII is a subset).
        filenames.push(readName(bv, namelen));
        bv.skip(extralen + commentlen);
    }

    return filenames;
}