├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── Makefile
├── README.md
├── example_reader.js
├── example_writer.js
├── index.js
├── lib
│   ├── datafile.js
│   ├── errors.js
│   ├── io.js
│   └── schema.js
├── package.json
├── run_tests.js
├── test.avro
├── test.js
├── test
│   ├── data
│   │   ├── dependents
│   │   │   ├── child.avsc
│   │   │   ├── parent.avsc
│   │   │   └── parent.json
│   │   ├── double.json
│   │   ├── double.schema
│   │   ├── enum.json
│   │   ├── enum.schema
│   │   ├── float.json
│   │   ├── float.schema
│   │   ├── log.schema
│   │   ├── nested.json
│   │   ├── nested.schema
│   │   ├── string.json
│   │   ├── string.schema
│   │   ├── string_test.json
│   │   └── string_test.schema
│   ├── datafile.test.js
│   ├── io.test.js
│   ├── mocha.opts
│   ├── schema.js
│   └── schema.test.js
└── tools
    └── avro-tools-1.7.7.jar
/.gitignore: -------------------------------------------------------------------------------- 1 | cache/ 2 | node_modules/ 3 | .DS_Store 4 | test/data/*.avro* 5 | npm-debug.log 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "4" 4 | - "6" 5 | - "8" 6 | - "10" 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v2.0.0 4 | 5 | - Adopt snappy 6.x 6 | - Update int64-native to 0.5 7 | - Drop support for node 0.10 & 0.12 8 | 9 | ## v1.0.0 10 | 11 | - Adopt snappy 5.x 12 | - Support parsing schemas that import and depend on other schemas 13 | - Drop support for iojs & node 0.8 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PATH := ${PATH}:/usr/local/bin:./node_modules/.bin/ 2 | AVRO_TOOLS_JAR = avro-tools-1.7.7.jar 3 | AVRO_TOOLS = tools/$(AVRO_TOOLS_JAR) 4 | TEST_DATA = test/data 5 | REPORTER = spec 6 | AVRO_SRCS := $(wildcard $(TEST_DATA)/*.json) 7 | AVRO_FILES := $(AVRO_SRCS:.json=.avro) 8 | 9 | $(AVRO_TOOLS): 10 | -@[ -d "tools" ] || mkdir tools 11 | -@[ -e $(AVRO_TOOLS) ] || (cd tools && curl -sO http://www.mirrorservice.org/sites/ftp.apache.org/avro/avro-1.7.7/java/$(AVRO_TOOLS_JAR)) 12 | 13 | test: $(AVRO_FILES) 14 | @NODE_ENV=test ./node_modules/.bin/mocha -R $(REPORTER) 15 | 16 | lib-cov: 17 | @jscoverage --no-highlight lib lib-cov 18 | 19 | coverage: lib-cov 20 | -@MOCHA_COV=1 $(MAKE) test REPORTER=html-cov > coverage.html 21 | -@open coverage.html 22 | 23 | $(TEST_DATA)/%.avro: $(AVRO_TOOLS) $(TEST_DATA)/%.json $(TEST_DATA)/%.schema 24 | java -jar $(AVRO_TOOLS) fromjson --schema-file $(word 3,$^) $(word 2,$^) > $@ 25 | java -jar $(AVRO_TOOLS) random --codec deflate --count 4096 --schema-file $(TEST_DATA)/log.schema $(TEST_DATA)/log.deflate.avro 26 | java -jar $(AVRO_TOOLS) random --codec snappy --count 4096 --schema-file $(TEST_DATA)/log.schema $(TEST_DATA)/log.snappy.avro 27 | 28 | debug: 29 | @NODE_ENV=test ./node_modules/.bin/mocha debug -R $(REPORTER) 30 | 31 | clean: 32 | -@[ -f coverage.html ] && rm coverage.html || exit 0 33 | -@[ -d lib-cov ] && rm -rf lib-cov || exit 0 34 | -@rm $(TEST_DATA)/*.avro || exit 0 35 | -@rm $(TEST_DATA)/.*avro.crc || exit 0 36 | -@rm -rf tools || exit 0 37 | 38 | all: test coverage 39 | 40 | .PHONY: test lib-cov coverage avro debug clean all 41 | -------------------------------------------------------------------------------- /README.md:
-------------------------------------------------------------------------------- 1 | Node Avro IO 2 | ============ 3 | 4 | [![Build Status](https://secure.travis-ci.org/jamesbrucepower/node-avro-io.png)](http://travis-ci.org/jamesbrucepower/node-avro-io) 5 | 6 | Implements the [avro spec](http://avro.apache.org/docs/current/spec.html) 7 | 8 | The status of this repository is *initial release* 9 | 10 | ```bash 11 | npm install node-avro-io 12 | ``` 13 | 14 | or 15 | 16 | ```bash 17 | npm install git://github.com/jamesbrucepower/node-avro-io.git 18 | ``` 19 | 20 | Serializing data to an avro binary file 21 | ```js 22 | var avro = require('./index').DataFile.AvroFile(); 23 | var schema = { 24 | "name": "data", 25 | "type": "record", 26 | "fields": [ 27 | {"name":"key","type": "string"}, 28 | {"name":"value","type": "string"}, 29 | {"name":"flag","type": "boolean"}, 30 | {"name":"subrecord","type":"record","fields":[ 31 | {"name":"key","type":"string"}, 32 | {"name":"value","type":["string","int","null"]} 33 | ]} 34 | ] 35 | }; 36 | var writer = avro.open("test.avro", schema, { flags: 'w', codec: 'deflate' }); 37 | writer 38 | .append({ key:"john", value:"hive", flag: true, subrecord: { key: "preference", value: 2}}) 39 | .append({ key:"eric", value:"lola", flag: true, subrecord: { key: "postcode", value: null}}) 40 | .end({ key:"fred", value:"wonka", flag: false, subrecord: { key: "city", value: "michigan"}}); 41 | ``` 42 | 43 | Deserializing data from an avro binary file 44 | ```js 45 | require("./index").DataFile.AvroFile() 46 | .open('test.avro', null, { flags: 'r' }) 47 | .on('data', function(data) { 48 | console.log(data); 49 | }); 50 | ``` 51 | 52 | See the tests for more usage examples 53 | 54 | TODO: 55 | 56 | - Avro RPC 57 | - Support for Trevni (column major data serialization) -------------------------------------------------------------------------------- /example_reader.js: -------------------------------------------------------------------------------- 1 | require("./index").DataFile.AvroFile() 2 | .open('test.avro', null, { flags: 'r' }) 3 | .on('data', function(data) { 4 | console.log(data); 5 | }); 6 | -------------------------------------------------------------------------------- /example_writer.js: -------------------------------------------------------------------------------- 1 | var avro = require('./index').DataFile.AvroFile(); 2 | var schema = { 3 | "name": "data", 4 | "type": "record", 5 | "fields": [ 6 | {"name":"key","type": "string"}, 7 | {"name":"value","type": "string"}, 8 | {"name":"flag","type": "boolean"}, 9 | {"name":"subrecord","type":"record","fields":[ 10 | {"name":"key","type":"string"}, 11 | {"name":"value","type":["string","int","null"]} 12 | ]} 13 | ] 14 | }; 15 | var writer = avro.open("test.avro", schema, { flags: 'w', codec: 'deflate' }); 16 | writer 17 | .append({ key:"john", value:"hive", flag: true, subrecord: { key: "preference", value: 2}}) 18 | .append({ key:"eric", value:"lola", flag: true, subrecord: { key: "postcode", value: null}}) 19 | .end({ key:"fred", value:"wonka", flag: false, subrecord: { key: "city", value: "michigan"}}); 20 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var avro = exports; 2 | 3 | avro.DataFile = require("./lib/datafile"); 4 | avro.IO = require("./lib/io"); 5 | avro.Schema = require("./lib/schema"); 6 | --------------------------------------------------------------------------------
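Note: the `Schema` export above also accepts an array of already-parsed dependency schemas as its third argument, which is how cross-file schema references are resolved (see `lib/schema.js` below). A minimal sketch; the `Child`/`Parent` names and `com.example` namespace are hypothetical:

```js
var Schema = require('./index').Schema.Schema;

// Parse the dependency first; the result is a named record schema.
var child = new Schema({
    "name": "Child", "namespace": "com.example", "type": "record",
    "fields": [{ "name": "id", "type": "string" }]
});

// Pass it in as a dependency so the "com.example.Child" reference resolves by name.
var parent = new Schema({
    "name": "Parent", "namespace": "com.example", "type": "record",
    "fields": [{ "name": "child", "type": "com.example.Child" }]
}, null, [child]);
```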
/lib/datafile.js: -------------------------------------------------------------------------------- 1 | var libpath = process.env['MOCHA_COV'] ? __dirname + '/../lib-cov/' : __dirname + '/'; 2 | 3 | var fs = require('fs'); 4 | var util = require('util'); 5 | var zlib = require('zlib'); 6 | var snappy = require('snappy'); 7 | var crc32 = require('buffer-crc32'); 8 | var _ = require('lodash'); 9 | var Stream = require('stream').Stream; 10 | var Transform = require('stream').Transform; 11 | var IO = require(libpath + 'io'); 12 | var Avro = require(libpath + 'schema'); 13 | var AvroErrors = require(libpath + 'errors'); 14 | 15 | // Constants 16 | var VERSION = 1; 17 | var SYNC_SIZE = 16; 18 | var DEFAULT_BUFFER_SIZE = 8192; 19 | var VALID_CODECS = ["null", "deflate", "snappy"]; 20 | 21 | function magic() { 22 | return "Obj" + String.fromCharCode(VERSION); 23 | }; 24 | 25 | function metaSchema() { 26 | return Avro.Schema({ 27 | "type": "record", 28 | "name": "org.apache.avro.file.Header", 29 | "fields" : [ 30 | { 31 | "name": "magic", 32 | "type": { 33 | "type": "fixed", 34 | "name": "magic", 35 | "size": magic().length 36 | } 37 | }, 38 | { 39 | "name": "meta", 40 | "type": { 41 | "type": "map", 42 | "values": "string" 43 | } 44 | }, 45 | { 46 | "name": "sync", 47 | "type": { 48 | "type": "fixed", 49 | "name": "sync", 50 | "size": SYNC_SIZE 51 | } 52 | } 53 | ] 54 | }); 55 | }; 56 | 57 | function blockSchema() { 58 | return Avro.Schema({ 59 | "type": "record", "name": "org.apache.avro.block", 60 | "fields" : [ 61 | {"name": "objectCount", "type": "long" }, 62 | {"name": "objects", "type": "bytes" }, 63 | {"name": "sync", "type": {"type": "fixed", "name": "sync", "size": SYNC_SIZE}} 64 | ] 65 | }); 66 | }; 67 | 68 | // AvroFile Class 69 | var AvroFile = function() { 70 | 71 | if ((this instanceof arguments.callee) === false) 72 | return new arguments.callee(); 73 | 74 | var _operation; 75 | 76 | // Public methods 77 | this.open = function(path, schema, options) { 78 | var _options = _.extend({ 79 | codec: "null", 80 | flags: 'r', 81 | encoding: null, 82 | mode: 0666, 83 | bufferSize: 64 * 1024 84 | }, options); 85 | 86 | switch (_options.flags) { 87 | case "r": 88 | _operation = new Reader(schema); 89 | var fileStream = fs.createReadStream(path, _options); 90 | fileStream.pipe(_operation); 91 | break; 92 | case "w": 93 | var fileStream = fs.createWriteStream(path, _options); 94 | _operation = new Writer(schema, _options.codec); 95 | _operation.pipe(fileStream); 96 | break; 97 | default: 98 | throw new AvroErrors.FileError("Unsupported operation %s on file", _options.flags); 99 | } 100 | return _operation; 101 | }; 102 | 103 | } 104 | 105 | function Block(size) { 106 | 107 | if ((this instanceof arguments.callee) === false) 108 | return new arguments.callee(size); 109 | 110 | size = size || 0; 111 | this._writeOffset = 0; 112 | this._readOffset = 0; 113 | this._buffer = new Buffer(size); 114 | this.reUseBuffer = true; 115 | } 116 | 117 | Block.prototype.__defineGetter__('length', function () { 118 | return this._writeOffset; 119 | }); 120 | 121 | Block.prototype.__defineGetter__('offset', function () { 122 | return this._readOffset; 123 | }); 124 | 125 | Block.prototype.__defineGetter__('remainingBytes', function() { 126 | return this._writeOffset - this._readOffset; 127 | }); 128 | 129 | _.extend(Block.prototype, { 130 | 131 | flush: function() { 132 | this._writeOffset = this._readOffset = 0; 133 | }, 134 | 135 | rewind: function() { 136 | this._readOffset = 0; 137 | }, 138 | 139 | _bufferSize: 
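// Growth policy: an empty buffer grows to exactly `size`; a buffer under
// DEFAULT_BUFFER_SIZE doubles (or grows by `size` if doubling is not enough);
// larger buffers grow only as far as the pending write requires.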
function(size) { 140 | if (!this._buffer.length) { 141 | return size; 142 | } else if (this._buffer.length < DEFAULT_BUFFER_SIZE) { 143 | var doubleSize = this._buffer.length * 2; 144 | return (doubleSize - this._buffer.length) > size ? doubleSize : this._buffer.length + size; 145 | } else { 146 | return this._writeOffset + size; 147 | } 148 | }, 149 | 150 | _canReUseBuffer: function(size) { 151 | return this.reUseBuffer && this._readOffset >= size; 152 | }, 153 | 154 | _resizeIfRequired: function(size) { 155 | if (this._canReUseBuffer(size)) { 156 | if (this._readOffset != this._writeOffset) 157 | this._buffer.copy(this._buffer, 0, this._readOffset, this._writeOffset); 158 | this._writeOffset = this.remainingBytes; 159 | this._readOffset = 0; 160 | } else if (this._writeOffset + size > this._buffer.length) { 161 | var oldBuffer = this._buffer; 162 | this._buffer = new Buffer(this._bufferSize(size)); 163 | oldBuffer.copy(this._buffer, 0); 164 | oldBuffer = null; 165 | } 166 | }, 167 | 168 | skip: function(size) { 169 | if ((this._readOffset + size) >= 0 && size <= this.remainingBytes) 170 | this._readOffset += size; 171 | else 172 | throw new AvroErrors.BlockError('tried to skip(%d) outside of the block(%d) at %d', size, this.remainingBytes, this._readOffset); 173 | }, 174 | 175 | read: function(size) { 176 | var self = this; 177 | if (size > this.remainingBytes) { 178 | return new AvroErrors.BlockDelayReadError('tried to read %d bytes past the amount written to the block with remaining bytes %d at read offset %d', 179 | size, this.remainingBytes, this._readOffset); 180 | } else if (this._readOffset + size > this._buffer.length) { 181 | throw new AvroErrors.BlockError('tried to read %d bytes outside of the buffer(%d) at read offset %d', 182 | size, this._buffer.length, this._readOffset); 183 | } else if (size === 1) { 184 | return this._buffer[this._readOffset++]; 185 | } else if (size < 0) { 186 | throw new AvroErrors.BlockError("Tried to read a negative amount of %d bytes", size); 187 | } else { 188 | this._readOffset += size; 189 | return this._buffer.slice(this._readOffset - size, this._readOffset); 190 | } 191 | }, 192 | 193 | // write() supports an array of numbers or a Buffer 194 | write: function(value) { 195 | var len = (Buffer.isBuffer(value) || _.isArray(value)) ?
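// A Buffer or array contributes its length; any other value is written as a single byte.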
value.length : 1; 196 | this._resizeIfRequired(len); 197 | 198 | if (Buffer.isBuffer(value)) { 199 | value.copy(this._buffer, this._writeOffset); 200 | this._writeOffset += value.length; 201 | } else if (_.isArray(value)) { 202 | var item; 203 | // TODO: items in array could be an object 204 | while (item = value.shift()) { 205 | this._buffer[this._writeOffset++] = item; 206 | } 207 | } else { 208 | this._buffer[this._writeOffset++] = value; 209 | } 210 | }, 211 | 212 | isEqual: function(value) { 213 | if (Buffer.isBuffer(value) || _.isArray(value)) { 214 | for (var i = 0; i < value.length; i++) { 215 | if (this._buffer[i] !== value[i]) 216 | return false; 217 | } 218 | } else { 219 | throw new AvroErrors.BlockError("must supply an array or buffer"); 220 | } 221 | return true; 222 | }, 223 | 224 | slice: function(start, end) { 225 | start = start || 0; 226 | end = end || this._writeOffset; 227 | return this._buffer.slice(start, end); 228 | }, 229 | 230 | // Convert a Block to a Buffer 231 | toBuffer: function() { 232 | return this.slice(); 233 | }, 234 | 235 | // Show a string representation of the Block 236 | toString: function() { 237 | return "Block: " + util.inspect(this.slice()); 238 | }, 239 | 240 | inspect: function() { 241 | return this.toString(); 242 | } 243 | }); 244 | 245 | // Reader Class 246 | function Reader(options) { 247 | 248 | if ((this instanceof arguments.callee) === false) 249 | return new arguments.callee(options); 250 | 251 | options = options || {}; 252 | options.objectMode = true; 253 | Transform.call(this, options); 254 | 255 | this._fileBlock = new Block(); 256 | this._datumBlock = new Block(); 257 | this._inBody = false; 258 | this.header = null; 259 | this.decoder = options.decoder || IO.BinaryDecoder(this._fileBlock); 260 | this.datumReader = IO.DatumReader(null, options.schema); 261 | } 262 | 263 | util.inherits(Reader, Transform); 264 | 265 | _.extend(Reader.prototype, { 266 | 267 | _snappyDecompress: function(rawData, callback) { 268 | var compressedData = rawData.slice(0, rawData.length - 4); 269 | var checksum = rawData.slice(rawData.length - 4, rawData.length); 270 | snappy.uncompress(compressedData, function(err, data) { 271 | if (err) return callback(err); 272 | var calculatedChecksum = crc32(data); 273 | if (calculatedChecksum.readUInt32BE(0) !== checksum.readUInt32BE(0)) 274 | callback(new AvroErrors.FileError("Failed checksum from decompressed snappy data block %d !== %d", 275 | calculatedChecksum.readUInt32BE(0), checksum.readUInt32BE(0))); 276 | else 277 | callback(null, data); 278 | }); 279 | }, 280 | 281 | decompressData: function(data, codec, callback) { 282 | switch(codec) { 283 | case "null": callback(null, data); break; 284 | case "deflate": zlib.inflateRaw(data, callback); break; 285 | case "snappy": this._snappyDecompress(data, callback); break; 286 | default: 287 | callback(new AvroErrors.FileError("Unsupported codec %s", codec)); 288 | break; 289 | } 290 | }, 291 | 292 | _readHeader: function() { 293 | var self = this; 294 | var header = this.datumReader.readData(metaSchema(), null, this.decoder); 295 | if (header instanceof Error) { 296 | if (header instanceof AvroErrors.BlockDelayReadError) 297 | return header; 298 | else 299 | this.emit('error', header); 300 | } else if (header.magic.toString() !== magic()) { 301 | this.emit('error', new AvroErrors.FileError("Not an avro file, header was %j", header)); 302 | } 303 | try { 304 | var schema = JSON.parse(header.meta['avro.schema'].toString()); 305 | } catch(e) { 306 | schema =
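// The schema metadata was not valid JSON; fall back to the raw string (e.g. a bare type name).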
header.meta['avro.schema'].toString(); 307 | } 308 | this.writersSchema = Avro.Schema(schema); 309 | this.datumReader.writersSchema = this.writersSchema; 310 | this.header = header; 311 | this.header.meta['avro.schema'] = schema; 312 | this.emit('header', header); 313 | 314 | return this._fileBlock.offset; 315 | }, 316 | 317 | _readBlock: function(cb) { 318 | 319 | var self = this; 320 | this.decoder.input(this._fileBlock); 321 | var block = this.datumReader.readData(blockSchema(), null, this.decoder); 322 | 323 | if (block instanceof AvroErrors.BlockDelayReadError) { 324 | cb(block); 325 | } else { 326 | // If the sync marker doesn't match, maybe there is no sync marker, try skipping back 327 | if (block.sync && block.sync.toString() !== this.header.sync.toString()) 328 | self._fileBlock.skip(-SYNC_SIZE); 329 | 330 | var codec = this.header.meta['avro.codec'].toString(); 331 | this.decompressData(block.objects, codec, function(err, data) { 332 | if (err) 333 | self.emit('error', err); 334 | else { 335 | if (data) { 336 | self._datumBlock.write(data); 337 | self.decoder.input(self._datumBlock); 338 | while (block.objectCount--) { 339 | var decoded = self.datumReader.read(self.decoder); 340 | if (decoded !== null) self.push(decoded); 341 | } 342 | cb(); 343 | } 344 | } 345 | }); 346 | } 347 | }, 348 | 349 | _readBlocks: function(cb) { 350 | var self = this; 351 | self._readBlock(function(err) { 352 | if (err || self._fileBlock.remainingBytes === 0) { 353 | cb(); 354 | } else { 355 | self._readBlocks(cb); 356 | } 357 | }); 358 | }, 359 | 360 | _transform: function(chunk, encoding, done) { 361 | this._fileBlock.write(chunk); 362 | 363 | if (!this._inBody) { 364 | var header = this._readHeader(); 365 | if (!(header instanceof AvroErrors.BlockDelayReadError)) 366 | this._inBody = true; 367 | } 368 | 369 | if (this._inBody) { 370 | this._readBlocks(done); 371 | } else 372 | done(); 373 | }, 374 | 375 | _flush: function(done) { 376 | 377 | if (this._fileBlock.remainingBytes > 0) 378 | this._readBlocks(done); 379 | else 380 | done(); 381 | } 382 | 383 | }); 384 | 385 | // Writer Class 386 | function Writer(writersSchema, codec) { 387 | 388 | if ((this instanceof arguments.callee) === false) 389 | return new arguments.callee(writersSchema, codec); 390 | 391 | Stream.call(this); 392 | this.readable = true; 393 | this.writable = true; 394 | this._streamOffset = 0; 395 | this._paused = false; 396 | this.codec = codec || "null"; 397 | this._datumBlock = new Block(); 398 | this._fileBlock = new Block(); 399 | this._resetBlocks(); 400 | this._writersSchema = writersSchema ? Avro.Schema(writersSchema) : null; 401 | this.datumWriter = IO.DatumWriter(this._writersSchema); 402 | this.encoder = IO.BinaryEncoder(this._fileBlock); 403 | this._inputSchema = writersSchema; 404 | return this; 405 | } 406 | 407 | util.inherits(Writer, Stream); 408 | 409 | _.extend(Writer.prototype, { 410 | 411 | syncInterval: 1000 * SYNC_SIZE, 412 | 413 | _generateSyncMarker: function(size) { 414 | if (size < 1) return null; 415 | var marker = ""; 416 | for (var i = 0; i < size; i++) { 417 | marker += String.fromCharCode(Math.floor(Math.random() * 0xFF)); 418 | } 419 | return marker; 420 | }, 421 | 422 | _metaData: function(codec, schema) { 423 | return { 424 | "avro.codec": codec ? codec : null, 425 | "avro.schema": _.isObject(schema) ? 
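// Schema objects are serialized to JSON text; strings are stored in the metadata as-is.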
JSON.stringify(schema): schema 426 | }; 427 | }, 428 | 429 | _blockData: function(data) { 430 | return { 431 | "objectCount": this._blockCount, 432 | "objects": data, 433 | "sync": this.syncMarker 434 | } 435 | }, 436 | 437 | _snappyCompress: function(data, callback) { 438 | var calculatedChecksum = crc32(data); 439 | snappy.compress(data, function(err, data) { 440 | if (err) return callback(err); 441 | // TODO: this might be a performance hit, having to create a new buffer just to add a crc32 442 | var checksumBuffer = new Buffer(data.length + 4); 443 | data.copy(checksumBuffer); 444 | checksumBuffer.writeUInt32BE(calculatedChecksum.readUInt32BE(0), checksumBuffer.length - 4); 445 | callback(null, checksumBuffer); 446 | }); 447 | }, 448 | 449 | compressData: function(data, codec, callback) { 450 | switch(codec) { 451 | case 'null': callback(null, data); break; 452 | case 'deflate': zlib.deflateRaw(data, callback); break; 453 | case 'snappy': this._snappyCompress(data, callback); break; 454 | default: 455 | callback(new AvroErrors.FileError("Unsupported codec %s", codec)); 456 | break; 457 | } 458 | }, 459 | 460 | _writeHeader: function(schema) { 461 | this.syncMarker = this._generateSyncMarker(SYNC_SIZE); 462 | var avroHeader = { 463 | 'magic': magic(), 464 | 'meta': this._metaData(this.codec, schema), 465 | 'sync': this.syncMarker 466 | }; 467 | this.datumWriter.writeData(metaSchema(), avroHeader, this.encoder); 468 | this.emit('data', this._datumBlock.toBuffer()); 469 | this.encoder.output(this._datumBlock); 470 | return this._fileBlock.length; 471 | }, 472 | 473 | _resetBlocks: function() { 474 | this._fileBlock.flush(); 475 | this._datumBlock.flush(); 476 | this._blockOffset = 0; 477 | this._blockCount = 0; 478 | }, 479 | 480 | pause: function(){ 481 | this._paused = true; 482 | }, 483 | 484 | resume: function() { 485 | if (this.writable) { 486 | this._paused = false; 487 | this.emit('drain'); 488 | } 489 | }, 490 | 491 | _writeBlock: function(final) { 492 | var self = this; 493 | final = final || false; 494 | if (this._blockCount > 0) { 495 | this.compressData(this._datumBlock.toBuffer(), this.codec, function(err, buffer) { 496 | if (err) self.emit('error', err); 497 | self.encoder.output(self._fileBlock); 498 | self.datumWriter.writeData(blockSchema(), self._blockData(buffer), self.encoder); 499 | if (!self._paused) { 500 | self.emit('data', self._fileBlock.toBuffer()); 501 | self._resetBlocks(); 502 | if (final) { 503 | self.emit('end'); 504 | setImmediate(function() { 505 | self.destroy(); 506 | }); 507 | } 508 | } 509 | self.encoder.output(self._datumBlock); 510 | }); 511 | } else { 512 | if (final) { 513 | self.emit('end'); 514 | setImmediate(function() { 515 | self.destroy(); 516 | }); 517 | } 518 | } 519 | }, 520 | 521 | write: function(data) { 522 | if (_.isUndefined(data)) 523 | throw new AvroErrors.FileError('no data passed to write()'); 524 | 525 | if (this._streamOffset === 0) 526 | this._streamOffset += this._writeHeader(this._inputSchema); 527 | 528 | this.datumWriter.writeData(this._writersSchema, data, this.encoder); 529 | this._blockCount++; 530 | this._blockOffset += this._datumBlock.length; 531 | this._streamOffset += this._datumBlock.length; 532 | 533 | if (this._blockOffset > this.syncInterval) { 534 | this._writeBlock(); 535 | } 536 | 537 | return !this._paused; 538 | }, 539 | 540 | append: function(data) { 541 | this.write(data); 542 | return this; 543 | }, 544 | 545 | end: function(data) { 546 | var self = this; 547 | if (this._paused) { 548 | 
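// The stream is paused: defer and retry end() on the next turn of the event loop.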
setImmediate(function() { 549 | self.end(data); 550 | }); 551 | } else { 552 | var self = this; 553 | if (data) this.write(data); 554 | this._writeBlock(true); 555 | return self; 556 | } 557 | }, 558 | 559 | destroy: function() { 560 | this._fileBlock = null; 561 | this._datumBlock = null; 562 | this.writable = false; 563 | this.readable = false; 564 | this.emit('close'); 565 | } 566 | 567 | }); 568 | 569 | if (!_.isUndefined(exports)) { 570 | exports.AvroFile = AvroFile; 571 | exports.Reader = Reader; 572 | exports.Writer = Writer; 573 | exports.Block = Block; 574 | } 575 | -------------------------------------------------------------------------------- /lib/errors.js: -------------------------------------------------------------------------------- 1 | var util = require('util'); 2 | 3 | var AvroIOError = function() { 4 | Error.call(this); 5 | this.name = 'Avro IO Error'; 6 | this.message = util.format.apply(null, arguments); 7 | Error.captureStackTrace(this, arguments.callee); 8 | }; 9 | 10 | var AvroFileError = function() { 11 | Error.call(this); 12 | this.name = 'Avro File Error'; 13 | this.message = util.format.apply(null, arguments); 14 | Error.captureStackTrace(this, arguments.callee); 15 | }; 16 | 17 | var AvroBlockError = function() { 18 | Error.call(this); 19 | this.name = 'Avro Block Error'; 20 | this.message = util.format.apply(null, arguments); 21 | Error.captureStackTrace(this, arguments.callee); 22 | }; 23 | 24 | var AvroBlockDelayReadError = function() { 25 | Error.call(this); 26 | this.name = 'Avro Block Delay Read Error'; 27 | this.message = util.format.apply(null, arguments); 28 | Error.captureStackTrace(this, arguments.callee); 29 | }; 30 | 31 | var AvroInvalidSchemaError = function() { 32 | Error.call(this); 33 | this.name = 'Avro Invalid Schema Error'; 34 | this.message = util.format.apply(null, arguments); 35 | Error.captureStackTrace(this, arguments.callee); 36 | }; 37 | 38 | var AvroDataValidationError = function() { 39 | Error.call(this); 40 | this.name = 'Avro Data Validation Error'; 41 | this.message = util.format.apply(null, arguments); 42 | this.fieldPath = []; 43 | Error.captureStackTrace(this, arguments.callee); 44 | }; 45 | 46 | util.inherits(AvroIOError, Error); 47 | util.inherits(AvroFileError, Error); 48 | util.inherits(AvroBlockError, Error); 49 | util.inherits(AvroBlockDelayReadError, Error); 50 | util.inherits(AvroInvalidSchemaError, Error); 51 | util.inherits(AvroDataValidationError, Error); 52 | 53 | exports.IOError = AvroIOError; 54 | exports.FileError = AvroFileError; 55 | exports.BlockError = AvroBlockError; 56 | exports.BlockDelayReadError = AvroBlockDelayReadError; 57 | exports.InvalidSchemaError = AvroInvalidSchemaError; 58 | exports.DataValidationError = AvroDataValidationError; 59 | -------------------------------------------------------------------------------- /lib/io.js: -------------------------------------------------------------------------------- 1 | var libpath = process.env['MOCHA_COV'] ? 
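// Under MOCHA_COV coverage runs, load the jscoverage-instrumented sources from lib-cov.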
__dirname + '/../lib-cov/' : __dirname + '/'; 2 | 3 | var _ = require('lodash'); 4 | var Int64 = require("int64-native") 5 | var util = require('util'); 6 | var Avro = require(libpath + 'schema'); 7 | var AvroErrors = require(libpath + 'errors.js'); 8 | 9 | var BinaryDecoder = function(input) { 10 | 11 | if (!input || input == 'undefined') 12 | throw new AvroErrors.IOError('Must provide input'); 13 | 14 | if ((this instanceof arguments.callee) === false) 15 | return new arguments.callee(input); 16 | 17 | this.input(input); 18 | }; 19 | 20 | BinaryDecoder.prototype = { 21 | 22 | input: function(input) { 23 | if (!input || !input.read || !_.isFunction(input.read)) 24 | throw new AvroErrors.IOError("Must provide an input object that implements a read method"); 25 | else 26 | this._input = input; 27 | }, 28 | 29 | readNull: function () { 30 | // No bytes consumed 31 | return null; 32 | }, 33 | 34 | readByte: function() { 35 | return this._input.read(1); 36 | }, 37 | 38 | readBoolean: function () { 39 | var bool = this.readByte(); 40 | if (bool instanceof AvroErrors.BlockDelayReadError) 41 | return bool; 42 | else 43 | return bool === 1 ? true : false; 44 | }, 45 | 46 | readInt: function () { 47 | return this.readLong(); 48 | }, 49 | 50 | readLong: function () { 51 | var oldOffset = this._input.offset; 52 | var b = this.readByte(); 53 | if (b instanceof AvroErrors.BlockDelayReadError) return b; 54 | 55 | var n = b & 0x7F; 56 | var shift = 7; 57 | 58 | while ((b & 0x80) != 0) { 59 | b = this.readByte(); 60 | if (b instanceof AvroErrors.BlockDelayReadError) { 61 | //console.error("offset now is %d, old was %d, shift is %d", this._input.offset, oldOffset, shift); 62 | this._input.skip(oldOffset - this._input.offset); 63 | return b; 64 | } 65 | n |= (b & 0x7F) << shift 66 | shift += 7 67 | } 68 | 69 | return (n >> 1) ^ -(n & 1); 70 | }, 71 | 72 | readFloat: function() { 73 | var bytes = this._input.read(4); 74 | if (bytes instanceof AvroErrors.BlockDelayReadError) 75 | return bytes; 76 | else 77 | return bytes.readFloatLE(0); 78 | }, 79 | 80 | readDouble: function() { 81 | var bytes = this._input.read(8); 82 | if (bytes instanceof AvroErrors.BlockDelayReadError) 83 | return bytes; 84 | else 85 | return bytes.readDoubleLE(0); 86 | }, 87 | 88 | readFixed: function(len) { 89 | if (len < 1) 90 | throw new AvroErrors.IOError("readFixed only read %d", len); 91 | else 92 | return this._input.read(len); 93 | }, 94 | 95 | readBytes: function() { 96 | var oldOffset = this._input.offset; 97 | var len = this.readLong(); 98 | //console.error("I want to read %d bytes, from %d (%d)", len, oldOffset, this._input.length); 99 | if (len && len > 0) { 100 | var bytes = this.readFixed(len); 101 | if (bytes instanceof AvroErrors.BlockDelayReadError) 102 | this._input.skip(oldOffset - this._input.offset); 103 | return bytes; 104 | } else 105 | return new Buffer(0); 106 | }, 107 | 108 | readString: function() { 109 | var bytes = this.readBytes(); 110 | if (bytes instanceof AvroErrors.BlockDelayReadError) 111 | return bytes; 112 | else { 113 | if (Buffer.isBuffer(bytes)) 114 | return bytes.toString(); 115 | else 116 | return String.fromCharCode(bytes); 117 | } 118 | }, 119 | 120 | skipNull: function(){ 121 | return; 122 | }, 123 | 124 | skipBoolean: function() { 125 | return this._input.skip(1); 126 | }, 127 | 128 | skipLong: function() { 129 | while((this.readByte() & 0x80) != 0) {} 130 | }, 131 | 132 | skipFloat: function() { 133 | return this._input.skip(4); 134 | }, 135 | 136 | skipDouble: function() { 137 | return 
this._input.skip(8); 138 | }, 139 | 140 | skipFixed: function(len){ 141 | return this._input.skip(len); 142 | }, 143 | 144 | skipBytes: function() { 145 | var len = this.readLong(); 146 | this._input.skip(len); 147 | }, 148 | 149 | skipString: function() { 150 | this.skipBytes(); 151 | } 152 | } 153 | 154 | var BinaryEncoder = function(output) { 155 | 156 | if (!output || output === 'undefined') 157 | throw new AvroErrors.IOError("Must provide an output object"); 158 | 159 | if ((this instanceof arguments.callee) === false) 160 | return new arguments.callee(output); 161 | 162 | this.output(output); 163 | }; 164 | 165 | var L_0x7f = new Int64(0x7F); 166 | var L_0x80 = new Int64(0x80); 167 | var NL_0X7f = new Int64(-1,-128); 168 | var L_0xFFFE = new Int64(0xFFFFFFFF, 0xFFFFFFFE); 169 | var L_SIGN = new Int64(0x80000000,0) 170 | var L_0xFF = new Int64(0xFF); 171 | var L_0x0 = new Int64(0) 172 | 173 | BinaryEncoder.prototype = { 174 | 175 | output: function(output) { 176 | if (!output || !output.write || !_.isFunction(output.write)) 177 | throw new AvroErrors.IOError("Must provide an output object that implements the write method"); 178 | else 179 | this._output = output; 180 | }, 181 | 182 | writeByte: function(value){ 183 | this._output.write(new Buffer([value])); 184 | }, 185 | 186 | writeNull : function() { 187 | // This is a no-op 188 | }, 189 | 190 | writeBoolean : function(value) { 191 | this.writeByte(value ? 1 : 0); 192 | }, 193 | 194 | writeInt: function(value) { 195 | this.writeLong(value); 196 | }, 197 | 198 | writeLong: function(value) { 199 | var n = new Int64(value); 200 | var self = this; 201 | 202 | function wb(byte){ 203 | self.writeByte(byte); 204 | } 205 | 206 | // taken from Avro's BinaryData.java 207 | // move sign to low-order bit, and flip others if negative 208 | var signbit = L_0xFFFE; 209 | if (n.and(L_SIGN).compare(L_0x0) == 0) { 210 | signbit = L_0x0; 211 | } 212 | 213 | n = (n.shiftLeft(1)).xor((n.shiftRight(63).or(signbit))); 214 | if (n.and(NL_0X7f).compare(L_0x0) != 0) { 215 | wb(n.or(L_0x80).and(L_0xFF).low32()); 216 | n = n.shiftRight(7); 217 | var numShifts = 0; 218 | while (n.compare(L_0x7f)==1 && numShifts < 8) { 219 | wb(n.or(L_0x80).and(L_0xFF).low32()); 220 | n = n.shiftRight(7); 221 | numShifts++; 222 | } 223 | } 224 | wb(n.and(L_0xFF).low32()); 225 | }, 226 | 227 | writeFloat: function(value) { 228 | var floatBuffer = new Buffer(4); 229 | floatBuffer.writeFloatLE(value, 0); 230 | this._output.write(floatBuffer); 231 | }, 232 | 233 | writeDouble: function(value) { 234 | var doubleBuffer = new Buffer(8); 235 | doubleBuffer.writeDoubleLE(value, 0); 236 | this._output.write(doubleBuffer); 237 | }, 238 | 239 | writeBytes: function(datum) { 240 | if (!Buffer.isBuffer(datum) && !_.isArray(datum)) 241 | throw new AvroErrors.IOError("must pass in an array of byte values or a buffer"); 242 | 243 | this.writeLong(datum.length); 244 | this._output.write(datum); 245 | }, 246 | 247 | writeString: function(datum) { 248 | if (!_.isString(datum)) 249 | throw new AvroErrors.IOError("argument must be a string but was %s(%s)", datum, typeof(datum)); 250 | 251 | var size = Buffer.byteLength(datum); 252 | var stringBuffer = new Buffer(size); 253 | stringBuffer.write(datum); 254 | this.writeLong(size); 255 | this._output.write(stringBuffer); 256 | } 257 | } 258 | 259 | var DatumReader = function(writersSchema, readersSchema) { 260 | 261 | if ((this instanceof arguments.callee) === false) 262 | return new arguments.callee(writersSchema, readersSchema); 263 | 264 | 
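// The reader's schema is optional; read() falls back to the writer's schema when it is absent.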
this.writersSchema = writersSchema; 265 | this.readersSchema = readersSchema; 266 | }; 267 | 268 | DatumReader.prototype = { 269 | 270 | read: function(decoder){ 271 | if (!this.readersSchema) this.readersSchema = this.writersSchema; 272 | return this.readData(this.writersSchema, this.readersSchema, decoder); 273 | }, 274 | 275 | readData: function(writersSchema, readersSchema, decoder) { 276 | 277 | if (!(writersSchema instanceof Avro.Schema)) 278 | throw new AvroErrors.IOError("writersSchema is not a valid schema object"); 279 | 280 | if (readersSchema && !(readersSchema instanceof Avro.Schema)) 281 | throw new AvroErrors.IOError("readersSchema is not a valid schema object"); 282 | 283 | if (!readersSchema) readersSchema = writersSchema; 284 | 285 | // The type RecordReferenceSchema's will be a record itself 286 | if (writersSchema.constructor.name === "RecordReferenceSchema" && writersSchema.type) { 287 | return this.readRecord(writersSchema.type, writersSchema.type, decoder); 288 | } 289 | 290 | switch(writersSchema.type) { 291 | case "null": return decoder.readNull(); break; 292 | case "boolean": return decoder.readBoolean(); break; 293 | case "string": return decoder.readString(); break; 294 | case "int": return decoder.readInt(); break; 295 | case "long": return decoder.readLong(); break; 296 | case "float": return decoder.readFloat(); break; 297 | case "double": return decoder.readDouble(); break; 298 | case "bytes": return decoder.readBytes(); break; 299 | case "fixed": return decoder.readFixed(writersSchema.size); break; 300 | case "enum": return this.readEnum(writersSchema, readersSchema, decoder); break; 301 | case "array": return this.readArray(writersSchema, readersSchema, decoder); break; 302 | case "map": return this.readMap(writersSchema, readersSchema, decoder); break; 303 | case "union": return this.readUnion(writersSchema, readersSchema, decoder); break; 304 | case "record": 305 | case "errors": 306 | case "request": return this.readRecord(writersSchema, readersSchema, decoder); break; 307 | default: 308 | throw new AvroErrors.IOError("Unknown type: %j", writersSchema); 309 | } 310 | }, 311 | 312 | skipData: function(writersSchema, decoder) { 313 | 314 | if (!(writersSchema instanceof Avro.Schema)) 315 | throw new AvroErrors.IOError("writersSchema is not a valid schema object"); 316 | 317 | switch(writersSchema.type) { 318 | case "null": return decoder.skipNull(); break; 319 | case "boolean": return decoder.skipBoolean(); break; 320 | case "string": return decoder.skipString(); break; 321 | case "int": return decoder.skipLong(); break; 322 | case "long": return decoder.skipLong(); break; 323 | case "float": return decoder.skipFloat(); break; 324 | case "double": return decoder.skipDouble(); break; 325 | case "bytes": return decoder.skipBytes(); break; 326 | case "fixed": return decoder.skipFixed(writersSchema.size); break; 327 | case "enum": return this.skipEnum(writersSchema, decoder); break; 328 | case "array": return this.skipArray(writersSchema, decoder); break; 329 | case "map": return this.skipMap(writersSchema, decoder); break; 330 | case "union": return this.skipUnion(writersSchema, decoder); break; 331 | case "record": 332 | case "errors": 333 | case "request": return this.skipRecord(writersSchema, decoder); break; 334 | default: 335 | throw new AvroErrors.IOError("Unknown type: %j", writersSchema); 336 | } 337 | }, 338 | 339 | readEnum: function(writersSchema, readersSchema, decoder) { 340 | var anEnum = decoder.readInt(); 341 | if (anEnum instanceof 
AvroErrors.BlockDelayReadError) 342 | return anEnum; 343 | var symbolIndex = Math.abs(anEnum); 344 | if (symbolIndex >= 0 && symbolIndex < writersSchema.symbols.length) 345 | return writersSchema.symbols[symbolIndex]; 346 | }, 347 | 348 | skipEnum: function(writersSchema, decoder) { 349 | return decoder.skipLong(); 350 | }, 351 | 352 | readArray: function(writersSchema, readersSchema, decoder) { 353 | var self = this; 354 | var anArray = []; 355 | this.readBlocks(decoder, function() { 356 | anArray.push(self.readData(writersSchema.items, readersSchema.items, decoder)); 357 | }) 358 | return anArray; 359 | }, 360 | 361 | skipArray: function(writersSchema, decoder) { 362 | var self = this; 363 | this.skipBlocks(decoder, function() { 364 | self.skipData(writersSchema.items, decoder); 365 | }) 366 | }, 367 | 368 | readMap: function(writersSchema, readersSchema, decoder) { 369 | var self = this; 370 | var map = {}; 371 | var block = this.readBlocks(decoder, function() { 372 | if (map instanceof AvroErrors.BlockDelayReadError) return; 373 | var key = decoder.readString(); 374 | var value = self.readData(writersSchema.values, readersSchema.values, decoder); 375 | if (key instanceof AvroErrors.BlockDelayReadError) 376 | map = key; 377 | else if (value instanceof AvroErrors.BlockDelayReadError) { 378 | map = value; 379 | } else 380 | map[key] = value; 381 | }); 382 | if (block instanceof AvroErrors.BlockDelayReadError) 383 | return block; 384 | else 385 | return map; 386 | }, 387 | 388 | skipMap: function(writersSchema, decoder) { 389 | var self = this; 390 | this.skipBlocks(decoder, function() { 391 | decoder.skipString(); 392 | self.skipData(writersSchema.values, decoder); 393 | }) 394 | }, 395 | 396 | readUnion: function(writersSchema, readersSchema, decoder) { 397 | var oldOffset = decoder._input.offset; 398 | var schemaIndex = decoder.readLong(); 399 | if (schemaIndex instanceof AvroErrors.BlockDelayReadError) return schemaIndex; 400 | if (schemaIndex < 0 || schemaIndex >= writersSchema.schemas.length) { 401 | throw new AvroErrors.IOError("Union %j is out of bounds for %d, %d, %d", writersSchema, schemaIndex, decoder._input.offset, decoder._input.length); 402 | } 403 | var selectedWritersSchema = writersSchema.schemas[schemaIndex]; 404 | var union = {}; 405 | var data = this.readData(selectedWritersSchema, readersSchema.schemas[schemaIndex], decoder); 406 | if (data instanceof AvroErrors.BlockDelayReadError) { 407 | decoder._input.skip(oldOffset - decoder._input.offset); 408 | return data; 409 | } 410 | union = data; 411 | 412 | return union; 413 | }, 414 | 415 | skipUnion: function(writersSchema, decoder) { 416 | var index = decoder.readLong(); 417 | if (index === null) 418 | return null 419 | else 420 | return this.skipData(writersSchema.schemas[index], decoder) 421 | }, 422 | 423 | readRecord: function(writersSchema, readersSchema, decoder) { 424 | var self = this; 425 | var record = {}; 426 | var oldOffset = decoder._input.offset; 427 | for (var fieldIdx in writersSchema.fields) { 428 | var field = writersSchema.fields[fieldIdx]; 429 | var readersField = readersSchema.fieldsHash[field.name]; 430 | if (readersField) { 431 | var data = self.readData(field.type, readersField.type, decoder); 432 | if (data instanceof AvroErrors.BlockDelayReadError) { 433 | decoder._input.skip(oldOffset - decoder._input.offset); 434 | return data; 435 | } else 436 | record[field.name] = data; 437 | } else { 438 | console.error('SKIPPING'); 439 | self.skipData(field.type, decoder); 440 | } 441 | }; 442 | return 
record; 443 | }, 444 | 445 | skipRecord: function(writersSchema, decoder) { 446 | var self = this; 447 | _.each(writersSchema.fields, function(field) { 448 | self.skipData(field.type, decoder); 449 | }); 450 | }, 451 | 452 | _iterateBlocks: function(decoder, iteration, lambda) { 453 | var oldOffset = decoder._input.offset; 454 | var count = decoder.readLong(); 455 | if (count instanceof AvroErrors.BlockDelayReadError) return count; 456 | while(count) { 457 | if (count < 0) { 458 | count = -count; 459 | var output = iteration(); 460 | if (output instanceof AvroErrors.BlockDelayReadError) { 461 | decoder._input.skip(oldOffset - decoder._input.offset); 462 | return output; 463 | } 464 | } 465 | while(count--) lambda(); 466 | count = decoder.readLong(); 467 | if (count instanceof AvroErrors.BlockDelayReadError) { 468 | decoder._input.skip(oldOffset - decoder._input.offset); 469 | return count; 470 | } 471 | } 472 | }, 473 | 474 | readBlocks: function(decoder, lambda) { 475 | return this._iterateBlocks(decoder, function() { return decoder.readLong(); }, lambda); 476 | }, 477 | 478 | skipBlocks: function(decoder, lambda) { 479 | return this._iterateBlocks(decoder, function() { decoder.skipFixed(decoder.readLong()) }, lambda); 480 | } 481 | } 482 | 483 | var DatumWriter = function(writersSchema) { 484 | 485 | if ((this instanceof arguments.callee) === false) 486 | return new arguments.callee(writersSchema); 487 | 488 | if (writersSchema && !(writersSchema instanceof Avro.Schema)) 489 | throw new AvroErrors.IOError("writersSchema should be an instance of Schema"); 490 | 491 | this.writersSchema = writersSchema; 492 | }; 493 | 494 | DatumWriter.prototype = { 495 | 496 | write: function(datum, encoder) { 497 | this.writeData(this.writersSchema, datum, encoder); 498 | }, 499 | 500 | writeData: function(writersSchema, datum, encoder) { 501 | if (!(writersSchema instanceof Avro.Schema)) 502 | throw new AvroErrors.IOError("writersSchema is not a valid schema object, it is %j", writersSchema); 503 | 504 | writersSchema.validateAndThrow(writersSchema.type, datum); 505 | 506 | if (writersSchema.constructor.name === "RecordReferenceSchema" && writersSchema.type) { 507 | return this.writeRecord(writersSchema.type, datum, encoder); 508 | } 509 | 510 | switch(writersSchema.type) { 511 | case "null": encoder.writeNull(datum); break; 512 | case "boolean": encoder.writeBoolean(datum); break; 513 | case "string": encoder.writeString(datum); break; 514 | case "int": encoder.writeInt(datum); break; 515 | case "long": encoder.writeLong(datum); break; 516 | case "float": encoder.writeFloat(datum); break; 517 | case "double": encoder.writeDouble(datum); break; 518 | case "bytes": encoder.writeBytes(datum); break; 519 | case "fixed": this.writeFixed(writersSchema, datum, encoder); break; 520 | case "enum": this.writeEnum(writersSchema, datum, encoder); break; 521 | case "array": this.writeArray(writersSchema, datum, encoder); break; 522 | case "map": this.writeMap(writersSchema, datum, encoder); break; 523 | case "union": this.writeUnion(writersSchema, datum, encoder); break; 524 | case "record": 525 | case "errors": 526 | case "request": this.writeRecord(writersSchema, datum, encoder); break; 527 | default: 528 | throw new AvroErrors.IOError("Unknown type: %j for data %j", writersSchema, datum); 529 | } 530 | }, 531 | 532 | writeFixed: function(writersSchema, datum, encoder) { 533 | var len = datum.length; 534 | for (var i = 0; i < len; i++) { 535 | encoder.writeByte(datum.charCodeAt(i)); 536 | } 537 | }, 538 | 539 | 
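// Enums are encoded as the zero-based index of the symbol in the schema's symbol list.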
writeEnum: function(writersSchema, datum, encoder) { 540 | var datumIndex = writersSchema.symbols.indexOf(datum); 541 | encoder.writeInt(datumIndex); 542 | }, 543 | 544 | writeArray: function(writersSchema, datum, encoder) { 545 | var self = this; 546 | if (datum.length > 0) { 547 | encoder.writeLong(datum.length); 548 | _.each(datum, function(item) { 549 | self.writeData(writersSchema.items, item, encoder); 550 | }); 551 | } 552 | encoder.writeLong(0); 553 | }, 554 | 555 | writeMap: function(writersSchema, datum, encoder) { 556 | var self = this; 557 | if (_.size(datum) > 0) { 558 | encoder.writeLong(_.size(datum)); 559 | _.each(datum, function(value, key) { 560 | encoder.writeString(key); 561 | self.writeData(writersSchema.values, value, encoder); 562 | }) 563 | } 564 | encoder.writeLong(0); 565 | }, 566 | 567 | writeUnion: function(writersSchema, datum, encoder) { 568 | var schemaIndex = -1; 569 | 570 | for (var i = 0; i < writersSchema.schemas.length; i++) { 571 | if (writersSchema.schemas[i].type === 'record' && writersSchema.schemas[i].validate(writersSchema.schemas[i].type, datum)) { 572 | schemaIndex = i; 573 | break; 574 | } else if (writersSchema.schemas[i].type === 'enum' && writersSchema.schemas[i].validate(writersSchema.schemas[i].type, datum)) { 575 | schemaIndex = i; 576 | break; 577 | } else if (writersSchema.schemas[i].type === 'array' && writersSchema.schemas[i].validate(writersSchema.schemas[i].type, datum)) { 578 | schemaIndex = i; 579 | break; 580 | } else if (writersSchema.schemas[i].type === 'map' && writersSchema.schemas[i].validate(writersSchema.schemas[i].type, datum)) { 581 | schemaIndex = i; 582 | break; 583 | } else if (writersSchema.isPrimitive(writersSchema.schemas[i].type) && writersSchema.validate(writersSchema.schemas[i].type, datum)) { 584 | schemaIndex = i; 585 | break; 586 | } 587 | } 588 | 589 | if (schemaIndex < 0) { 590 | throw new AvroErrors.IOError("No schema found for data %j", datum); 591 | } else { 592 | encoder.writeLong(schemaIndex); 593 | this.writeData(writersSchema.schemas[schemaIndex], datum, encoder); 594 | } 595 | }, 596 | 597 | writeRecord: function(writersSchema, datum, encoder) { 598 | var self = this; 599 | _.each(writersSchema.fields, function(field) { 600 | try { 601 | self.writeData(field.type, datum[field.name], encoder); 602 | } catch (err) { 603 | if (err.fieldPath) { 604 | err.fieldPath.unshift(field.name); 605 | } 606 | throw err; 607 | } 608 | }); 609 | } 610 | } 611 | 612 | if (!_.isUndefined(exports)) { 613 | exports.BinaryDecoder = BinaryDecoder; 614 | exports.BinaryEncoder = BinaryEncoder; 615 | exports.DatumWriter = DatumWriter; 616 | exports.DatumReader = DatumReader; 617 | } 618 | -------------------------------------------------------------------------------- /lib/schema.js: -------------------------------------------------------------------------------- 1 | var libpath = process.env['MOCHA_COV'] ? 
__dirname + '/../lib-cov/' : __dirname + '/'; 2 | 3 | var _ = require('lodash'); 4 | var util = require('util'); 5 | var AvroErrors = require(libpath + 'errors.js'); 6 | 7 | var PRIMITIVE_TYPES = ['null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string']; 8 | var COMPLEX_TYPES = ['record', 'enum', 'array', 'map', 'union', 'fixed']; 9 | 10 | var _parseNamedType = function(schema, type) { 11 | if (_.contains(PRIMITIVE_TYPES, type)) { 12 | return new PrimitiveSchema(schema, type); 13 | } 14 | if (schema.schemaRecords[type]) { 15 | return new RecordReferenceSchema(schema, type); 16 | } 17 | if (schema.namedSchemas[type]) { 18 | return schema.namedSchemas[type]; 19 | } 20 | 21 | throw new AvroErrors.InvalidSchemaError('unknown type name: %s; known type names are %s', 22 | type, 23 | JSON.stringify(_.keys(schema.namedSchemas))); 24 | }; 25 | 26 | function makeFullyQualifiedTypeName(schema, namespace) { 27 | var typeName = null; 28 | if (_.isString(schema)) { 29 | typeName = schema; 30 | } else if (_.isObject(schema)) { 31 | if (_.isString(schema.namespace)) { 32 | namespace = schema.namespace; 33 | } 34 | if (_.isString(schema.name)) { 35 | typeName = schema.name; 36 | } else if (_.isString(schema.type)) { 37 | typeName = schema.type; 38 | } 39 | } else { 40 | throw new AvroErrors.InvalidSchemaError('unable to determine fully qualified type name from schema %s in namespace %s', 41 | JSON.stringify(schema), namespace); 42 | } 43 | 44 | if (!_.isString(typeName)) { 45 | throw new AvroErrors.InvalidSchemaError('unable to determine type name from schema %s in namespace %s', 46 | JSON.stringify(schema), namespace); 47 | } 48 | 49 | if (typeName.indexOf('.') !== -1) { 50 | return typeName; 51 | } else if (_.contains(PRIMITIVE_TYPES, typeName)) { 52 | return typeName; 53 | } else if (_.isString(namespace)) { 54 | return namespace + '.' + typeName; 55 | } else { 56 | return typeName; 57 | } 58 | } 59 | 60 | function Schema(schema, namespace, dependencySchemas) { 61 | var self = this; 62 | this.schemaRecords = {}; 63 | this.namedSchemas = {}; 64 | if (dependencySchemas && !_.isArray(dependencySchemas)) { 65 | throw new AvroErrors.InvalidSchemaError('dependencySchemas must be provided as an array'); 66 | } else if (dependencySchemas) { 67 | dependencySchemas.forEach(function(namedSchema) { 68 | self.namedSchemas[namedSchema.namespace + '.'
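// Register each dependency under its fully qualified name: namespace.name.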
+ namedSchema.name] = namedSchema; 69 | }); 70 | } 71 | 72 | if ((this instanceof arguments.callee) === false) { 73 | return new arguments.callee(schema, namespace, dependencySchemas); 74 | } 75 | 76 | if (!_.isUndefined(schema)) { 77 | return this.parse(schema, namespace); 78 | } 79 | } 80 | 81 | _.extend(Schema.prototype, { 82 | 83 | parse: function(schema, namespace) { 84 | var self = this; 85 | if (_.isNull(schema) || _.isUndefined(schema)) { 86 | throw new AvroErrors.InvalidSchemaError('schema is null, in namespace: %s', 87 | JSON.stringify(namespace)); 88 | } else if (_.isString(schema)) { 89 | return _parseNamedType(this, schema); 90 | } else if (_.isObject(schema) && !_.isArray(schema)) { 91 | if (schema.type === 'record') { 92 | if (!_.has(schema, 'fields')) { 93 | throw new AvroErrors.InvalidSchemaError('record must specify "fields", got %s', 94 | JSON.stringify(schema)); 95 | } else if (!_.has(schema, 'name')) { 96 | throw new AvroErrors.InvalidSchemaError('record must specify "name", got %s', 97 | JSON.stringify(schema)); 98 | } else { 99 | var record = new RecordSchema(schema.name, schema.namespace, 100 | _.map(schema.fields, function(field) { 101 | return new FieldSchema(field.name, self.parse(field, namespace)); 102 | })); 103 | // Store the schema records into a map of schema name to 104 | // record, so we can compare against it later if we find 105 | // something that isn't a primitive data type, but may 106 | // be a self-reference 107 | if (!this.schemaRecords[schema.name]) { 108 | this.schemaRecords[schema.name] = record; 109 | } 110 | 111 | return record; 112 | } 113 | } else if (schema.type === 'enum') { 114 | if (_.has(schema, 'symbols')) { 115 | return new EnumSchema(schema.symbols); 116 | } else { 117 | throw new AvroErrors.InvalidSchemaError('enum must specify "symbols", got %s', 118 | JSON.stringify(schema)); 119 | } 120 | } else if (schema.type === 'array') { 121 | if (_.has(schema, 'items')) { 122 | return new ArraySchema(this.parse(schema.items, namespace), namespace); 123 | } else { 124 | throw new AvroErrors.InvalidSchemaError('array must specify "items", got %s', 125 | JSON.stringify(schema)); 126 | } 127 | } else if (schema.type === 'map') { 128 | if (_.has(schema, 'values')) { 129 | return new MapSchema(this.parse(schema.values, namespace)); 130 | } else { 131 | throw new AvroErrors.InvalidSchemaError('map must specify "values" schema, got %s', 132 | JSON.stringify(schema)); 133 | } 134 | } else if (schema.type === 'fixed') { 135 | if (_.has(schema, 'size')) { 136 | return new FixedSchema(schema.name, schema.size); 137 | } else { 138 | throw new AvroErrors.InvalidSchemaError('fixed must specify "size", got %s', 139 | JSON.stringify(schema)); 140 | } 141 | } else if (_.has(schema, 'type')) { 142 | return this.parse(schema.type, namespace); 143 | } else { 144 | throw new AvroErrors.InvalidSchemaError('not yet implemented: %j', schema); 145 | } 146 | } else if (_.isArray(schema)) { 147 | if (_.isEmpty(schema)) { 148 | throw new AvroErrors.InvalidSchemaError('unions must have at least 1 branch'); 149 | } 150 | var branchTypes = _.map(schema, function(type) { 151 | return self.parse(type, namespace); 152 | }); 153 | return new UnionSchema(branchTypes, namespace); 154 | } else { 155 | throw new AvroErrors.InvalidSchemaError('unexpected Javascript type for schema: ' + (typeof schema)); 156 | } 157 | }, 158 | 159 | validateAndThrow: function(schema, datum){ 160 | // primitive types 161 | switch (schema) { 162 | case 'null': 163 | if
(!_.isNull(datum)) 164 | throw new AvroErrors.DataValidationError("Data [%j] is not null", datum); 165 | break; 166 | case 'boolean': 167 | if (!_.isBoolean(datum)) 168 | throw new AvroErrors.DataValidationError("Data [%j] is not boolean", datum); 169 | break; 170 | case 'int': 171 | case 'long': 172 | case 'float': 173 | case 'double': 174 | if (!_.isNumber(datum) || datum === null) 175 | throw new AvroErrors.DataValidationError("Data [%j] is not a number or not defined", datum); 176 | break; 177 | case 'bytes': 178 | if (datum === null) 179 | throw new AvroErrors.DataValidationError("Data [%j] not defined", datum); 180 | break; 181 | case 'string': 182 | if (!_.isString(datum)) 183 | throw new AvroErrors.DataValidationError("Data [%j] is not a string", datum); 184 | break; 185 | case 'enum': 186 | if (datum === null || _.indexOf(this.symbols, datum) === -1) 187 | throw new AvroErrors.DataValidationError("Data [%j] not a valid enum value. List of values [%j]", datum, this.symbols); 188 | break; 189 | case 'array': 190 | if (datum === null || !Array.isArray(datum)) 191 | throw new AvroErrors.DataValidationError("Data [%j] is not an array", datum); 192 | break; 193 | case 'record': 194 | if (datum === null) 195 | return false; 196 | var fields = _.pluck(this.fields, 'name'); 197 | var dFields = _.keys(datum); 198 | var intersect = _.intersection(fields, dFields); 199 | if (intersect.length < dFields.length) 200 | throw new AvroErrors.DataValidationError("Data [%j] has extra fields not in schema. data fields [%j]. schema fields [%j]", datum, dFields, fields); 201 | break; 202 | default: 203 | break; 204 | } 205 | 206 | return true; 207 | }, 208 | 209 | validate: function(schema, datum){ 210 | var self = this; 211 | try { 212 | self.validateAndThrow(schema, datum); 213 | } catch (validateErr) { 214 | return false; 215 | } 216 | return true; 217 | }, 218 | 219 | isPrimitive: function(schema){ 220 | switch (schema) { 221 | case 'null': 222 | case 'boolean': 223 | case 'int': 224 | case 'long': 225 | case 'float': 226 | case 'double': 227 | case 'bytes': 228 | case 'string': 229 | return true; 230 | } 231 | return false; 232 | }, 233 | 234 | toString: function() { 235 | return JSON.stringify({ type: this.type }); 236 | } 237 | }); 238 | 239 | function PrimitiveSchema(schema, type) { 240 | 241 | if (!_.isString(type)) { 242 | throw new AvroErrors.InvalidSchemaError('Primitive type name must be a string'); 243 | } 244 | 245 | if (!_.contains(PRIMITIVE_TYPES, type)) { 246 | throw new AvroErrors.InvalidSchemaError('Primitive type must be one of: %s; Got %s', 247 | JSON.stringify(PRIMITIVE_TYPES), type); 248 | } 249 | 250 | this.type = type; 251 | } 252 | 253 | util.inherits(PrimitiveSchema, Schema); 254 | 255 | function RecordReferenceSchema(schema, type) { 256 | this.type = schema.schemaRecords[type] 257 | } 258 | util.inherits(RecordReferenceSchema, Schema); 259 | 260 | function FieldSchema(name, type) { 261 | if (!_.isString(name)) { 262 | throw new AvroErrors.InvalidSchemaError('Field name must be string'); 263 | } 264 | 265 | if (!(type instanceof Schema)) { 266 | throw new AvroErrors.InvalidSchemaError('Field type must be a Schema object'); 267 | } 268 | 269 | this.name = name; 270 | this.type = type; 271 | } 272 | 273 | //util.inherits(FieldSchema, Schema); 274 | 275 | function RecordSchema(name, namespace, fields) { 276 | if (!_.isString(name)) { 277 | throw new AvroErrors.InvalidSchemaError('Record name must be string'); 278 | } 279 | 280 | if (!_.isNull(namespace) &&
!_.isUndefined(namespace) && !_.isString(namespace)) { 281 | throw new AvroErrors.InvalidSchemaError('Record namespace must be string or null'); 282 | } 283 | 284 | if (!_.isArray(fields)) { 285 | throw new AvroErrors.InvalidSchemaError('Fields must be an array'); 286 | } 287 | 288 | this.type = 'record'; 289 | this.name = name; 290 | this.namespace = namespace; 291 | this.fields = fields; 292 | 293 | this.fieldsHash = _.reduce(fields, function(hash, field) { 294 | hash[field.name] = field; 295 | return hash; 296 | }, {}); 297 | }; 298 | 299 | util.inherits(RecordSchema, Schema); 300 | 301 | function MapSchema(type) { 302 | this.type = 'map'; 303 | this.values = type; 304 | } 305 | 306 | util.inherits(MapSchema, Schema); 307 | 308 | function ArraySchema(items) { 309 | if (_.isNull(items) || _.isUndefined(items)) { 310 | throw new AvroErrors.InvalidSchemaError('Array "items" schema should not be null or undefined'); 311 | } 312 | 313 | this.type = 'array'; 314 | this.items = items; 315 | } 316 | 317 | util.inherits(ArraySchema, Schema); 318 | 319 | function UnionSchema(schemas, namespace) { 320 | if (!_.isArray(schemas) || _.isEmpty(schemas)) { 321 | throw new AvroErrors.InvalidSchemaError('Union must have at least 1 branch'); 322 | } 323 | 324 | this.type = 'union'; 325 | this.schemas = schemas; //_.map(schemas, function(type) { return makeFullyQualifiedTypeName(type, namespace); }); 326 | this.namespace = namespace; 327 | } 328 | 329 | util.inherits(UnionSchema, Schema); 330 | 331 | function EnumSchema(symbols) { 332 | if (!_.isArray(symbols)) { 333 | throw new AvroErrors.InvalidSchemaError('Enum must have array of symbols, got %s', 334 | JSON.stringify(symbols)); 335 | } 336 | if (!_.all(symbols, function(symbol) { return _.isString(symbol); })) { 337 | throw new AvroErrors.InvalidSchemaError('Enum symbols must be strings, got %s', 338 | JSON.stringify(symbols)); 339 | } 340 | 341 | this.type = 'enum'; 342 | this.symbols = symbols; 343 | } 344 | 345 | util.inherits(EnumSchema, Schema); 346 | 347 | function FixedSchema(name, size) { 348 | 349 | this.type = 'fixed'; 350 | this.name = name; 351 | this.size = size; 352 | } 353 | 354 | util.inherits(FixedSchema, Schema); 355 | 356 | if (!_.isUndefined(exports)) { 357 | exports.Schema = Schema; 358 | exports.PrimitiveSchema = PrimitiveSchema; 359 | exports.ArraySchema = ArraySchema; 360 | exports.MapSchema = MapSchema; 361 | exports.UnionSchema = UnionSchema; 362 | exports.RecordSchema = RecordSchema; 363 | exports.FixedSchema = FixedSchema; 364 | exports.EnumSchema = EnumSchema; 365 | } 366 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-avro-io", 3 | "description": "This will allow you to encode/decode the Avro binary format to/from JSON format; it supports both deflate and snappy compression and supports Node streams", 4 | "version": "2.0.0", 5 | "author": { 6 | "name": "James Power", 7 | "email": "james.bruce.power@gmail.com" 8 | }, 9 | "contributors": [ 10 | "Jeremiah Lu ", 11 | "Matt Lavin ", 12 | "Austin Kelleher ", 13 | "Martin Cizek ", 14 | "Xander Dumaine ", 15 | "Andrew Austin ", 16 | "Hannes Stockner " 17 | ], 18 | "license": "Apache-2.0", 19 | "repository": { 20 | "type": "git", 21 | "url": "https://github.com/jamesbrucepower/node-avro-io.git" 22 | }, 23 | "main": "./index", 24 | "scripts": { 25 | "test": "make test" 26 | }, 27 | "dependencies": { 28 | "buffer-crc32": "^0.2.5", 29 |
"int64-native": "^0.5.0", 30 | "lodash": "^2.4.1", 31 | "snappy": "^6.1.1" 32 | }, 33 | "devDependencies": { 34 | "mocha": "^3.1.0", 35 | "should": "^4.4.2", 36 | "jscoverage": "^0.5.9", 37 | "buffertools": "^2.1.2", 38 | "sprint": "^0.3.1", 39 | "dtrace-provider": "^0.3.1" 40 | }, 41 | "engines": { 42 | "node": ">= 4.x", 43 | "npm": ">= 1.4.x" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /run_tests.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var Mocha = require('mocha'), 3 | fs = require('fs'), 4 | path = require('path'); 5 | 6 | var mocha = new Mocha(); 7 | 8 | var curDir = __dirname; 9 | var testDir = path.join(curDir, 'test'); 10 | fs.readdirSync(testDir).filter(function(file){ 11 | // Only keep the .js files 12 | return file.substr(-3) === '.js'; 13 | 14 | }).forEach(function(file){ 15 | // Use the method "addFile" to add the file to mocha 16 | mocha.addFile( 17 | path.join(testDir, file) 18 | ); 19 | }); 20 | 21 | // Now, you can run the tests. 22 | mocha.run(function(failures){ 23 | process.on('exit', function () { 24 | process.exit(failures); 25 | }); 26 | }); -------------------------------------------------------------------------------- /test.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesbrucepower/node-avro-io/02aed01d15f52d8df414a0b3e084767741dfafdf/test.avro -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var avro = require('./index').DataFile.AvroFile(); 2 | 3 | var Schema = require('./index').Schema.Schema; 4 | 5 | var address = { 6 | "type":"record", 7 | "name":"Address", 8 | "namespace":"com.inin.events.conversation", 9 | "fields":[ 10 | {"name":"name","type":["null",{"type":"string","avro.java.string":"String"}],"default":null}, 11 | {"name":"nameRaw","type":["null",{"type":"string","avro.java.string":"String"}],"default":null}, 12 | {"name":"addressNormalized","type":["null",{"type":"string","avro.java.string":"String"}],"default":null}, 13 | {"name":"addressDisplayable","type":["null",{"type":"string","avro.java.string":"String"}],"default":null}, 14 | {"name":"addressRaw","type":["null",{"type":"string","avro.java.string":"String"}],"default":null} 15 | ] 16 | }; 17 | 18 | var addressSchema = new Schema(address); 19 | 20 | console.log(addressSchema instanceof Schema); 21 | 22 | var sessionEvent = { 23 | "type":"record", 24 | "doc":"Topic: ProviderMultimediaSessionEvent. 
A value of this type is sent when a session event is generated by a Multimedia (WebRTC) conversation participant.", 25 | "name":"ProviderMultimediaSessionEvent", 26 | "namespace":"com.inin.events.provider", 27 | "imports":["RoutingData.avsc","ProviderEvent.avsc", "Address.avsc"], 28 | "fields": [ 29 | { 30 | "name":"self", 31 | "doc":"Name/Address for the participant on this side of the email conversation.", 32 | "type":"com.inin.events.conversation.Address", 33 | "default":{} 34 | }, 35 | { 36 | "name":"sessionId", 37 | "doc":"UUID identifying the session from the point of view of a participant in the conference.", 38 | "type":{"type":"string","avro.java.string":"String"} 39 | }, 40 | { 41 | "name":"context", 42 | "doc":"The room id context (xmpp jid) for the conference session.", 43 | "type":{"type":"string","avro.java.string":"String"} 44 | }, 45 | { 46 | "name":"state", 47 | "doc":"The current state of the participant in the conference. Valid values are in Constants.java", 48 | "type":{"type":"string","avro.java.string":"String"} 49 | }, 50 | { 51 | "name":"audioMuted", 52 | "doc":"Indicates whether this participant has muted their outgoing audio.", 53 | "type":"boolean", 54 | "default":false 55 | }, 56 | { 57 | "name":"videoMuted", 58 | "doc":"Indicates whether this participant has muted/paused their outgoing video.", 59 | "type":"boolean", 60 | "default":false 61 | }, 62 | { 63 | "name":"sharingScreen", 64 | "doc":"Indicates whether this participant is sharing their screen to the session.", 65 | "type":"boolean", 66 | "default":false 67 | }, 68 | { 69 | "name":"peerCount", 70 | "doc":"The number of peer participants from the perspective of the participant in the conference.", 71 | "type":"long" 72 | }, 73 | { 74 | "name":"disconnectType", 75 | "doc":"Indicates what caused this participant to be terminated from the session. 
Valid values are in the DisconnectType enum.", 76 | "type":["null",{"type":"string","avro.java.string":"String"}], 77 | "default":null 78 | } 79 | ] 80 | }; 81 | 82 | var sessionEventSchema = new Schema(sessionEvent, 'com.inin.events.provider', [addressSchema]); 83 | -------------------------------------------------------------------------------- /test/data/dependents/child.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Request", 3 | "namespace": "e.d.c.b.a", 4 | "type": "record", 5 | "fields": [ 6 | { 7 | "name": "headers", 8 | "type": { 9 | "type": "map", 10 | "values": "string" 11 | } 12 | }, 13 | { 14 | "name": "method", 15 | "type": "string" 16 | }, 17 | { 18 | "name": "path", 19 | "type": "string" 20 | }, 21 | { 22 | "name": "queryString", 23 | "type": [ 24 | "string", 25 | "null" 26 | ] 27 | }, 28 | { 29 | "name": "body", 30 | "type": { 31 | "type": "map", 32 | "values": "string" 33 | } 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /test/data/dependents/parent.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "fields": [ 3 | { 4 | "name": "host", 5 | "type": "string" 6 | }, 7 | { 8 | "name": "time", 9 | "type": "string" 10 | }, 11 | { 12 | "name": "elapsedTime", 13 | "type": "long" 14 | }, 15 | { 16 | "name": "request", 17 | "type": "e.d.c.b.a.Request" 18 | } 19 | ], 20 | "name": "LogEvent", 21 | "namespace": "f.e.d.c.b.a", 22 | "type": "record" 23 | } 24 | -------------------------------------------------------------------------------- /test/data/dependents/parent.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "testhostA", 3 | "time": "1970-01-01T00:00Z", 4 | "elapsedTime": 123456789, 5 | "request": { 6 | "headers": { 7 | "user-agent": "firefox", 8 | "remote-ip": "0.0.0.0" 9 | }, 10 | "method": "GET", 11 | "path": "/basepath/object", 12 | "queryString": { 13 | "string": "param1=test1&param2=test2" 14 | }, 15 | "body": {} 16 | }, 17 | "exception": { 18 | "e.d.c.b.a.AppException": { 19 | "class": "org.apache.avro", 20 | "message": "An error occurred", 21 | "stackTrace": { 22 | "string": "failed at line 1" 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test/data/double.json: -------------------------------------------------------------------------------- 1 | 8.98928196620122323 2 | -------------------------------------------------------------------------------- /test/data/double.schema: -------------------------------------------------------------------------------- 1 | { "type": "double" } 2 | -------------------------------------------------------------------------------- /test/data/enum.json: -------------------------------------------------------------------------------- 1 | "Charlie" 2 | -------------------------------------------------------------------------------- /test/data/enum.schema: -------------------------------------------------------------------------------- 1 | { 2 | "type": "enum", 3 | "name": "phonetics", 4 | "symbols": [ "Alpha", "Bravo", "Charlie", "Delta"] 5 | } 6 | -------------------------------------------------------------------------------- /test/data/float.json: -------------------------------------------------------------------------------- 1 | 1.3278991 2 | -------------------------------------------------------------------------------- /test/data/float.schema: 
-------------------------------------------------------------------------------- 1 | { "type": "float" } 2 | -------------------------------------------------------------------------------- /test/data/log.schema: -------------------------------------------------------------------------------- 1 | { 2 | "type" : "record", 3 | "name" : "AccessLogEvent", 4 | "namespace" : "logging.avro.domain", 5 | "fields" : [ { 6 | "name" : "host", 7 | "type" : "string" 8 | }, { 9 | "name" : "time", 10 | "type" : "string" 11 | }, { 12 | "name" : "elapsedTime", 13 | "type" : "long" 14 | }, { 15 | "name" : "tid", 16 | "type" : "int" 17 | }, { 18 | "name" : "request", 19 | "type" : { 20 | "type" : "record", 21 | "name" : "Request", 22 | "fields" : [ { 23 | "name" : "headers", 24 | "type" : { 25 | "type" : "map", 26 | "values" : "string" 27 | } 28 | }, { 29 | "name" : "method", 30 | "type" : "string" 31 | }, { 32 | "name" : "path", 33 | "type" : "string" 34 | }, { 35 | "name" : "queryString", 36 | "type" : [ "string", "null" ] 37 | }, { 38 | "name" : "remoteIp", 39 | "type" : "string" 40 | }, { 41 | "name" : "body", 42 | "type" : { 43 | "type" : "map", 44 | "values" : "string" 45 | } 46 | } ] 47 | } 48 | }, { 49 | "name" : "response", 50 | "type" : { 51 | "type" : "record", 52 | "name" : "Response", 53 | "fields" : [ { 54 | "name" : "status", 55 | "type" : "int" 56 | }, { 57 | "name" : "headers", 58 | "type" : { 59 | "type" : "map", 60 | "values" : "string" 61 | } 62 | }, { 63 | "name" : "body", 64 | "type" : { 65 | "type" : "map", 66 | "values" : "string" 67 | } 68 | } ] 69 | } 70 | }, { 71 | "name" : "customer", 72 | "type" : { 73 | "type" : "record", 74 | "name" : "Customer", 75 | "fields" : [ { 76 | "name" : "data", 77 | "type" : { 78 | "type" : "map", 79 | "values" : "string" 80 | } 81 | } ] 82 | } 83 | }, { 84 | "name" : "exception", 85 | "type" : [ { 86 | "type" : "record", 87 | "name" : "AppException", 88 | "fields" : [ { 89 | "name" : "class", 90 | "type" : "string" 91 | }, { 92 | "name" : "message", 93 | "type" : "string" 94 | }, { 95 | "name" : "stackTrace", 96 | "type" : [ "null", "string" ] 97 | } ] 98 | }, "null" ] 99 | } ] 100 | } 101 | -------------------------------------------------------------------------------- /test/data/nested.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "testhostA", 3 | "time": "1970-01-01T00:00Z", 4 | "elapsedTime": 123456789, 5 | "request": { 6 | "headers": { 7 | "user-agent": "firefox", 8 | "remote-ip": "0.0.0.0" 9 | }, 10 | "method": "GET", 11 | "path": "/basepath/object", 12 | "queryString": { 13 | "string": "param1=test1&param2=test2" 14 | }, 15 | "body": {} 16 | }, 17 | "exception": { 18 | "e.d.c.b.a.AppException": { 19 | "class": "org.apache.avro", 20 | "message": "An error occurred", 21 | "stackTrace": { 22 | "string": "failed at line 1" 23 | } 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /test/data/nested.schema: -------------------------------------------------------------------------------- 1 | { 2 | "fields": [ 3 | { 4 | "name": "host", 5 | "type": "string" 6 | }, 7 | { 8 | "name": "time", 9 | "type": "string" 10 | }, 11 | { 12 | "name": "elapsedTime", 13 | "type": "long" 14 | }, 15 | { 16 | "name": "request", 17 | "type": { 18 | "fields": [ 19 | { 20 | "name": "headers", 21 | "type": { 22 | "type": "map", 23 | "values": "string" 24 | } 25 | }, 26 | { 27 | "name": "method", 28 | "type": "string" 29 | }, 30 | { 31 | "name": "path", 32 | "type": 
"string" 33 | }, 34 | { 35 | "name": "queryString", 36 | "type": [ 37 | "string", 38 | "null" 39 | ] 40 | }, 41 | { 42 | "name": "body", 43 | "type": { 44 | "type": "map", 45 | "values": "string" 46 | } 47 | } 48 | ], 49 | "name": "Request", 50 | "type": "record" 51 | } 52 | }, 53 | { 54 | "name": "exception", 55 | "type": [ 56 | { 57 | "fields": [ 58 | { 59 | "name": "class", 60 | "type": "string" 61 | }, 62 | { 63 | "name": "message", 64 | "type": "string" 65 | }, 66 | { 67 | "name": "stackTrace", 68 | "type": [ 69 | "null", 70 | "string" 71 | ] 72 | } 73 | ], 74 | "name": "AppException", 75 | "type": "record" 76 | }, 77 | "null" 78 | ] 79 | } 80 | ], 81 | "name": "LogEvent", 82 | "namespace": "e.d.c.b.a", 83 | "type": "record" 84 | } -------------------------------------------------------------------------------- /test/data/string.json: -------------------------------------------------------------------------------- 1 | "© all rights reserved" 2 | -------------------------------------------------------------------------------- /test/data/string.schema: -------------------------------------------------------------------------------- 1 | { "type": "string" } 2 | -------------------------------------------------------------------------------- /test/data/string_test.json: -------------------------------------------------------------------------------- 1 | "The quick brown fox jumped over the lazy dogs" 2 | "The quick brown fox jumped over the lazy dogs" 3 | "The quick brown fox jumped over the lazy dogs" 4 | -------------------------------------------------------------------------------- /test/data/string_test.schema: -------------------------------------------------------------------------------- 1 | "string" 2 | -------------------------------------------------------------------------------- /test/datafile.test.js: -------------------------------------------------------------------------------- 1 | var libpath = process.env['MOCHA_COV'] ? 
__dirname + '/../lib-cov/' : __dirname + '/../lib/'; 2 | var fs = require('fs'); 3 | var should = require('should'); 4 | require('buffertools'); 5 | var DataFile = require(libpath + 'datafile'); 6 | var Avro = require(libpath + 'schema'); 7 | var util = require('util'); 8 | 9 | var dataFile; 10 | describe('AvroFile', function(){ 11 | dataFile = __dirname + "/../test/data/test.avrofile.avro"; 12 | var avroFile; 13 | before(function(){ 14 | avroFile = DataFile.AvroFile(); 15 | if (fs.existsSync(dataFile)) 16 | fs.unlinkSync(dataFile); 17 | }); 18 | after(function(){ 19 | if (fs.existsSync(dataFile)) fs.unlinkSync(dataFile); 20 | }); 21 | describe('open()', function(){ 22 | it('should open a file for writing and return a writer', function(done){ 23 | var schema = Avro.Schema({ "type": "string" }); 24 | var writer = avroFile.open(dataFile, schema, { flags: 'w' }); 25 | writer 26 | .on('error', function(err) { 27 | done(err); 28 | }) 29 | .on('close', function() { 30 | fs.existsSync(dataFile).should.be.true; 31 | done(); 32 | }); 33 | writer.should.be.an.instanceof(DataFile.Writer); 34 | writer 35 | .append('testing') 36 | .end(); 37 | }); 38 | it('should open a file for reading and return a reader', function(done){ 39 | var reader = avroFile.open(dataFile, null, { flags: 'r' }); 40 | reader.should.be.an.instanceof(DataFile.Reader); 41 | reader 42 | .on('data', function(data) { 43 | //console.error('data()'); 44 | data.should.equal("testing"); 45 | }) 46 | .on('error', function(err) { 47 | //console.error('error()'); 48 | if (fs.existsSync(dataFile)) fs.unlinkSync(dataFile); 49 | done(err); 50 | }) 51 | .on('end', function() { 52 | //console.error('end()'); 53 | done(); 54 | }); 55 | }); 56 | it('should throw an error if an unsupported codec is passed as an option', function(){ 57 | (function() { 58 | avroFile.open(null, null, { codec: 'non-existent'}); 59 | }).should.throwError(); 60 | }); 61 | it('should throw an error if an unsupported operation is passed as an option', function(){ 62 | (function() { 63 | avroFile.open(null, null, { flags: 'x'}); 64 | }).should.throwError(); 65 | }); 66 | }); 67 | }); 68 | describe('Block()', function(){ 69 | describe('length', function() { 70 | it('should return the current length of a Block', function(){ 71 | var block = new DataFile.Block(); 72 | block.length.should.equal(0); 73 | block.write(0x10); 74 | block.length.should.equal(1); 75 | }); 76 | }); 77 | describe('flush()', function(){ 78 | it('should flush the buffer by setting the offset to 0', function(){ 79 | var block = new DataFile.Block(); 80 | block.write(0x55); 81 | block.flush(); 82 | block.length.should.equal(0); 83 | }); 84 | }); 85 | describe('write()', function(){ 86 | it('should write a single byte into the buffer', function(){ 87 | var block = new DataFile.Block(); 88 | block.write(0x20); 89 | block.isEqual([0x20]).should.be.true; 90 | }); 91 | it('should write an array of bytes into the buffer', function() { 92 | var block = new DataFile.Block(); 93 | var bArray = [0x10, 0x20, 0x30, 0x40, 0x50, 0x60]; 94 | block.write(bArray); 95 | block.isEqual(bArray).should.be.true; 96 | }) 97 | }); 98 | describe('skip()', function(){ 99 | it('should skip n bytes of the block', function(){ 100 | var block = new DataFile.Block(32); 101 | block.write([0x01, 0x02, 0x03, 0x04, 0x05, 0x06]); 102 | block.skip(3); 103 | block.offset.should.equal(3); 104 | block.skip(2); 105 | block.offset.should.equal(5); 106 | }); 107 | it('should throw an error if you try to skip past the end of the written amount', 
function(){ 108 | (function() { 109 | var block = new DataFile.Block(32); 110 | block.write([0x01, 0x02, 0x03, 0x04, 0x05, 0x06]); 111 | block.skip(7); 112 | }).should.throwError(); 113 | }); 114 | }) 115 | describe('slice()', function(){ 116 | it('should return the written part of the Block', function(){ 117 | var block = new DataFile.Block(32); 118 | block.write([0x01, 0x02, 0x03, 0x04, 0x05, 0x06]); 119 | block.slice().equals(new Buffer([0x01, 0x02, 0x03, 0x04, 0x05, 0x06])).should.be.true; 120 | }); 121 | it('should return the specified sub section of a block', function(){ 122 | var block = new DataFile.Block(32); 123 | block.write([0x01, 0x02, 0x03, 0x04, 0x05, 0x06]); 124 | block.slice(2,5).equals(new Buffer([0x03, 0x04, 0x05])).should.be.true; 125 | }) 126 | }) 127 | describe('toBuffer()', function(){ 128 | it('should return a buffer with the contents of the block', function(){ 129 | var block = new DataFile.Block(64); 130 | block.write([0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71]); 131 | block.isEqual([0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71]).should.be.true; 132 | }); 133 | }); 134 | }); 135 | 136 | describe('Writer()', function(){ 137 | var avroFile; 138 | dataFile = __dirname + "/../test/data/test.writer.avro"; 139 | beforeEach(function(){ 140 | avroFile = DataFile.AvroFile(); 141 | }); 142 | after(function(){ 143 | if (fs.existsSync(dataFile)) fs.unlinkSync(dataFile); 144 | }); 145 | it('should write data to a file stream using a pipe', function(done){ 146 | var schema = "string"; 147 | var fileStream = fs.createWriteStream(dataFile); 148 | var writer = DataFile.Writer(schema, "null"); 149 | writer.pipe(fileStream); 150 | writer 151 | .on('error', function(err) { 152 | done(err); 153 | }) 154 | .on('close', function() { 155 | fs.existsSync(dataFile).should.be.true; 156 | done(); 157 | }); 158 | writer.append("hello world"); 159 | writer.end(); 160 | }); 161 | it('should read back data from the written file', function(done){ 162 | var reader = DataFile.Reader(); 163 | var fileStream = fs.createReadStream(dataFile); 164 | fileStream.pipe(reader); 165 | reader 166 | .on('data', function(data) { 167 | data.should.equal("hello world"); 168 | }) 169 | .on('error', function(err) { 170 | done(err); 171 | }) 172 | .on('end', function() { 173 | done(); 174 | }); 175 | }); 176 | function randomString() { 177 | var i; 178 | var result = ""; 179 | var stringSize = Math.floor(Math.random() * 512); 180 | for (i = 0; i < stringSize; i++) 181 | result += String.fromCharCode(Math.floor(Math.random() * 0xFF)); 182 | return result; 183 | } 184 | function schemaGenerator() { 185 | return { 186 | "testBoolean": Math.floor(Math.random() * 2) == 0 ? 
false : true, 187 | "testString": randomString(), 188 | "testLong": Math.floor(Math.random() * 1E10), 189 | "testDouble": Math.random(), 190 | "testBytes": new Buffer(randomString()) 191 | }; 192 | } 193 | it('should write a sequence marker after 16k of data to a file stream', function(done) { 194 | dataFile = __dirname + "/../test/data/test.writer.random.avro"; 195 | var schema = { 196 | "name": "testLargeDataSet", 197 | "type": "record", 198 | "fields": [ 199 | {"name":"testBoolean","type": "boolean"}, 200 | {"name":"testString","type": "string"}, 201 | {"name":"testLong","type": "long"}, 202 | {"name":"testDouble","type": "double"}, 203 | {"name":"testBytes","type": "bytes"} 204 | ] 205 | }; 206 | var writer = DataFile.Writer(schema, "null"); 207 | var fileStream = fs.createWriteStream(dataFile); 208 | writer.pipe(fileStream); 209 | writer 210 | .on('close', function() { 211 | fs.existsSync(dataFile).should.be.true; 212 | fs.unlinkSync(dataFile); 213 | done(); 214 | }) 215 | .on('error', function(err) { 216 | if (fs.existsSync(dataFile)) fs.unlinkSync(dataFile); 217 | done(err); 218 | }); 219 | var i = 0; 220 | var delay = 0; 221 | while(i++ < 20) { 222 | writer.append(schemaGenerator()); 223 | } 224 | writer.end(); 225 | }); 226 | describe('_generateSyncMarker()', function(){ 227 | it('should generate a 16 byte sequence to be used as a marker', function(){ 228 | var writer = DataFile.Writer(); 229 | should.not.exist(writer._generateSyncMarker(-5)); 230 | should.not.exist(writer._generateSyncMarker(0)); 231 | writer._generateSyncMarker(16).length.should.equal(16); 232 | writer._generateSyncMarker(2).length.should.equal(2); 233 | }); 234 | }); 235 | describe('compressData()', function(){ 236 | it('should compress a given buffer with deflate and return the compressed buffer', function(done){ 237 | var reader = DataFile.Reader(); 238 | var writer = DataFile.Writer(); 239 | writer.compressData(new Buffer([0x15, 0x25, 0x35, 0x45, 0x55, 0x65]), "deflate", function(err, data) { 240 | data.equals(new Buffer([0x13, 0x55, 0x35, 0x75, 0x0d, 0x4d, 0x05, 0x00])).should.be.true; 241 | reader.decompressData(data, "deflate", function(err, data) { 242 | data.equals(new Buffer([0x15, 0x25, 0x35, 0x45, 0x55, 0x65])).should.be.true; 243 | done(); 244 | }) 245 | }) 246 | }); 247 | it('should compress a given buffer with snappy and return the compressed buffer', function(done){ 248 | var reader = DataFile.Reader(); 249 | var writer = DataFile.Writer(); 250 | writer.compressData(new Buffer("compress this text"), "snappy", function(err, data) { 251 | reader.decompressData(data, "snappy", function(err, data) { 252 | if (err) done(err); 253 | data.toString().should.equal("compress this text"); 254 | done(); 255 | }); 256 | }); 257 | }); 258 | it('should return an error if an unsupported codec is passed as a parameter', function(done){ 259 | var writer = DataFile.Writer(); 260 | writer.compressData(new Buffer([0x13, 0x55, 0x35, 0x75]), "unsupported", function(err, data) { 261 | should.exist(err); 262 | err.should.be.an.instanceof(Error); 263 | done(); 264 | }); 265 | }); 266 | }); 267 | describe('write()', function() { 268 | it('should write a schema and associated data to a file', function(done) { 269 | var schema = "string"; //{ "type": "string" }; 270 | var data = "The quick brown fox jumped over the lazy dogs"; 271 | var writer = avroFile.open(dataFile, schema, { flags: 'w', codec: "deflate" }); 272 | writer 273 | .on('error', function(err) { 274 | done(err); 275 | }) 276 | .on('close', function() { 277 | 
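// By the time 'close' fires, the writer should already have flushed the container header and every appended block, so it is safe to check for the file on disk here.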
fs.existsSync(dataFile).should.be.true; 278 | done(); 279 | }) 280 | .append(data) 281 | .append(data) 282 | .append(data) 283 | .end(); 284 | }); 285 | it('should write a series of integers to a file and read them back as integers', function(done) { 286 | var aFile = __dirname + "/../test/data/test.int.avro"; 287 | var schema = { "type": "int" }; 288 | var writer = avroFile.open(aFile, schema, { flags: 'w', codec: "deflate" }); 289 | writer 290 | .on('error', function(err) { 291 | done(err); 292 | }) 293 | .on('close', function() { 294 | fs.existsSync(aFile).should.be.true; 295 | var reader = avroFile.open(aFile, null, { flags: 'r' }); 296 | reader.should.be.an.instanceof(DataFile.Reader); 297 | var results = []; 298 | reader 299 | .on('data', function(data) { 300 | results.push(data); 301 | }) 302 | .on('error', function(err) { 303 | console.error(err); 304 | done(err); 305 | }) 306 | .on('end', function() { 307 | results.should.eql([1,14,0,552]); 308 | done(); 309 | }); 310 | }) 311 | .append(1) 312 | .append(14) 313 | .append(0) 314 | .append(552) 315 | .end(); 316 | }); 317 | }); 318 | }); 319 | describe('Reader()', function(){ 320 | 321 | describe('streaming', function () { 322 | 323 | it('should read a large avro data stream compressed with deflate', function(done){ 324 | 325 | var count = 0; 326 | var fileStream = fs.createReadStream(__dirname + "/data/log.deflate.avro"); 327 | 328 | fileStream.pipe(DataFile.Reader()) 329 | .on('error', function(err) { 330 | done(err); 331 | }) 332 | .on('end', function(err) { 333 | count.should.equal(4096); 334 | done(); 335 | }) 336 | .on('header', function(data) { 337 | //console.log('\nHeader\n',util.inspect(data, {colors:true, depth:null})); 338 | should.exist(data); 339 | }) 340 | .on('data', function(data) { 341 | count++; 342 | //console.log(data.time, data.request.path, data.request.body.rememberMe || '[]' , data.response.status); 343 | }); 344 | }); 345 | 346 | it('should read a large avro data stream compressed with snappy', function(done){ 347 | 348 | var count = 0; 349 | var fileStream = fs.createReadStream(__dirname + "/data/log.snappy.avro"); 350 | 351 | fileStream.pipe(DataFile.Reader()) 352 | .on('error', function(err) { 353 | done(err); 354 | }) 355 | .on('end', function(err) { 356 | count.should.equal(4096); 357 | done(); 358 | }) 359 | .on('header', function(data) { 360 | //console.log('\nHeader\n',util.inspect(data, {colors:true, depth:null})); 361 | }) 362 | .on('data', function(data) { 363 | count++; 364 | //console.log(data.time, data.request.path, data.request.body.rememberMe || '[]' , data.response.status); 365 | }); 366 | }); 367 | }); 368 | 369 | describe('decompressData()', function(){ 370 | it('should decompress a deflate compressed buffer and return the original data', function(done){ 371 | var reader = DataFile.Reader(); 372 | reader.decompressData(new Buffer([0x13, 0x55, 0x35, 0x75, 0x0d, 0x4d, 0x05, 0x00]), "deflate", function(err, data) { 373 | data.equals(new Buffer([0x15, 0x25, 0x35, 0x45, 0x55, 0x65])).should.be.true; 374 | done(); 375 | }); 376 | }); 377 | it('should decompress a snappy compressed buffer and return the original data', function(done){ 378 | var reader = DataFile.Reader(); 379 | reader.decompressData(new Buffer([0x12, 0x44, 0x63, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 380 | 0x73, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x74, 0x65, 381 | 0x78, 0x74, 0x6c, 0x25, 0xd9, 0x04]), "snappy", function(err, data) { 382 | if (err) done(err); 383 | data.toString().should.equal("compress this text"); 384 | done(); 385 
| }); 386 | }); 387 | it('should just return the same data if the codec is null', function(done){ 388 | var reader = DataFile.Reader(); 389 | reader.decompressData(new Buffer([0x13, 0x55, 0x35, 0x75, 0x0d, 0x4d, 0x05, 0x00]), "null", function(err, data) { 390 | data.equals(new Buffer([0x13, 0x55, 0x35, 0x75, 0x0d, 0x4d, 0x05, 0x00])).should.be.true; 391 | done(); 392 | }); 393 | }); 394 | it('should return an error if an unsupported codec is passed as a parameter', function(done) { 395 | var reader = DataFile.Reader(); 396 | reader.decompressData(new Buffer([0x13, 0x55, 0x35, 0x75]), "unsupported", function(err, data) { 397 | should.exist(err); 398 | err.should.be.an.instanceof(Error); 399 | done(); 400 | }); 401 | }) 402 | }) 403 | describe('writing then reading', function() { 404 | it('should read an avro data file written and return the same data', function(done){ 405 | 406 | var dataFile = __dirname + "/data/test-array-strings.avro"; 407 | var schema = "string"; 408 | var fileStream = fs.createWriteStream(dataFile); 409 | var writer = DataFile.Writer(schema); 410 | var source = [ 411 | "The quick brown fox jumped over the lazy dogs", 412 | "The time has come for all good men to come to the aid of...", 413 | "Humpty dumpty sat on the wall, humpty dumpty had a great fall..." 414 | ]; 415 | writer.pipe(fileStream); 416 | writer 417 | .on('error', function(err) { 418 | done(err); 419 | }) 420 | .on('close', function() { 421 | 422 | var fileStream = fs.createReadStream(dataFile); 423 | var reader = fileStream.pipe(DataFile.Reader()); 424 | 425 | reader.should.be.an.instanceof(DataFile.Reader); 426 | var count = 0; 427 | reader 428 | .on('data', function(data) { 429 | data.should.equal(source[count++]); 430 | }) 431 | .on('error', function(err) { 432 | console.error(err); 433 | done(err); 434 | }) 435 | .on('header', function(data) { 436 | //console.log(data); 437 | }) 438 | .on('end', function() { 439 | count.should.equal(3); 440 | done(); 441 | }); 442 | }) 443 | .append(source[0]) 444 | .append(source[1]) 445 | .append(source[2]) 446 | .end(); 447 | 448 | }); 449 | }); 450 | }); 451 | -------------------------------------------------------------------------------- /test/io.test.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'); 2 | var should = require('should'); 3 | require('buffertools').extend(); 4 | 5 | var libpath = process.env['MOCHA_COV'] ? 
__dirname + '/../lib-cov/' : __dirname + '/../lib/'; 6 | var IO = require(libpath + 'io'); 7 | var DataFile = require(libpath + 'datafile'); 8 | var Avro = require(libpath + 'schema'); 9 | 10 | function randomString(length) { 11 | return new Buffer(new DataFile.Writer()._generateSyncMarker(32)).toString('base64'); 12 | } 13 | 14 | describe('IO', function(){ 15 | describe('BinaryEncoder()', function(){ 16 | var encoder, block; 17 | beforeEach(function(){ 18 | block = DataFile.Block(); 19 | encoder = IO.BinaryEncoder(block); 20 | }) 21 | afterEach(function(){ 22 | encoder = null; 23 | }) 24 | it('should throw an error if it is not passed an object to write to', function(){ 25 | (function() { 26 | var invalidEncoder = IO.BinaryEncoder(); 27 | }).should.throwError(); 28 | }); 29 | it('should throw an error if the object passed in does not implement the write() method', function() { 30 | (function() { 31 | var dummyBlock = { write: 0 }; 32 | var invalidEncoder = IO.BinaryEncoder(dummyBlock); 33 | }).should.throwError(); 34 | }); 35 | describe('writeByte()', function(){ 36 | it('should add a single octet to the buffer', function() { 37 | encoder.writeByte(50); 38 | block.toBuffer()[0].should.equal(50); 39 | // Test high bit 40 | encoder.writeByte(250); 41 | block.toBuffer()[1].should.equal(250); 42 | }); 43 | }); 44 | describe('writeNull()', function(){ 45 | it('should not add anything to the buffer', function(){ 46 | encoder.writeNull(); 47 | block.length.should.equal(0); 48 | }); 49 | }); 50 | describe('writeBoolean()', function(){ 51 | it('should add 1 or 0 to the buffer', function(){ 52 | encoder.writeBoolean(true); 53 | block.toBuffer()[0].should.equal(1); 54 | encoder.writeBoolean(false); 55 | block.toBuffer()[1].should.equal(0); 56 | }); 57 | }); 58 | describe('writeLong()', function(){ 59 | it('should encode a long using zig-zag encoding', function(){ 60 | encoder.writeLong(4); 61 | block.toBuffer()[0].should.equal(8); 62 | encoder.writeLong(138); 63 | block.toBuffer()[1].should.equal(148); 64 | block.toBuffer()[2].should.equal(2); 65 | }); 66 | 67 | // http://lucene.apache.org/core/3_5_0/fileformats.html#VInt 68 | it('should encode a long using variable-length + zig-zag encoding', function(){ 69 | encoder.writeLong(1425253517632); 70 | 71 | block.toBuffer()[0].should.equal(128); 72 | block.toBuffer()[1].should.equal(165); 73 | block.toBuffer()[2].should.equal(214); 74 | block.toBuffer()[3].should.equal(251); 75 | block.toBuffer()[4].should.equal(250); 76 | block.toBuffer()[5].should.equal(82); 77 | }); 78 | }); 79 | describe('writeFloat()', function(){ 80 | it('should encode a 32bit float in 4 bytes using java floatToIntBits method', function(){ 81 | encoder.writeFloat(1.3278991); 82 | block.toBuffer().equals(new Buffer([0x99, 0xf8, 0xa9, 0x3f])).should.be.true; 83 | }); 84 | }); 85 | describe('writeDouble()', function(){ 86 | it('should encode a 64bit float in 8 bytes using java doubleToLongBits method', function() { 87 | encoder.writeDouble(8.98928196620122323); 88 | block.toBuffer().equals(new Buffer([0xb3, 0xb6, 0x76, 0x2a, 0x83, 0xfa, 0x21, 0x40])).should.be.true; 89 | }); 90 | }); 91 | describe('writeBytes()', function(){ 92 | it('should be encoded as a long followed by that many bytes of data', function(){ 93 | var testBytes = new Buffer([255, 1, 254, 2, 253, 3]); 94 | encoder.writeBytes(testBytes); 95 | block.toBuffer()[0].should.equal(testBytes.length * 2); 96 | block.toBuffer()[5].should.equal(253); 97 | }); 98 | it('should throw an error if a buffer or array is not provided', 
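/* Like strings, bytes are written as a zig-zag encoded byte count followed by the raw octets, which is why the first byte asserted above is testBytes.length * 2. */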
function(){ 99 | (function() { 100 | encoder.writeBytes(4); 101 | }).should.throwError(); 102 | }) 103 | }); 104 | describe('writeString()', function(){ 105 | it('should be encoded as a long followed by that many bytes of UTF8 encoded character data', function(){ 106 | // Test UTF8 characters as well as normal 107 | var testString = "\u00A9 all rights reserved"; 108 | encoder.writeString(testString); 109 | block.toBuffer().equals(new Buffer([0x2c, 0xc2, 0xa9, 0x20, 0x61, 0x6c, 0x6c, 0x20, 110 | 0x72, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 111 | 0x65, 0x64])).should.be.true; 112 | }); 113 | it('should throw an error if it is not passed a string', function(){ 114 | (function() { 115 | encoder.writeString(21); 116 | }).should.throwError(); 117 | }) 118 | }); 119 | }); 120 | describe('BinaryDecoder()', function(){ 121 | var decoder, block; 122 | beforeEach(function(){ 123 | block = DataFile.Block(); 124 | decoder = IO.BinaryDecoder(block); 125 | }) 126 | afterEach(function(){ 127 | block = null; 128 | decoder = null; 129 | }) 130 | it('should throw an error if the constructor is not passed an input object', function(){ 131 | (function() { 132 | var invalidDecoder = IO.BinaryDecoder(); 133 | }).should.throwError(); 134 | }); 135 | it('should throw an error if the constructor is not passed an input object that implements the read method', function(){ 136 | (function() { 137 | var dummyBlock = { read: false }; 138 | var invalidDecoder = IO.BinaryDecoder(dummyBlock); 139 | }).should.throwError(); 140 | }); 141 | describe('readNull()', function(){ 142 | it('should decode and return a null', function(){ 143 | should.not.exist(decoder.readNull()); 144 | }); 145 | }); 146 | describe('readByte()', function(){ 147 | it('should decode and return an octet from the current position of the buffer', function(){ 148 | block.write(new Buffer([0x55])); 149 | decoder.readByte().should.equal(0x55); 150 | }) 151 | }) 152 | describe('readBoolean()', function(){ 153 | it('should decode and return true or false', function(){ 154 | block.write(new Buffer([0x01, 0x00])); 155 | decoder.readBoolean().should.be.true; 156 | decoder.readBoolean().should.be.false; 157 | }) 158 | }) 159 | describe('readLong()', function(){ 160 | it('should decode and return a long', function(){ 161 | block.write(new Buffer([0x94, 0x02])); 162 | decoder.readLong().should.equal(138); 163 | }) 164 | }) 165 | describe('readFloat()', function(){ 166 | it('should decode and return a 32bit float', function(){ 167 | block.write(new Buffer([0x99, 0xf8, 0xa9, 0x3f])); 168 | decoder.readFloat().toFixed(7).should.equal('1.3278991'); 169 | }) 170 | }) 171 | describe('readDouble()', function(){ 172 | it('should decode and return a 64bit float', function(){ 173 | block.write(new Buffer([0xb3, 0xb6, 0x76, 0x2a, 0x83, 0xfa, 0x21, 0x40])); 174 | decoder.readDouble().should.equal(8.98928196620122323); 175 | }) 176 | }) 177 | describe('readFixed()', function(){ 178 | it('should decode and return a fixed number of bytes', function(){ 179 | block.write(new Buffer([0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC])); 180 | decoder.readFixed(8).equals(new Buffer([0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC])).should.be.true; 181 | }) 182 | }) 183 | describe('readBytes()', function(){ 184 | it('should decode and return a set of bytes', function(){ 185 | block.write(new Buffer([0x08, 0x11, 0x22, 0x33, 0x44])); 186 | decoder.readBytes().equals(new Buffer([0x11, 0x22, 0x33, 0x44])).should.be.true; 187 | }) 188 | }) 189 | describe('readString()', 
function(){ 190 | it('should decode and return a string', function(){ 191 | block.write(new Buffer([0x2c, 0xc2, 0xa9, 0x20, 0x61, 0x6c, 0x6c, 0x20, 192 | 0x72, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 193 | 0x65, 0x64])); 194 | decoder.readString().should.equal("\u00A9 all rights reserved"); 195 | }) 196 | }) 197 | describe('skipNull()', function(){ 198 | it('should be a no op since nulls are encoded as nothing', function(){ 199 | block.write(new Buffer([1])); 200 | decoder.skipNull(); 201 | block.remainingBytes.should.equal(1); 202 | }) 203 | }) 204 | describe('skipBoolean()', function(){ 205 | it('should skip a boolean by advancing the read position 1 byte', function(){ 206 | block.write(new Buffer([1])); 207 | decoder.skipBoolean(); 208 | block.remainingBytes.should.equal(0); 209 | }); 210 | }) 211 | describe('skipLong()', function(){ 212 | it('should skip n bytes of a long encoded with zigzag encoding', function(){ 213 | block.write(new Buffer([0x94, 0x02])); 214 | decoder.skipLong(); 215 | block.remainingBytes.should.equal(0); 216 | block.write(new Buffer([0x02])); 217 | decoder.skipLong(); 218 | block.remainingBytes.should.equal(0) 219 | }) 220 | }) 221 | describe('skipFloat()', function(){ 222 | it('should skip 4 bytes of an encoded float', function(){ 223 | block.write(new Buffer([0x40, 0x50, 0x60, 0x70])); 224 | decoder.skipFloat(); 225 | block.remainingBytes.should.equal(0); 226 | }) 227 | }) 228 | describe('skipDouble()', function(){ 229 | it('should skip 8 bytes of an encoded double', function(){ 230 | block.write(new Buffer([0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xA0, 0xB0])); 231 | decoder.skipDouble(); 232 | block.remainingBytes.should.equal(0); 233 | }) 234 | }) 235 | describe('skipBytes()', function(){ 236 | it('should skip a long byte count followed by that many bytes', function(){ 237 | block.write(new Buffer([0x04, 0x64, 0x40])) 238 | decoder.skipBytes(); 239 | block.remainingBytes.should.equal(0); 240 | }) 241 | }) 242 | describe('skipString()', function(){ 243 | it('should skip a long followed by that many bytes', function(){ 244 | block.write(new Buffer([0x04, 0x4F, 0x4B])); 245 | decoder.skipString(); 246 | block.remainingBytes.should.equal(0); 247 | }); 248 | it('should skip a long followed by a UTF-8 encoded string', function(){ 249 | block.write(new Buffer([0x0c, 0xc2, 0xa9, 0x20, 0x61, 0x6c, 0x6c])); 250 | decoder.skipString(); 251 | block.remainingBytes.should.equal(0); 252 | }); 253 | }) 254 | }) 255 | describe('DatumWriter()', function() { 256 | it('should be instantiated and store a schema', function(){ 257 | var schema = Avro.Schema("long"); 258 | var writer = IO.DatumWriter(schema); 259 | writer.writersSchema.should.equal(schema); 260 | }) 261 | describe('writeFixed()', function(){ 262 | it('should add a series of bytes specified by the schema', function(){ 263 | var schema = Avro.Schema({ 264 | "type": "fixed", 265 | "name": "telephone", 266 | "size": 10 267 | }); 268 | var block = DataFile.Block(); 269 | var writer = IO.DatumWriter(schema); 270 | var encoder = IO.BinaryEncoder(block); 271 | var testString = "1234567890"; 272 | writer.writeFixed(schema, testString, encoder); 273 | block.toBuffer().toString().should.equal(testString); 274 | block.toBuffer().length.should.equal(testString.length); 275 | }) 276 | }); 277 | describe('writeEnum()', function(){ 278 | it('should write an enumeration encoded by its index', function(){ 279 | var schema = Avro.Schema({ 280 | "type": "enum", 281 | "name": "phonetics", 282 | "symbols": [ "Alpha", "Bravo", "Charlie", "Delta"] 283 | }); 284 | var block = DataFile.Block(); 
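// An enum is encoded as the zero-based index of its symbol, written as a zig-zag varint: "Charlie" is index 2 (encoded 0x04) and "Delta" index 3 (0x06), matching the assertions below.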
285 | var writer = IO.DatumWriter(schema); 286 | var encoder = IO.BinaryEncoder(block); 287 | writer.writeEnum(schema, "Charlie", encoder); 288 | writer.writeEnum(schema, "Delta", encoder); 289 | block.toBuffer()[0].should.equal(4); 290 | block.toBuffer()[1].should.equal(6); 291 | }); 292 | }); 293 | describe('writeArray()', function(){ 294 | it('should encode an array as a series of blocks, each block consists of a long count value, followed by that many array items, a block with count zero indicates the end of the array', function(){ 295 | var schema = Avro.Schema({ 296 | "type": "array", 297 | "items": "long" 298 | }); 299 | var block = DataFile.Block(); 300 | var writer = IO.DatumWriter(schema); 301 | var encoder = IO.BinaryEncoder(block); 302 | var testArray = [10, 20, 30, 40, 50]; 303 | writer.writeArray(schema, testArray, encoder); 304 | block.toBuffer().equals(new Buffer([testArray.length * 2, 20, 40, 60, 80, 100, 0])).should.be.true; 305 | }) 306 | }); 307 | describe('writeMap()', function(){ 308 | it('should write a map encoded as a series of blocks, each block consists of a long count, followed by that many key/value pairs, a block count of 0 indicates the end of the map', function(){ 309 | var schema = Avro.Schema({ 310 | "name": "headers", 311 | "type": { 312 | "type": "map", 313 | "values": "string" 314 | } 315 | }); 316 | var data = { 317 | "user-agent": "firefox", 318 | "remote-ip": "10.0.0.0", 319 | "content-type": "application/json" 320 | } 321 | var block = DataFile.Block(); 322 | var writer = IO.DatumWriter(schema); 323 | var encoder = IO.BinaryEncoder(block); 324 | writer.writeMap(schema, data, encoder); 325 | var i = 0; 326 | block.toBuffer()[i++].should.equal(_.size(data) * 2); // zig-zag encoding 327 | _.each(data, function(value, key) { 328 | block.toBuffer()[i++].should.equal(key.length * 2); // zig-zag encoding 329 | block.toBuffer().slice(i,i + key.length).toString().should.equal(key); 330 | i += key.length; 331 | block.toBuffer()[i++].should.equal(value.length * 2); // zig-zag encoding 332 | block.toBuffer().slice(i,i + value.length).toString().should.equal(value); 333 | i += value.length; 334 | }) 335 | }); 336 | }); 337 | describe('writeUnion()', function(){ 338 | it('should encode a union by first writing a long value indicating the zero-based position within the union of the schema of its value, followed by the encoded value according to that schema', function(){ 339 | var schema = Avro.Schema([ "string", "int" ]); 340 | var data = "testing a union"; 341 | var block = DataFile.Block(); 342 | var writer = IO.DatumWriter(schema); 343 | var encoder = IO.BinaryEncoder(block); 344 | writer.writeUnion(schema, data, encoder); 345 | block.toBuffer().length.should.equal(data.length + 2); 346 | block.toBuffer()[0].should.equal(0); 347 | block.toBuffer()[1].should.equal(data.length * 2); 348 | block.toBuffer().slice(2).toString().should.equal(data); 349 | block.flush(); 350 | writer.writeUnion(schema, 44, encoder); 351 | block.toBuffer().length.should.equal(2); 352 | block.toBuffer()[0].should.equal(2); 353 | block.toBuffer()[1].should.equal(44 * 2); 354 | }); 355 | }); 356 | describe('writeRecord()', function(){ 357 | it('should encode a record by encoding the values of its fields in the order that they are declared', function(){ 358 | var schema = Avro.Schema({ 359 | "name": "user", 360 | "type": "record", 361 | "fields": [ 362 | {"name":"firstName","type": "string"}, 363 | {"name":"lastName","type": "string"}, 364 | {"name":"age","type": "int"} 365 | ] 366 | }); 
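// The record body below is just the field encodings concatenated in declaration order: each string is a zig-zag length followed by its UTF-8 bytes, and the int is a single zig-zag varint.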
367 | var data = { 368 | "firstName": "bob", 369 | "lastName": "the_builder", 370 | "age": 40 371 | } 372 | var block = DataFile.Block(); 373 | var writer = IO.DatumWriter(schema); 374 | var encoder = IO.BinaryEncoder(block); 375 | writer.writeRecord(schema, data, encoder); 376 | block.toBuffer()[0].should.equal(data.firstName.length * 2); // zig-zag encoding 377 | block.toBuffer().slice(1,4).toString().should.equal(data.firstName); 378 | block.toBuffer()[4].should.equal(data.lastName.length * 2); // zig-zag encoding 379 | block.toBuffer().slice(5,16).toString().should.equal(data.lastName); 380 | block.toBuffer()[16].should.equal(data.age * 2); 381 | }) 382 | }); 383 | 384 | describe('bad writeRecord()', function(){ 385 | it('should throw a validation error identifying the path of the invalid nested field', function(){ 386 | var schema = Avro.Schema({ 387 | "name": "user", 388 | "type": "record", 389 | "fields": [ 390 | {"name":"firstName","type": "string"}, 391 | {"name":"lastName","type": "string"}, 392 | {"name":"nest","type": { 393 | "name":"nest", 394 | "type": "record", 395 | "fields": [{"name":"nField","type": "int"}] 396 | }}, 397 | {"name":"age","type": "int"} 398 | ] 399 | }); 400 | var data = { 401 | "firstName": "bob", 402 | "lastName": "the_builder", 403 | "nest": {nField: "badString"}, 404 | "extra": "foo", 405 | "age": 40 406 | } 407 | var block = DataFile.Block(); 408 | var writer = IO.DatumWriter(schema); 409 | var encoder = IO.BinaryEncoder(block); 410 | var thrown = false; 411 | try { 412 | writer.writeRecord(schema, data, encoder); 413 | } catch (err){ 414 | err.fieldPath[0].should.equal("nest"); 415 | err.fieldPath[1].should.equal("nField"); 416 | thrown = true; 417 | } 418 | 419 | thrown.should.equal(true); 420 | }) 421 | }); 422 | 423 | describe('write()', function(){ 424 | it('should encode an int/long with zig-zag encoding', function() { 425 | var schema = Avro.Schema({ 426 | "type": "int" 427 | }); 428 | var block = DataFile.Block(); 429 | var writer = IO.DatumWriter(schema); 430 | var encoder = IO.BinaryEncoder(block); 431 | writer.write(-64, encoder); 432 | block.toBuffer()[0].should.equal(127); 433 | }); 434 | it('should encode a string as a long of its length, followed by the utf8 encoded string', function(){ 435 | var schema = Avro.Schema({ 436 | "type": "string" 437 | }); 438 | var block = DataFile.Block(); 439 | var writer = IO.DatumWriter(schema); 440 | var encoder = IO.BinaryEncoder(block); 441 | writer.write("testing", encoder); 442 | block.toBuffer().toString().should.equal("\u000etesting"); 443 | }); 444 | it('should encode a record as the values of its fields in the order of declaration', function(){ 445 | var schema = Avro.Schema({ 446 | "type" : "record", 447 | "name" : "IntStringRecord", 448 | "fields" : [ { "name" : "intField", "type" : "int" }, 449 | { "name" : "stringField", "type" : "string" }] 450 | }); 451 | var block = DataFile.Block(); 452 | var writer = IO.DatumWriter(schema); 453 | var encoder = IO.BinaryEncoder(block); 454 | var record = { 455 | intField: 1, 456 | stringField: "abc" 457 | }; 458 | writer.write(record, encoder); 459 | block.toBuffer().toString().should.equal("\u0002\u0006abc"); 460 | }); 461 | it('should encode a union as a long of the zero-based schema position, followed by the value according to the schema at that position', function(){ 462 | var schema = Avro.Schema([ 463 | "int", 464 | "string", 465 | "null" 466 | ]); 467 | var block = DataFile.Block(); 468 | var writer = IO.DatumWriter(schema); 469 | 
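// A union is encoded as a zig-zag long giving the zero-based branch index, followed by the value itself: "test" matches branch 1 ("string"), so the writer emits 0x02 (index 1), then 0x08 (length 4), then the UTF-8 bytes.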
var encoder = IO.BinaryEncoder(block); 470 | var record = "test"; 471 | writer.write(record, encoder); 472 | block.toBuffer().toString().should.equal("\u0002\u0008test"); 473 | block.flush(); 474 | var record = null; 475 | writer.write(record, encoder); 476 | block.toBuffer()[0].should.equal(4); 477 | }); 478 | it('should encode a union of an enum with null type and enum', function(){ 479 | var schema = Avro.Schema( 480 | [ 481 | "null", 482 | { 483 | "type" : "enum", 484 | "name" : "a_enum", 485 | "symbols": ["enum_1", "enum_2"] 486 | } 487 | ] 488 | ); 489 | var block = DataFile.Block(); 490 | var writer = IO.DatumWriter(schema); 491 | var encoder = IO.BinaryEncoder(block); 492 | var record = "enum_1"; 493 | writer.write(record, encoder); 494 | block.toBuffer().toString().should.equal("\u0002\u0000"); 495 | block.flush(); 496 | var record = null; 497 | writer.write(record, encoder); 498 | block.toBuffer()[0].should.equal(0); 499 | }); 500 | it('should encode a union of an array with null type and enum', function(){ 501 | var schema = Avro.Schema( 502 | [ 503 | { 504 | "type": "array", 505 | "items": "string" 506 | }, 507 | "null" 508 | ] 509 | ); 510 | var block = DataFile.Block(); 511 | var writer = IO.DatumWriter(schema); 512 | var encoder = IO.BinaryEncoder(block); 513 | var record = ['testStr']; 514 | writer.write(record, encoder); 515 | block.toBuffer().equals(new Buffer([0,2,14,116,101,115,116,83,116,114,0])).should.be.true; 516 | block.flush(); 517 | var record = null; 518 | writer.write(record, encoder); 519 | block.toBuffer()[0].should.equal(2); 520 | }); 521 | it('should encode a union of an array with null type and object', function () { 522 | var schema = Avro.Schema( 523 | [ 524 | "null", 525 | { 526 | "type": "record", 527 | "name": "nested_record", 528 | "fields": [ 529 | {"name": "field1", "type": ["string", "null"], "default": ""}, 530 | {"name": "field2", "type": ["int", "null"], "default": 0} 531 | ] 532 | } 533 | ] 534 | ); 535 | var block = DataFile.Block(); 536 | var writer = IO.DatumWriter(schema); 537 | var encoder = IO.BinaryEncoder(block); 538 | var record = { 539 | "field1": "data1", 540 | "field2": 23 541 | }; 542 | writer.write(record, encoder); 543 | block.toBuffer().equals(new Buffer([2, 0, 10, 100, 97, 116, 97, 49, 0, 46])).should.be.true; 544 | block.flush(); 545 | var record = null; 546 | writer.write(record, encoder); 547 | block.toBuffer()[0].should.equal(0); 548 | }); 549 | it('should encode a union of an array with null type and map', function () { 550 | var schema = Avro.Schema( 551 | [ 552 | "null", 553 | { 554 | "type": "map", 555 | "values": "string" 556 | } 557 | ] 558 | ); 559 | var block = DataFile.Block(); 560 | var writer = IO.DatumWriter(schema); 561 | var encoder = IO.BinaryEncoder(block); 562 | var record = { 563 | "Archer": "Boop" 564 | }; 565 | 566 | writer.write(record, encoder); 567 | //console.log(block.toBuffer().toJSON()); 568 | block.toBuffer().equals(new Buffer([ 2, 2, 12, 65, 114, 99, 104, 101, 114, 8, 66, 111, 111, 112, 0 ])).should.be.true; 569 | block.toBuffer().slice(10,14).toString().should.equal(record.Archer); 570 | block.flush(); 571 | 572 | var record = null; 573 | writer.write(record, encoder); 574 | block.toBuffer()[0].should.equal(0); 575 | }); 576 | 577 | 578 | 579 | it('should encode a nested schema', function() { 580 | var schema = Avro.Schema({ 581 | "fields": [ 582 | { 583 | "name": "host", 584 | "type": "string" 585 | }, 586 | { 587 | "name": "time", 588 | "type": "string" 589 | }, 590 | { 591 | "name": 
"elapsedTime", 592 | "type": "long" 593 | }, 594 | { 595 | "name": "request", 596 | "type": { 597 | "name": "Request", 598 | "type": "record", 599 | "fields": [ 600 | { 601 | "name": "headers", 602 | "type": { 603 | "type": "map", 604 | "values": "string" 605 | } 606 | }, 607 | { 608 | "name": "method", 609 | "type": "string" 610 | }, 611 | { 612 | "name": "path", 613 | "type": "string" 614 | }, 615 | { 616 | "name": "queryString", 617 | "type": [ 618 | "string", 619 | "null" 620 | ] 621 | }, 622 | { 623 | "name": "body", 624 | "type": { 625 | "type": "map", 626 | "values": "string" 627 | } 628 | } 629 | ] 630 | } 631 | }, 632 | { 633 | "name": "exception", 634 | "type": [ 635 | { 636 | "fields": [ 637 | { 638 | "name": "class", 639 | "type": "string" 640 | }, 641 | { 642 | "name": "message", 643 | "type": "string" 644 | }, 645 | { 646 | "name": "stackTrace", 647 | "type": [ 648 | "null", 649 | "string" 650 | ] 651 | } 652 | ], 653 | "name": "AppException", 654 | "type": "record" 655 | }, 656 | "null" 657 | ] 658 | } 659 | ], 660 | "name": "LogEvent", 661 | "namespace": "e.d.c.b.a", 662 | "type": "record" 663 | }); 664 | var block = DataFile.Block(); 665 | var writer = IO.DatumWriter(schema); 666 | var encoder = IO.BinaryEncoder(block); 667 | var log = { 668 | host: "testhostA", 669 | time: "1970-01-01T00:00Z", 670 | elapsedTime: 123456789, 671 | request: { 672 | headers: { 673 | "user-agent": "firefox", 674 | "remote-ip": "0.0.0.0" 675 | }, 676 | method: "GET", 677 | path: "/basepath/object", 678 | queryString: "param1=test1¶m2=test2", 679 | body: {} 680 | }, 681 | exception: { 682 | "class": "org.apache.avro", 683 | message: "An error occurred", 684 | stackTrace: "failed at line 1" 685 | } 686 | } 687 | writer.write(log, encoder); 688 | block.toBuffer().equals(new Buffer([0x12, 0x74, 0x65, 0x73, 0x74, 689 | 0x68, 0x6f, 0x73, 0x74, 0x41, 0x22, 0x31, 0x39, 690 | 0x37, 0x30, 0x2d, 0x30, 0x31, 0x2d, 0x30, 0x31, 691 | 0x54, 0x30, 0x30, 0x3a, 0x30, 0x30, 0x5a, 0xaa, 692 | 0xb4, 0xde, 0x75, 0x04, 0x14, 0x75, 0x73, 0x65, 693 | 0x72, 0x2d, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x0e, 694 | 0x66, 0x69, 0x72, 0x65, 0x66, 0x6f, 0x78, 0x12, 695 | 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2d, 0x69, 696 | 0x70, 0x0e, 0x30, 0x2e, 0x30, 0x2e, 0x30, 0x2e, 697 | 0x30, 0x00, 0x06, 0x47, 0x45, 0x54, 0x20, 0x2f, 698 | 0x62, 0x61, 0x73, 0x65, 0x70, 0x61, 0x74, 0x68, 699 | 0x2f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x00, 700 | 0x32, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x31, 0x3d, 701 | 0x74, 0x65, 0x73, 0x74, 0x31, 0x26, 0x70, 0x61, 702 | 0x72, 0x61, 0x6d, 0x32, 0x3d, 0x74, 0x65, 0x73, 703 | 0x74, 0x32, 0x00, 0x00, 0x1e, 0x6f, 0x72, 0x67, 704 | 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 705 | 0x61, 0x76, 0x72, 0x6f, 0x22, 0x41, 0x6e, 0x20, 706 | 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x6f, 0x63, 707 | 0x63, 0x75, 0x72, 0x72, 0x65, 0x64, 0x02, 0x20, 708 | 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x20, 0x61, 709 | 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x31])).should.be.true; 710 | }) 711 | 712 | }); 713 | 714 | it('should encode a record with a nested record correctly', function(){ 715 | var schema = Avro.Schema({ 716 | "name": "user", 717 | "type": "record", 718 | "fields": [ 719 | {"name":"firstName","type": "string"}, 720 | {"name":"lastName","type": "string"}, 721 | {"name":"age","type": "int"}, 722 | { 723 | "name":"dog", 724 | "type": "record", 725 | "fields": [ 726 | {"name":"name","type": "string"}, 727 | {"name":"age","type": "int"} 728 | ] 729 | } 730 | ] 731 | }); 732 | var data = { 733 | "firstName": "bob", 734 | 
"lastName": "the_builder", 735 | "age": 40, 736 | "dog": {name: "sparky", "age": 3} 737 | }; 738 | var block = DataFile.Block(); 739 | var writer = IO.DatumWriter(schema); 740 | var encoder = IO.BinaryEncoder(block); 741 | writer.write(data, encoder); 742 | block.toBuffer().equals(new Buffer([ 743 | 0x06, 0x62, 0x6f, 0x62, 0x16, 0x74, 0x68, 0x65, 0x5f, 0x62, 0x75, 0x69, 744 | 0x6c, 0x64, 0x65, 0x72, 0x50, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x79, 745 | 0x06])).should.be.true; 746 | }); 747 | 748 | it('should encode a record with two nested records correctly', function(){ 749 | var schema = Avro.Schema({ 750 | "name": "user", 751 | "type": "record", 752 | "fields": [ 753 | {"name":"firstName","type": "string"}, 754 | {"name":"lastName","type": "string"}, 755 | {"name":"age","type": "int"}, 756 | { 757 | "name":"dog", 758 | "type": "record", 759 | "fields": [ 760 | {"name":"name","type": "string"}, 761 | {"name":"age","type": "int"} 762 | ] 763 | }, 764 | {"name":"dog2","type": "dog"} 765 | ] 766 | }); 767 | var data = { 768 | "firstName": "bob", 769 | "lastName": "the_builder", 770 | "age": 40, 771 | "dog": {name: "sparky", "age": 3}, 772 | "dog2": {name: "spot", "age": 1} 773 | }; 774 | var block = DataFile.Block(); 775 | var writer = IO.DatumWriter(schema); 776 | var encoder = IO.BinaryEncoder(block); 777 | writer.write(data, encoder); 778 | block.toBuffer().equals(new Buffer([ 779 | 0x06, 0x62, 0x6f, 0x62, 0x16, 0x74, 0x68, 0x65, 0x5f, 0x62, 780 | 0x75, 0x69, 0x6c, 0x64, 0x65, 0x72, 0x50, 0x0c, 0x73, 0x70, 781 | 0x61, 0x72, 0x6b, 0x79, 0x06, 0x08, 0x73, 0x70, 0x6f, 0x74, 0x02])).should.be.true; 782 | }); 783 | 784 | it('should encode a schema with another dependent schema', function() { 785 | var child = Avro.Schema({ 786 | "name": "Request", 787 | "namespace": "e.d.c.b.a", 788 | "type": "record", 789 | "fields": [ 790 | { 791 | "name": "headers", 792 | "type": { 793 | "type": "map", 794 | "values": "string" 795 | } 796 | }, 797 | { 798 | "name": "method", 799 | "type": "string" 800 | }, 801 | { 802 | "name": "path", 803 | "type": "string" 804 | }, 805 | { 806 | "name": "queryString", 807 | "type": [ 808 | "string", 809 | "null" 810 | ] 811 | }, 812 | { 813 | "name": "body", 814 | "type": { 815 | "type": "map", 816 | "values": "string" 817 | } 818 | } 819 | ] 820 | }); 821 | var parent = Avro.Schema({ 822 | "fields": [ 823 | { 824 | "name": "host", 825 | "type": "string" 826 | }, 827 | { 828 | "name": "time", 829 | "type": "string" 830 | }, 831 | { 832 | "name": "elapsedTime", 833 | "type": "long" 834 | }, 835 | { 836 | "name": "request", 837 | "type": "e.d.c.b.a.Request" 838 | }, 839 | { 840 | "name": "exception", 841 | "type": [ 842 | { 843 | "fields": [ 844 | { 845 | "name": "class", 846 | "type": "string" 847 | }, 848 | { 849 | "name": "message", 850 | "type": "string" 851 | }, 852 | { 853 | "name": "stackTrace", 854 | "type": [ 855 | "null", 856 | "string" 857 | ] 858 | } 859 | ], 860 | "name": "AppException", 861 | "type": "record" 862 | }, 863 | "null" 864 | ] 865 | } 866 | ], 867 | "name": "LogEvent", 868 | "namespace": "e.d.c.b.a", 869 | "type": "record" 870 | }, 'f.e.d.c.b.a', [child]); 871 | var block = DataFile.Block(); 872 | var writer = IO.DatumWriter(parent); 873 | var encoder = IO.BinaryEncoder(block); 874 | var log = { 875 | host: "testhostA", 876 | time: "1970-01-01T00:00Z", 877 | elapsedTime: 123456789, 878 | request: { 879 | headers: { 880 | "user-agent": "firefox", 881 | "remote-ip": "0.0.0.0" 882 | }, 883 | method: "GET", 884 | path: "/basepath/object", 885 | 
queryString: "param1=test1&param2=test2", 886 | body: {} 887 | }, 888 | exception: { 889 | "class": "org.apache.avro", 890 | message: "An error occurred", 891 | stackTrace: "failed at line 1" 892 | } 893 | } 894 | writer.write(log, encoder); 895 | block.toBuffer().equals(new Buffer([0x12, 0x74, 0x65, 0x73, 0x74, 896 | 0x68, 0x6f, 0x73, 0x74, 0x41, 0x22, 0x31, 0x39, 897 | 0x37, 0x30, 0x2d, 0x30, 0x31, 0x2d, 0x30, 0x31, 898 | 0x54, 0x30, 0x30, 0x3a, 0x30, 0x30, 0x5a, 0xaa, 899 | 0xb4, 0xde, 0x75, 0x04, 0x14, 0x75, 0x73, 0x65, 900 | 0x72, 0x2d, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x0e, 901 | 0x66, 0x69, 0x72, 0x65, 0x66, 0x6f, 0x78, 0x12, 902 | 0x72, 0x65, 0x6d, 0x6f, 0x74, 0x65, 0x2d, 0x69, 903 | 0x70, 0x0e, 0x30, 0x2e, 0x30, 0x2e, 0x30, 0x2e, 904 | 0x30, 0x00, 0x06, 0x47, 0x45, 0x54, 0x20, 0x2f, 905 | 0x62, 0x61, 0x73, 0x65, 0x70, 0x61, 0x74, 0x68, 906 | 0x2f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x00, 907 | 0x32, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x31, 0x3d, 908 | 0x74, 0x65, 0x73, 0x74, 0x31, 0x26, 0x70, 0x61, 909 | 0x72, 0x61, 0x6d, 0x32, 0x3d, 0x74, 0x65, 0x73, 910 | 0x74, 0x32, 0x00, 0x00, 0x1e, 0x6f, 0x72, 0x67, 911 | 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 912 | 0x61, 0x76, 0x72, 0x6f, 0x22, 0x41, 0x6e, 0x20, 913 | 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x6f, 0x63, 914 | 0x63, 0x75, 0x72, 0x72, 0x65, 0x64, 0x02, 0x20, 915 | 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x20, 0x61, 916 | 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x31])).should.be.true; 917 | }) 918 | }); 919 | describe('DatumReader()', function(){ 920 | var block, decoder; 921 | beforeEach(function(){ 922 | block = DataFile.Block(); 923 | decoder = IO.BinaryDecoder(block); 924 | }); 925 | describe('read()', function(){ 926 | it('should set the readersSchema to the writersSchema if readersSchema is null', function(){ 927 | var schema = Avro.Schema("int"); 928 | var reader = IO.DatumReader(schema, null); 929 | block.write(new Buffer([0x06])); 930 | var result = reader.read(decoder); 931 | result.should.equal(3); 932 | reader.writersSchema.should.equal(reader.readersSchema); 933 | }); 934 | }); 935 | describe('readData()', function(){ 936 | var schema = Avro.Schema({ 937 | "name": "testRecord", 938 | "type": "record", 939 | "fields": [ 940 | {"name":"testNull","type": "null"}, 941 | {"name":"testBoolean","type": "boolean"}, 942 | {"name":"testString","type": "string"}, 943 | {"name":"testInt","type": "int"}, 944 | {"name":"testLong","type": "long"}, 945 | {"name":"testFloat","type": "float"}, 946 | {"name":"testDouble","type": "double"}, 947 | {"name":"testBytes","type": "bytes"}, 948 | {"name":"testFixed","type": "fixed", "size": 5}, 949 | {"name":"testEnum","type": "enum", "symbols": ["Alpha", "Bravo", "Charlie", "Delta"]}, 950 | {"name":"testArray","type": "array", "items": "long"}, 951 | {"name":"testMap","type": { "type":"map", "values": "int"}}, 952 | {"name":"testUnion","type":["string", "int", "null"]} 953 | ] 954 | }); 955 | schema.should.be.an.instanceof(Avro.RecordSchema); 956 | var reader = IO.DatumReader(schema); 957 | var block = DataFile.Block(); 958 | var decoder = IO.BinaryDecoder(block); 959 | block.write(new Buffer([/*purposely blank*/ 960 | 0x01, 961 | 0x08, 0x74, 0x65, 0x73, 0x74, 962 | 0x08, 963 | 0x94, 0x02, 964 | 0x99, 0xf8, 0xa9, 0x3f, 965 | 0xb3, 0xb6, 0x76, 0x2a, 0x83, 0xfa, 0x21, 0x40, 966 | 0x0c, 0xF4, 0x44, 0x45, 0x7f, 0x28, 0x6C, 967 | 0x19, 0x69, 0x29, 0x3f, 0xff, 968 | 0x04, 969 | 0x08, 0x14, 0x69, 0x10, 0xF1, 0x01, 0x00, 970 | 0x06, 0x06, 0x6f, 0x6e, 0x65, 0x20, 0x06, 0x74, 0x77, 0x6f, 0x10, 0x0a, 0x74
0x68, 0x72, 0x65, 0x65, 0x40, 0x00, 971 | 0x04])); 972 | it('should read and decode a null', function(){ 973 | var result = reader.readData(schema.fieldsHash["testNull"].type, null, decoder); 974 | should.not.exist(result); 975 | block.offset.should.equal(0); 976 | }); 977 | it('should read and decode a boolean', function(){ 978 | var result = reader.readData(schema.fieldsHash["testBoolean"].type, null, decoder); 979 | result.should.equal(true); 980 | }); 981 | it('should read and decode a string', function(){ 982 | var result = reader.readData(schema.fieldsHash["testString"].type, null, decoder); 983 | result.should.equal("test"); 984 | }); 985 | it('should read and decode an int', function(){ 986 | var result = reader.readData(schema.fieldsHash["testInt"].type, null, decoder); 987 | result.should.equal(4); 988 | }); 989 | it('should read and decode a long', function(){ 990 | var result = reader.readData(schema.fieldsHash["testLong"].type, null, decoder); 991 | result.should.equal(138); 992 | }); 993 | it('should read and decode a float', function(){ 994 | var result = reader.readData(schema.fieldsHash["testFloat"].type, null, decoder); 995 | result.toFixed(7).should.equal('1.3278991'); 996 | }); 997 | it('should read and decode a double', function(){ 998 | var result = reader.readData(schema.fieldsHash["testDouble"].type, null, decoder); 999 | result.should.equal(8.98928196620122323); 1000 | }); 1001 | it('should read and decode bytes', function(){ 1002 | var result = reader.readData(schema.fieldsHash["testBytes"].type, null, decoder); 1003 | result.equals(new Buffer([0xF4, 0x44, 0x45, 0x7f, 0x28, 0x6C])).should.be.true; 1004 | result.length.should.equal(6); 1005 | }); 1006 | it('should read and decode a fixed', function(){ 1007 | var result = reader.readData(schema.fieldsHash["testFixed"].type, null, decoder); 1008 | result.equals(new Buffer([0x19, 0x69, 0x29, 0x3f, 0xff])).should.be.true; 1009 | result.length.should.equal(5); 1010 | }); 1011 | it('should read and decode an enum', function(){ 1012 | var result = reader.readData(schema.fieldsHash["testEnum"].type, null, decoder); 1013 | result.should.equal("Charlie"); 1014 | }); 1015 | it('should read and decode an array', function(){ 1016 | var result = reader.readData(schema.fieldsHash["testArray"].type, null, decoder); 1017 | result.should.eql([10, -53, 8, -121]); 1018 | result.length.should.equal(4); 1019 | }); 1020 | it('should read and decode a map', function(){ 1021 | var result = reader.readData(schema.fieldsHash["testMap"].type, null, decoder); 1022 | result.should.have.property("one", 0x10); 1023 | result.should.have.property("two", 8); 1024 | result.should.have.property("three", 0x20); 1025 | _.size(result).should.equal(3); 1026 | }); 1027 | it('should read and decode a union', function(){ 1028 | var result = reader.readData(schema.fieldsHash["testUnion"].type, null, decoder); 1029 | should.not.exist(result); 1030 | }); 1031 | it('should read and decode a record', function(){ 1032 | block.rewind(); 1033 | var result = reader.readData(schema, null, decoder); 1034 | result.should.have.property("testMap"); 1035 | var map = result.testMap; 1036 | map.should.have.property("one", 0x10); 1037 | }); 1038 | it('should throw an error if an unrecognized schema type is provided', function(){ 1039 | (function() { 1040 | reader.readData(Avro.Schema({"type":"invalid"}), null, decoder); 1041 | }).should.throwError(); 1042 | }); 1043 | it('should throw an error if the writersSchema provided is not a Schema object', function(){ 1044 |
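// Editor's note: per this test's intent, readData is expected to reject a writersSchema that is not a parsed Schema instance before attempting to decode any bytes.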
(function() { 1045 | reader.readData("invalid", null, decoder); 1046 | }).should.throwError(); 1047 | }); 1048 | it('should throw an error if the readersSchema provided is not a Schema object', function(){ 1049 | (function() { 1050 | reader.readData(Avro.Schema({"type":"string"}), "invalid", decoder); 1051 | }).should.throwError(); 1052 | }); 1053 | }) 1054 | describe('readEnum()', function(){ 1055 | it('should decode and return an enumerated type from the end of the list', function(){ 1056 | var schema = Avro.Schema({ 1057 | "type": "enum", 1058 | "name": "phonetics", 1059 | "symbols": [ "Alpha", "Bravo", "Charlie", "Delta"] 1060 | }); 1061 | var reader = IO.DatumReader(schema); 1062 | block.write(new Buffer([0x06])); 1063 | reader.readEnum(schema, schema, decoder).should.equal("Delta"); 1064 | }) 1065 | it('should decode and return an enumerated type from the start of the list', function(){ 1066 | var schema = Avro.Schema({ 1067 | "type": "enum", 1068 | "name": "phonetics", 1069 | "symbols": [ "Alpha", "Bravo", "Charlie", "Delta"] 1070 | }); 1071 | var reader = IO.DatumReader(schema); 1072 | block.write(new Buffer([0x00])); 1073 | reader.readEnum(schema, schema, decoder).should.equal("Alpha"); 1074 | }) 1075 | }) 1076 | describe('readArray()', function(){ 1077 | it('should decode and return an array', function(){ 1078 | var schema = Avro.Schema({ 1079 | "type": "array", 1080 | "items": "string" 1081 | }); 1082 | var data = ["apples", "banannas", "oranges", "pears", "grapes"]; 1083 | var reader = IO.DatumReader(schema); 1084 | block.write(new Buffer([0x0a, 0x0c, 0x61, 0x70, 0x70, 0x6c, 0x65, 0x73, 0x10, 0x62, 0x61, 1085 | 0x6e, 0x61, 0x6e, 0x6e, 0x61, 0x73, 0x0e, 0x6f, 0x72, 0x61, 0x6e, 1086 | 0x67, 0x65, 0x73, 0x0a, 0x70, 0x65, 0x61, 0x72, 0x73, 0x0c, 0x67, 1087 | 0x72, 0x61, 0x70, 0x65, 0x73, 0x00])); 1088 | reader.readArray(schema, schema, decoder).should.eql(data); 1089 | }) 1090 | }) 1091 | describe('readMap()', function(){ 1092 | it('should decode a map and return a json object containing the data', function(){ 1093 | var schema = Avro.Schema({ 1094 | "name": "headers", 1095 | "type": { 1096 | "type": "map", 1097 | "values": "string" 1098 | } 1099 | }); 1100 | var data = [ 6, 20, 117, 115, 101, 114, 45, 97, 103, 101, 110, 116, 14, 102, 105, 114, 101, 1101 | 102, 111, 120, 18, 114, 101, 109, 111, 116, 101, 45, 105, 112, 16, 49, 48, 46, 1102 | 48, 46, 48, 46, 48, 24, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 1103 | 101, 32, 97, 112, 112, 108, 105, 99, 97, 105, 116, 111, 110, 47, 106, 115, 111, 1104 | 110, 0]; 1105 | block.write(new Buffer(data)); 1106 | var reader = IO.DatumReader(schema); 1107 | var map = reader.readMap(schema, schema, decoder); 1108 | map.should.have.property("user-agent", "firefox"); 1109 | map.should.have.property("remote-ip", "10.0.0.0"); 1110 | map.should.have.property("content-type", "applicaiton/json"); 1111 | }); 1112 | }) 1113 | describe('readUnion()', function(){ 1114 | it('should decode a union by returning the object specified by the schema of the union\'s index', function(){ 1115 | var schema = Avro.Schema([ 1116 | "int", 1117 | "string", 1118 | "null" 1119 | ]); 1120 | var reader = IO.DatumReader(schema); 1121 | block.write(new Buffer([0x02, 0x08, 0x74, 0x65, 0x73, 0x74])); 1122 | var result = reader.readUnion(schema, schema, decoder); 1123 | (result === "test").should.be.true; 1124 | }) 1125 | }) 1126 | describe('readRecord()', function(){ 1127 | it('should decode a record and return a json object containing the data', function(){ 1128 | var
schema = Avro.Schema({ 1129 | "name": "user", 1130 | "type": "record", 1131 | "fields": [ 1132 | {"name":"firstName","type": "string"}, 1133 | {"name":"lastName","type": "string"}, 1134 | {"name":"age","type": "int"} 1135 | ] 1136 | }); 1137 | block.write(new Buffer([0x06, 0x62, 0x6f, 0x62, 0x16, 0x74, 0x68, 0x65, 0x5f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x65, 0x72, 0x50])); 1138 | var reader = IO.DatumReader(schema); 1139 | var record = reader.readRecord(schema, schema, decoder); 1140 | record.should.have.property("firstName", "bob"); 1141 | record.should.have.property("lastName", "the_builder"); 1142 | record.should.have.property("age", 40); 1143 | }); 1144 | it('should decode a record with a nested record and return a json object containing the data', function(){ 1145 | var schema = Avro.Schema({ 1146 | "name": "user", 1147 | "type": "record", 1148 | "fields": [ 1149 | {"name":"firstName","type": "string"}, 1150 | {"name":"lastName","type": "string"}, 1151 | {"name":"age","type": "int"}, 1152 | { 1153 | "name":"dog", 1154 | "type": "record", 1155 | "fields": [ 1156 | {"name":"name","type": "string"}, 1157 | {"name":"age","type": "int"} 1158 | ] 1159 | } 1160 | ] 1161 | }); 1162 | var data = { 1163 | "firstName": "bob", 1164 | "lastName": "the_builder", 1165 | "age": 40, 1166 | "dog": {name: "sparky", "age": 3} 1167 | }; 1168 | block.write(new Buffer([ 1169 | 0x06, 0x62, 0x6f, 0x62, 0x16, 0x74, 0x68, 0x65, 0x5f, 0x62, 1170 | 0x75, 0x69, 0x6c, 0x64, 0x65, 0x72, 0x50, 0x0c, 0x73, 0x70, 1171 | 0x61, 0x72, 0x6b, 0x79, 0x06])); 1172 | var reader = IO.DatumReader(schema); 1173 | var record = reader.readRecord(schema, schema, decoder); 1174 | record.should.have.property("firstName", "bob"); 1175 | record.should.have.property("lastName", "the_builder"); 1176 | record.should.have.property("age", 40); 1177 | record.dog.should.have.property("name", "sparky"); 1178 | record.dog.should.have.property("age", 3); 1179 | }); 1180 | it('should decode a record with two nested records and return a json object containing the data', function(){ 1181 | var schema = Avro.Schema({ 1182 | "name": "user", 1183 | "type": "record", 1184 | "fields": [ 1185 | {"name":"firstName","type": "string"}, 1186 | {"name":"lastName","type": "string"}, 1187 | {"name":"age","type": "int"}, 1188 | { 1189 | "name":"dog", 1190 | "type": "record", 1191 | "fields": [ 1192 | {"name":"name","type": "string"}, 1193 | {"name":"age","type": "int"} 1194 | ] 1195 | }, 1196 | {"name":"dog2","type": "dog"} 1197 | ] 1198 | }); 1199 | var data = { 1200 | "firstName": "bob", 1201 | "lastName": "the_builder", 1202 | "age": 40, 1203 | "dog": {name: "sparky", "age": 3}, 1204 | "dog2": {name: "spot", "age": 1} 1205 | }; 1206 | block.write(new Buffer([ 1207 | 0x06, 0x62, 0x6f, 0x62, 0x16, 0x74, 0x68, 0x65, 0x5f, 0x62, 1208 | 0x75, 0x69, 0x6c, 0x64, 0x65, 0x72, 0x50, 0x0c, 0x73, 0x70, 1209 | 0x61, 0x72, 0x6b, 0x79, 0x06, 0x08, 0x73, 0x70, 0x6f, 0x74, 0x02])); 1210 | var reader = IO.DatumReader(schema); 1211 | var record = reader.readRecord(schema, schema, decoder); 1212 | record.should.have.property("firstName", "bob"); 1213 | record.should.have.property("lastName", "the_builder"); 1214 | record.should.have.property("age", 40); 1215 | record.dog.should.have.property("name", "sparky"); 1216 | record.dog.should.have.property("age", 3); 1217 | record.dog2.should.have.property("name", "spot"); 1218 | record.dog2.should.have.property("age", 1); 1219 | }); 1220 | }); 1221 | describe('skipData()', function(){ 1222 | var schema = Avro.Schema({ 1223 | "name": 
"testRecord", 1224 | "type": "record", 1225 | "fields": [ 1226 | {"name":"testNull","type": "null"}, 1227 | {"name":"testBoolean","type": "boolean"}, 1228 | {"name":"testString","type": "string"}, 1229 | {"name":"testInt","type": "int"}, 1230 | {"name":"testLong","type": "long"}, 1231 | {"name":"testFloat","type": "float"}, 1232 | {"name":"testDouble","type": "double"}, 1233 | {"name":"testBytes","type": "bytes"}, 1234 | {"name":"testFixed","type": "fixed", "size": 5}, 1235 | {"name":"testEnum","type": "enum", "symbols": ["Alpha", "Bravo", "Charlie", "Delta"]}, 1236 | {"name":"testArray","type": "array", "items": "long"}, 1237 | {"name":"testMap","type": { "type":"map", "values": "int"}}, 1238 | {"name":"testUnion","type":["string", "int", "null"]} 1239 | ] 1240 | }); 1241 | var reader = IO.DatumReader(schema); 1242 | var block = DataFile.Block(); 1243 | var decoder = IO.BinaryDecoder(block); 1244 | block.write(new Buffer([/*purposely blank*/ 1245 | 0x01, 1246 | 0x08, 0x74, 0x65, 0x73, 0x74, 1247 | 0x08, 1248 | 0x94, 0x02, 1249 | 0x99, 0xf8, 0xa9, 0x3f, 1250 | 0xb3, 0xb6, 0x76, 0x2a, 0x83, 0xfa, 0x21, 0x40, 1251 | 0x0c, 0xF4, 0x44, 0x45, 0x7f, 0x28, 0x6C, 1252 | 0x19, 0x69, 0x29, 0x3f, 0xff, 1253 | 0x04, 1254 | 0x08, 0x14, 0x69, 0x10, 0xF1, 0x01, 0x00, 1255 | 0x06, 0x06, 0x6f, 0x6e, 0x65, 0x20, 0x06, 0x74, 0x77, 0x6f, 0x10, 0x0a, 0x74, 0x68, 0x72, 0x65, 0x65, 0x40, 0x00, 1256 | 0x04])); 1257 | it('should skip a null', function(){ 1258 | reader.skipData(schema.fieldsHash["testNull"].type, decoder); 1259 | block.offset.should.equal(0); 1260 | }); 1261 | it('should skip a boolean', function(){ 1262 | reader.skipData(schema.fieldsHash["testBoolean"].type, decoder); 1263 | block.offset.should.equal(1); 1264 | }); 1265 | it('should skip a string', function(){ 1266 | reader.skipData(schema.fieldsHash["testString"].type, decoder); 1267 | block.offset.should.equal(6); 1268 | }); 1269 | it('should skip an int', function(){ 1270 | reader.skipData(schema.fieldsHash["testInt"].type, decoder); 1271 | block.offset.should.equal(7); 1272 | }); 1273 | it('should skip a long', function(){ 1274 | reader.skipData(schema.fieldsHash["testLong"].type, decoder); 1275 | block.offset.should.equal(9); 1276 | }); 1277 | it('should skip a float', function(){ 1278 | reader.skipData(schema.fieldsHash["testFloat"].type, decoder); 1279 | block.offset.should.equal(13); 1280 | }); 1281 | it('should skip a double', function(){ 1282 | reader.skipData(schema.fieldsHash["testDouble"].type, decoder); 1283 | block.offset.should.equal(21); 1284 | }); 1285 | it('should skip bytes', function(){ 1286 | reader.skipData(schema.fieldsHash["testBytes"].type, decoder); 1287 | block.offset.should.equal(28); 1288 | }); 1289 | it('should skip a fixed', function(){ 1290 | reader.skipData(schema.fieldsHash["testFixed"].type, decoder); 1291 | block.offset.should.equal(33); 1292 | }); 1293 | it('should skip an enum', function(){ 1294 | reader.skipData(schema.fieldsHash["testEnum"].type, decoder); 1295 | block.offset.should.equal(34); 1296 | }); 1297 | it('should skip an array', function(){ 1298 | reader.skipData(schema.fieldsHash["testArray"].type, decoder); 1299 | block.offset.should.equal(41); 1300 | }); 1301 | it('should skip a map', function(){ 1302 | reader.skipData(schema.fieldsHash["testMap"].type, decoder); 1303 | block.offset.should.equal(60); 1304 | }); 1305 | it('should skip a union', function(){ 1306 | reader.skipData(schema.fieldsHash["testUnion"].type, decoder); 1307 | block.offset.should.equal(61); 1308 | }); 1309 | it('should skip 
a record', function(){ 1310 | block.rewind(); 1311 | reader.skipData(schema, decoder); 1312 | block.offset.should.equal(61); 1313 | }); 1314 | }) 1315 | }) 1316 | }) 1317 | -------------------------------------------------------------------------------- /test/mocha.opts: -------------------------------------------------------------------------------- 1 | --require should 2 | --reporter spec 3 | --ui bdd -------------------------------------------------------------------------------- /test/schema.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'); 2 | var util = require('util'); 3 | 4 | var AvroInvalidSchemaError = function(msg) { return new Error('AvroInvalidSchemaError: ' + util.format.apply(null, arguments)); }; 5 | 6 | var PRIMITIVE_TYPES = ['null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string']; 7 | var COMPLEX_TYPES = ['record', 'enum', 'array', 'map', 'union', 'fixed']; 8 | 9 | var _parseNamedType = function(schema, namespace) { 10 | if (_.contains(PRIMITIVE_TYPES, schema)) { 11 | return schema; 12 | } else { 13 | throw new AvroInvalidSchemaError('unknown type name: %s; known type names are %s', 14 | JSON.stringify(schema), 15 | JSON.stringify(PRIMITIVE_TYPES)); 16 | } 17 | }; 18 | 19 | function makeFullyQualifiedTypeName(schema, namespace) { 20 | var typeName = null; 21 | if (_.isString(schema)) { 22 | typeName = schema; 23 | } else if (_.isObject(schema)) { 24 | if (_.isString(schema.namespace)) { 25 | namespace = schema.namespace; 26 | } 27 | if (_.isString(schema.name)) { 28 | typeName = schema.name; 29 | } else if (_.isString(schema.type)) { 30 | typeName = schema.type; 31 | } 32 | } else { 33 | throw new AvroInvalidSchemaError('unable to determine fully qualified type name from schema %s in namespace %s', 34 | JSON.stringify(schema), namespace); 35 | } 36 | 37 | if (!_.isString(typeName)) { 38 | throw new AvroInvalidSchemaError('unable to determine type name from schema %s in namespace %s', 39 | JSON.stringify(schema), namespace); 40 | } 41 | 42 | if (typeName.indexOf('.') !== -1) { 43 | return typeName; 44 | } else if (_.contains(PRIMITIVE_TYPES, typeName)) { 45 | return typeName; 46 | } else if (_.isString(namespace)) { 47 | return namespace + '.'
+ typeName; 48 | } else { 49 | return typeName; 50 | } 51 | } 52 | 53 | function Schema(schema, namespace) { 54 | if ((this instanceof arguments.callee) === false) 55 | return new arguments.callee(schema, namespace); 56 | 57 | this.schemaRecords = {}; 58 | 59 | if (!_.isUndefined(schema)) 60 | return this.parse(schema, namespace); 61 | } 62 | 63 | _.extend(Schema.prototype, { 64 | 65 | parse: function(schema, namespace) { 66 | var self = this; 67 | if (_.isNull(schema) || _.isUndefined(schema)) { 68 | throw new AvroInvalidSchemaError('schema is null or undefined: %s', 69 | JSON.stringify(schema)); 70 | } else if (_.isString(schema)) { 71 | return new PrimitiveSchema(this, schema); 72 | } else if (_.isObject(schema) && !_.isArray(schema)) { 73 | if (schema.type === 'record') { 74 | if (!_.has(schema, 'fields')) { 75 | throw new AvroInvalidSchemaError('record must specify "fields", got %s', 76 | JSON.stringify(schema)); 77 | } else if (!_.has(schema, 'name')) { 78 | throw new AvroInvalidSchemaError('record must specify "name", got %s', 79 | JSON.stringify(schema)); 80 | } else { 81 | var record = new RecordSchema(schema.name, schema.namespace, 82 | _.map(schema.fields, function(field) { 83 | return new FieldSchema(field.name, self.parse(field, namespace)); 84 | })); 85 | // Store the schema records into a map of schema name to 86 | // record, so we can compare against it later if we find 87 | // something that isn't a primitive data type, but may 88 | // be a self-reference 89 | if (!this.schemaRecords[schema.name]) { 90 | this.schemaRecords[schema.name] = record; 91 | } 92 | 93 | return record; 94 | } 95 | } else if (schema.type === 'enum') { 96 | if (_.has(schema, 'symbols')) { 97 | return new EnumSchema(schema.symbols); 98 | } else { 99 | throw new AvroInvalidSchemaError('enum must specify "symbols", got %s', 100 | JSON.stringify(schema)); 101 | } 102 | } else if (schema.type === 'array') { 103 | if (_.has(schema, 'items')) { 104 | return new ArraySchema(this.parse(schema.items, namespace), namespace); 105 | } else { 106 | throw new AvroInvalidSchemaError('array must specify "items", got %s', 107 | JSON.stringify(schema)); 108 | } 109 | } else if (schema.type === 'map') { 110 | if (_.has(schema, 'values')) { 111 | return new MapSchema(this.parse(schema.values, namespace)); 112 | } else { 113 | throw new AvroInvalidSchemaError('map must specify "values" schema, got %s', 114 | JSON.stringify(schema)); 115 | } 116 | } else if (schema.type === 'fixed') { 117 | if (_.has(schema, 'size')) { 118 | return new FixedSchema(schema.name, schema.size); 119 | } else { 120 | throw new AvroInvalidSchemaError('fixed must specify "size", got %s', 121 | JSON.stringify(schema)); 122 | } 123 | } else if (_.has(schema, 'type')) { 124 | return this.parse(schema.type, namespace); 125 | } else { 126 | throw new AvroInvalidSchemaError('not yet implemented: %j', schema); 127 | } 128 | } else if (_.isArray(schema)) { 129 | if (_.isEmpty(schema)) { 130 | throw new AvroInvalidSchemaError('unions must have at least 1 branch'); 131 | } 132 | var branchTypes = _.map(schema, function(type) { 133 | return self.parse(type, namespace); 134 | }); 135 | return new UnionSchema(branchTypes, namespace); 136 | } else { 137 | throw new AvroInvalidSchemaError('unexpected JavaScript type for schema: ' + (typeof schema)); 138 | } 139 | }, 140 | 141 | validate: function(schema, datum){ 142 | return true; 143 | }, 144 | 145 | validateAndThrow: function(schema, datum){ 146 | return true; 147 | }, 148 | 149 | toString:
function() { 150 | return JSON.stringify({ type: this.type }); 151 | } 152 | }); 153 | 154 | function PrimitiveSchema(schema, type) { 155 | 156 | if (!_.isString(type)) { 157 | throw new AvroInvalidSchemaError('Primitive type name must be a string'); 158 | } 159 | 160 | if (!_.contains(PRIMITIVE_TYPES, type)) { 161 | var record = schema.schemaRecords[type]; 162 | 163 | if (record) { 164 | this.type = record; 165 | return; 166 | } 167 | 168 | throw new AvroInvalidSchemaError('Primitive type must be one of: %s; or a previously self-referenced type. Got %s', 169 | JSON.stringify(PRIMITIVE_TYPES), type); 170 | } 171 | 172 | this.type = type; 173 | } 174 | 175 | util.inherits(PrimitiveSchema, Schema); 176 | 177 | function FieldSchema(name, type) { 178 | if (!_.isString(name)) { 179 | throw new AvroInvalidSchemaError('Field name must be a string'); 180 | } 181 | 182 | if (!(type instanceof Schema)) { 183 | throw new AvroInvalidSchemaError('Field type must be a Schema object'); 184 | } 185 | 186 | this.name = name; 187 | this.type = type; 188 | } 189 | 190 | //util.inherits(FieldSchema, Schema); 191 | 192 | function NamedSchema(name, namespace) { 193 | 194 | } 195 | 196 | function RecordSchema(name, namespace, fields) { 197 | if (!_.isString(name)) { 198 | throw new AvroInvalidSchemaError('Record name must be a string'); 199 | } 200 | 201 | if (!_.isNull(namespace) && !_.isUndefined(namespace) && !_.isString(namespace)) { 202 | throw new AvroInvalidSchemaError('Record namespace must be string or null'); 203 | } 204 | 205 | if (!_.isArray(fields)) { 206 | throw new AvroInvalidSchemaError('Fields must be an array'); 207 | } 208 | 209 | this.type = 'record'; 210 | this.name = name; 211 | this.namespace = namespace; 212 | this.fields = fields; 213 | 214 | this.fieldsHash = _.reduce(fields, function(hash, field) { 215 | hash[field.name] = field; 216 | return hash; 217 | }, {}); 218 | }; 219 | 220 | util.inherits(RecordSchema, Schema); 221 | 222 | function MapSchema(type) { 223 | this.type = 'map'; 224 | this.values = type; 225 | } 226 | 227 | util.inherits(MapSchema, Schema); 228 | 229 | function ArraySchema(items) { 230 | if (_.isNull(items) || _.isUndefined(items)) { 231 | throw new AvroInvalidSchemaError('Array "items" schema should not be null or undefined'); 232 | } 233 | 234 | this.type = 'array'; 235 | this.items = items; 236 | } 237 | 238 | util.inherits(ArraySchema, Schema); 239 | 240 | function UnionSchema(schemas, namespace) { 241 | if (!_.isArray(schemas) || _.isEmpty(schemas)) { 242 | throw new AvroInvalidSchemaError('Union must have at least 1 branch'); 243 | } 244 | 245 | this.type = 'union'; 246 | this.schemas = schemas; //_.map(schemas, function(type) { return makeFullyQualifiedTypeName(type, namespace); }); 247 | this.namespace = namespace; 248 | } 249 | 250 | util.inherits(UnionSchema, Schema); 251 | 252 | function EnumSchema(symbols) { 253 | if (!_.isArray(symbols)) { 254 | throw new AvroInvalidSchemaError('Enum must have array of symbols, got %s', 255 | JSON.stringify(symbols)); 256 | } 257 | if (!_.all(symbols, function(symbol) { return _.isString(symbol); })) { 258 | throw new AvroInvalidSchemaError('Enum symbols must be strings, got %s', 259 | JSON.stringify(symbols)); 260 | } 261 | 262 | this.type = 'enum'; 263 | this.symbols = symbols; 264 | } 265 | util.inherits(EnumSchema, Schema); 266 | 267 | function FixedSchema(name, size) { 268 | 269 | this.type = 'fixed'; 270 | this.name = name; 271 | this.size = size; 272 | } 273 | 274 | util.inherits(FixedSchema, Schema); 275 | 276 |
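// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of the original source.
// It shows how the classes above compose when Schema() parses a record,
// including a by-name reference resolved through schemaRecords (all
// identifiers below are defined in this file):
//
//   var s = Schema({
//       name: 'user',
//       type: 'record',
//       fields: [
//           { name: 'age',  type: 'int' },
//           { name: 'dog',  type: 'record',
//             fields: [{ name: 'name', type: 'string' }] },
//           { name: 'dog2', type: 'dog' }  // resolved via schemaRecords
//       ]
//   });
//   s instanceof RecordSchema;                                    // true
//   s.fieldsHash['age'].type instanceof PrimitiveSchema;          // true
//   s.fieldsHash['dog2'].type.type === s.fieldsHash['dog'].type;  // true
// ---------------------------------------------------------------------------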
if (!_.isUndefined(exports)) { 277 | exports.Schema = Schema; 278 | exports.PrimitiveSchema = PrimitiveSchema; 279 | exports.ArraySchema = ArraySchema; 280 | exports.MapSchema = MapSchema; 281 | exports.UnionSchema = UnionSchema; 282 | exports.RecordSchema = RecordSchema; 283 | exports.FixedSchema = FixedSchema; 284 | exports.EnumSchema = EnumSchema; 285 | } 286 | -------------------------------------------------------------------------------- /test/schema.test.js: -------------------------------------------------------------------------------- 1 | var libpath = process.env['MOCHA_COV'] ? __dirname + '/../lib-cov/' : __dirname + '/../lib/'; 2 | 3 | var _ = require('lodash'); 4 | var should = require('should'); 5 | 6 | var Avro = require(libpath + 'schema'); 7 | 8 | describe('Schema()', function(){ 9 | it('should create a new Schema object given arguments', function(){ 10 | var schema = Avro.Schema("string"); 11 | schema.should.be.an.instanceof(Avro.PrimitiveSchema); 12 | schema.should.be.an.instanceof(Avro.Schema); // its baseclass 13 | schema.type.should.equal("string"); 14 | }); 15 | describe('parse()', function(){ 16 | it('should throw an error if no arguments are provided', function(){ 17 | (function() { 18 | var schema = Avro.Schema(); 19 | schema.parse(); 20 | }).should.throwError(); 21 | }); 22 | it('should return a PrimitiveSchema if any of the primitive types are passed as schema arguments or as a type property', function(){ 23 | var primitives = ['null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string']; 24 | _.each(primitives, function(type) { 25 | var schema = Avro.Schema(type); 26 | schema.should.be.an.instanceof(Avro.PrimitiveSchema); 27 | schema.type.should.equal(type); 28 | schema = Avro.Schema({ "type": type }); 29 | schema.should.be.an.instanceof(Avro.PrimitiveSchema); 30 | schema.type.should.equal(type); 31 | }); 32 | }); 33 | it('should throw an error if an unrecognized primitive type is provided', function(){ 34 | (function() { 35 | Avro.Schema("unrecognized"); 36 | }).should.throwError(); 37 | (function() { 38 | Avro.Schema({"type":"unrecognized"}); 39 | }).should.throwError(); 40 | }) 41 | it('should return a UnionSchema if an array is passed as a type', function(){ 42 | var schema = Avro.Schema([ "string", "int", "null"]); 43 | schema.should.be.an.instanceof(Avro.UnionSchema); 44 | schema.type.should.equal("union"); 45 | }); 46 | it('should throw an error if an empty array of unions is passed', function(){ 47 | (function() { 48 | var schema = Avro.Schema([]); 49 | }).should.throwError(); 50 | }) 51 | it('should return a RecordSchema if an object is passed with a type "record"', function(){ 52 | var schema = Avro.Schema({ 53 | name: "myrecord", 54 | type: "record", 55 | fields: [ 56 | { 57 | "name": "method", 58 | "type": "string" 59 | }, 60 | { 61 | "name": "path", 62 | "type": "string" 63 | }, 64 | { 65 | "name": "queryString", 66 | "type": [ 67 | "string", 68 | "null" 69 | ] 70 | }, 71 | ] 72 | }); 73 | schema.should.be.an.instanceof(Avro.RecordSchema); 74 | schema.type.should.equal("record"); 75 | schema.fields.should.be.an.instanceof(Object); 76 | _.size(schema.fields).should.equal(3); 77 | }); 78 | it('should return a MapSchema if an object is passed with a type "map"', function(){ 79 | var schema = Avro.Schema({ 80 | "name": "mapSchemaTest", 81 | "type": { 82 | "type": "map", 83 | "values": "bytes" 84 | } 85 | }); 86 | schema.should.be.an.instanceof(Avro.MapSchema); 87 | schema.values.should.be.an.instanceof(Avro.PrimitiveSchema); 88 |
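// Editor's note: the map's "values" sub-schema is recursively parsed, so it is a full Schema instance with its own "type" property, asserted below.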
schema.values.type.should.equal("bytes"); 89 | schema.type.should.equal("map"); 90 | }); 91 | it('should return an ArraySchema if an object is passed with a type "array"', function(){ 92 | var schema = Avro.Schema({ 93 | "name": "arraySchemaTest", 94 | "type": "array", 95 | "items": "long" 96 | }); 97 | schema.should.be.an.instanceof(Avro.ArraySchema); 98 | schema.items.should.be.an.instanceof(Avro.PrimitiveSchema); 99 | schema.type.should.equal("array"); 100 | }); 101 | it('should return a FixedSchema if an object is passed with a type "fixed"', function(){ 102 | var schema = Avro.Schema({ 103 | "name": "fixedSchemaTest", 104 | "type": { 105 | "type": "fixed", 106 | "size": 50 107 | } 108 | }); 109 | schema.should.be.an.instanceof(Avro.FixedSchema); 110 | schema.size.should.equal(50); 111 | schema.type.should.equal("fixed"); 112 | }); 113 | it('should return an EnumSchema if an object is passed with a type "enum"', function(){ 114 | var schema = Avro.Schema({ 115 | "type": "enum", 116 | "symbols": [ "Alpha", "Bravo", "Charlie", "Delta"] 117 | }); 118 | schema.should.be.an.instanceof(Avro.EnumSchema); 119 | schema.symbols.should.have.length(4); 120 | schema.type.should.equal("enum"); 121 | }) 122 | it('should allow for self references by name for non-primitive data types', function() { 123 | var schema = Avro.Schema({ 124 | "name": "document", 125 | "type": [ 126 | { 127 | "type": "record", 128 | "name": "Document", 129 | "fields": [ 130 | { 131 | "name": "test", 132 | "type": [ 133 | "null", 134 | "string" 135 | ] 136 | } 137 | ] 138 | }, 139 | { 140 | "type": "record", 141 | "name": "Fax", 142 | "fields": [ 143 | { 144 | "name": "data", 145 | "type": [ 146 | "null", 147 | "Document" 148 | ], 149 | "default": null 150 | } 151 | ] 152 | } 153 | ] 154 | }); 155 | 156 | // Ensure that the reference to the non-primitive type 'Document' 157 | // in the second element of the type array now has the value of the 158 | // original 'Document' 159 | var original = schema.schemas[0]; 160 | var selfReferenced = schema.schemas[1].fields[0].type.schemas[1].type; 161 | selfReferenced.should.equal(original); 162 | }); 163 | // This test is disabled because the parser cannot currently do late 164 | // binding: ideally the first reference to 165 | // Document would not fail immediately; it would wait until it has reached a
166 | // definition for Document, failing only when it reaches the end of the schema. 167 | it.skip('should allow for self references that are defined later', function() { 168 | var schema = Avro.Schema({ 169 | "name": "document", 170 | "type": [ 171 | { 172 | "type": "record", 173 | "name": "Fax", 174 | "fields": [ 175 | { 176 | "name": "data", 177 | "type": [ 178 | "null", 179 | "Document" 180 | ], 181 | "default": null 182 | } 183 | ] 184 | }, 185 | { 186 | "type": "record", 187 | "name": "Document", 188 | "fields": [ 189 | { 190 | "name": "test", 191 | "type": [ 192 | "null", 193 | "string" 194 | ] 195 | } 196 | ] 197 | } 198 | ] 199 | }); 200 | 201 | // Ensure that the reference to the non-primitive type 'Document' 202 | // in the second element of the type array now has the value of the 203 | // original 'Document' 204 | var original = schema.schemas[0]; 205 | var selfReferenced = schema.schemas[1].fields[0].type.schemas[1].type; 206 | selfReferenced.should.equal(original); 207 | }); 208 | }) 209 | }); 210 | -------------------------------------------------------------------------------- /tools/avro-tools-1.7.7.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesbrucepower/node-avro-io/02aed01d15f52d8df414a0b3e084767741dfafdf/tools/avro-tools-1.7.7.jar --------------------------------------------------------------------------------