├── .gitignore ├── LICENSE ├── README.md ├── benchmark.js ├── binding.gyp ├── filereader.js ├── index.js ├── package.json ├── src └── addon.cc └── tests └── native.js /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | 14 | npm-debug.log 15 | node_modules 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Code Charm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | nan-example-eol 2 | ======== 3 | 4 | Newline detection in Node.JS implemented as a native addon 5 | Note: not significantly faster than `carrier` module and much slower than `split` module 6 | -------------------------------------------------------------------------------- /benchmark.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | var carrier = require('carrier'); 3 | var eol = require('./'); 4 | 5 | var assert = require('assert'); 6 | 7 | var split = require('split'); 8 | /* Running totals, one pair (number of lines, summed line.length) per implementation under test. */ 9 | var linesCarrier = 0; 10 | var linesEol = 0; 11 | 12 | var lengthCarrier = 0; 13 | var lengthEol = 0; 14 | 15 | var linesSplit = 0; 16 | var lengthSplit = 0; 17 | /* Opens the file named by process.argv[2] with fs.createReadStream, hands the stream to carrier.carry with a per-line callback that bumps linesCarrier/lengthCarrier, and on the stream's 'end' event logs the elapsed milliseconds and calls cb. */ 18 | function carrierTest(cb) { 19 | var begin = new Date().getTime(); 20 | var stream = fs.createReadStream(process.argv[2]); 21 | 22 | carrier.carry(stream, function(line) { 23 | linesCarrier++; 24 | lengthCarrier += line.length; 25 | }); 26 | stream.on('end', function() { 27 | console.log("Carrier took: ", (new Date().getTime()) - begin); 28 | cb(); 29 | }); 30 | } 31 | /* Same shape as carrierTest, but the stream is handed to eol.streamWrapper (this package's own entry point, loaded via require('./')) and the totals go to linesEol/lengthEol. */ 32 | function eolTest(cb) { 33 | var begin = new Date().getTime(); 34 | var stream = fs.createReadStream(process.argv[2]); 35 | 36 | eol.streamWrapper(stream, function(line) { 37 | linesEol++; 38 | lengthEol += line.length; 39 | }); 40 | 41 | stream.on('end', function() { 42 | console.log("EOL took: ", (new Date().getTime()) - begin); 43 | cb(); 44 | }); 45 | } 46 | /* Pipes the file stream through split() and counts in a 'data' listener. Note that `stream` here holds the return value of .pipe(split()).on('data', ...), so the 'end' listener below is registered on that object rather than on the file stream. */ 47 | function splitTest(cb) { 48 | var begin = new Date().getTime(); 49 | var stream = fs.createReadStream(process.argv[2]) 50 | .pipe(split()).on('data', function(line) { 51 | linesSplit++; 52 | lengthSplit += line.length; 53 | }); 54 | stream.on('end', function() { 55 | console.log("Split took: ", (new Date().getTime()) - begin); 56 | cb(); 57 | }); 58 
| } 59 | 60 | /* Driver: runs eol, carrier and split back to back, then the same three again. The counters are not reset between passes, so the totals cover both. The innermost callback logs linesSplit and linesCarrier, then asserts that the carrier and eol totals match; the split totals are not asserted. */ 61 | eolTest(function() { 62 | carrierTest(function() { 63 | splitTest(function() { 64 | eolTest(function() { 65 | carrierTest(function() { 66 | splitTest(function() { 67 | console.log(linesSplit, linesCarrier); 68 | assert(linesCarrier == linesEol); 69 | assert(lengthCarrier == lengthEol); 70 | }); 71 | }); 72 | }); 73 | }); 74 | }); 75 | }); -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "eol", 5 | "sources": [ 6 | "src/addon.cc" 7 | ], 8 | "include_dirs" : [ 9 | "", 21 | "license": "MIT", 22 | "bugs": { 23 | "url": "https://github.com/CodeCharmLtd/node-eol/issues" 24 | }, 25 | "homepage": "https://github.com/CodeCharmLtd/node-eol", 26 | "dependencies": { 27 | "nan": "1.0.0" 28 | }, 29 | "devDependencies": { 30 | "mocha": "~1.17.1", 31 | "should": "~3.1.2", 32 | "carrier": "~0.1.13", 33 | "split": "~0.3.0" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/addon.cc: -------------------------------------------------------------------------------- 1 | /* Copyright [year] */ 2 | /* NOTE(review): the #include targets and the template arguments of Persistent, Local, Handle, std::pair, std::list and the casts are absent throughout this listing - presumably lost during extraction; confirm against the real file. */ 3 | #include 4 | #include 5 | #include 6 | 7 | using node::ObjectWrap; 8 | 9 | using v8::Array; 10 | using v8::Context; 11 | using v8::Function; 12 | using v8::FunctionTemplate; 13 | using v8::Handle; 14 | using v8::Local; 15 | using v8::Object; 16 | using v8::Persistent; 17 | using v8::String; 18 | using v8::Value; 19 | /* File-level handle to the EOLFinder function template: assigned in EOLFinder::Init and read back by the module-level Init. */ 20 | static Persistent constructor; 21 | /* Wrapped native object that receives chunks of bytes through its `add` method and invokes a stored callback once per LF-terminated line. */ 22 | class EOLFinder : public ObjectWrap { 23 | Persistent m_onNewLine; 24 | /* m_onNewLine: assigned from args[0] in New, called in ProcessLines. offsetFromBeginning: offset into the first chunk of `chunks` where not-yet-emitted bytes start (applied to the first chunk only when a line is stitched together). sizeWithoutNewline: bytes accumulated toward the current, still unterminated line. chunksWithoutNewline: how many chunks ended with a non-empty remainder containing no LF; reset after every emitted line. */ 25 | size_t offsetFromBeginning; 26 | size_t sizeWithoutNewline; 27 | int chunksWithoutNewline; 28 | explicit EOLFinder() : offsetFromBeginning(0), sizeWithoutNewline(0), 29 | chunksWithoutNewline(0) { 30 | } 31 | /* A chunk is a pair built as chunk_type(chunk, siz) in AddChunk: .first is the data pointer, .second the length. `chunks` holds them in arrival order. */ 32 | typedef std::pair chunk_type; 33 | typedef std::list chunk_list; 34 | chunk_list chunks; 35 | 36 | 
/* Records a (pointer, length) pair at the end of `chunks`; the bytes themselves are not copied. */ void AddChunk(const char* chunk, size_t siz) { 37 | // for C++11 could be emplace_back 38 | chunks.push_back(chunk_type(chunk, siz)); 39 | } 40 | /* Scans the newest chunk (chunks.back()) for byte 10 (LF). For every hit, passes a newly created Buffer holding the line's bytes, LF excluded, to the m_onNewLine callback. Returns the number of lines emitted by this call. Asserts that at least one chunk is present. */ 41 | int ProcessLines() { 42 | int foundCount = 0; 43 | 44 | assert(chunks.size() > 0); 45 | 46 | chunk_type last = chunks.back(); 47 | 48 | const char* begin = last.first; 49 | size_t siz = last.second; 50 | /* Each iteration handles one LF; `begin`/`siz` always describe the not-yet-scanned remainder of the newest chunk. */ 51 | do { 52 | const char* found = static_cast(memchr(begin, 10, siz)); 53 | if (found) { 54 | size_t foundSiz = (found - begin); 55 | size_t tillEnd = siz - foundSiz - 1; 56 | 57 | Local onNewLine = NanNew(m_onNewLine); 58 | sizeWithoutNewline += foundSiz; 59 | /* Pending data from earlier chunk(s): allocate sizeWithoutNewline bytes, copy every chunk but the newest (the first one starting at offsetFromBeginning), then the head of the newest chunk up to the LF. */ 60 | if (chunksWithoutNewline > 0) { 61 | Handle args[1] = {NanNewBufferHandle(sizeWithoutNewline)}; 62 | char* targetData = node::Buffer::Data(args[0]); 63 | 64 | size_t chunksOffset = 0; 65 | // iterate over all chunks except one 66 | // - copy chunks 67 | for (chunk_list::iterator i = chunks.begin(); 68 | i != --chunks.end(); ++i) { 69 | chunk_type chunk = *i; 70 | memcpy(targetData + chunksOffset, 71 | chunk.first + offsetFromBeginning, 72 | chunk.second - offsetFromBeginning); 73 | chunksOffset += chunk.second - offsetFromBeginning; 74 | offsetFromBeginning = 0; // offset is valid for first chunk only 75 | } 76 | // copy over last chunk 77 | memcpy(targetData + chunksOffset, begin, foundSiz); 78 | chunksOffset += foundSiz; 79 | 80 | NanMakeCallback(NanGetCurrentContext()->Global(), onNewLine, 1, args); 81 | } else { // whole line is inside buffer 82 | Handle args[1] = {NanNewBufferHandle(begin, foundSiz)}; 83 | NanMakeCallback(NanGetCurrentContext()->Global(), onNewLine, 1, args); 84 | } 85 | 86 | foundCount++; 87 | 88 | begin = found + 1; 89 | siz = tillEnd; 90 | /* foundSiz is measured from `begin`. On the first hit `begin` is the chunk start, so the offset is assigned; on later hits it is relative to the previous line's end, so it is accumulated. */ 91 | if (foundCount == 1) { 92 | offsetFromBeginning = foundSiz + 1; 93 | } else { 94 | offsetFromBeginning += foundSiz + 1; 95 | } 96 | 97 | chunksWithoutNewline = 0; 98 | sizeWithoutNewline = 0; 99 | 100 | } else { /* no LF in the remainder: count a non-empty tail as pending, then stop scanning */ 101 | if (siz) { 102 | sizeWithoutNewline += siz; 103 | 
chunksWithoutNewline++; 104 | } 105 | break; 106 | } 107 | } while (1); 108 | 109 | return foundCount; 110 | } 111 | /* JS constructor: wraps a new EOLFinder in `this`, sets this.buffers to the value produced by NanNew(), and stores args[0] as the per-line callback. NOTE(review): args[0] is stored without any check that it is a function. */ 112 | static NAN_METHOD(New) { 113 | NanScope(); 114 | EOLFinder* finder = new EOLFinder(); 115 | finder->Wrap(args.This()); 116 | 117 | args.This()->Set(NanSymbol("buffers"), NanNew()); 118 | 119 | NanAssignPersistent(finder->m_onNewLine, args[0].As()); 120 | 121 | NanReturnValue(args.This()); 122 | } 123 | /* JS method `add`: when args[0] is a Buffer, appends it to this.buffers, registers its data pointer and length as a chunk, and runs ProcessLines. Any other argument (or none) is ignored and undefined is returned. Presumably this.buffers exists to keep the Buffers reachable while `chunks` holds raw pointers into them - TODO confirm. */ 124 | static NAN_METHOD(Add) { 125 | NanScope(); 126 | EOLFinder* self = ObjectWrap::Unwrap(args.This()); 127 | 128 | Local buffers = Local::Cast( 129 | args.This()->Get(NanSymbol("buffers"))); 130 | 131 | if (args.Length() > 0 && node::Buffer::HasInstance(args[0])) { 132 | char* data = node::Buffer::Data(args[0]); 133 | size_t siz = node::Buffer::Length(args[0]); 134 | 135 | buffers->Set(buffers->Length(), args[0]); 136 | self->AddChunk(data, siz); 137 | /* At least one line was emitted: only the newest chunk can still hold pending bytes, so keep just that chunk and its Buffer and drop the rest. */ 138 | if (self->ProcessLines()) { 139 | chunk_type lastInternal = self->chunks.back(); 140 | self->chunks.clear(); 141 | self->chunks.push_back(lastInternal); 142 | 143 | Local last = buffers->Get(buffers->Length() - 1); 144 | buffers = NanNew(); 145 | args.This()->Set(NanSymbol("buffers"), buffers); 146 | buffers->Set(0, last); 147 | } 148 | } 149 | NanReturnUndefined(); 150 | } 151 | 152 | // actual destructor 153 | virtual ~EOLFinder() { 154 | NanDisposePersistent(m_onNewLine); 155 | } 156 | /* Builds the function template around EOLFinder::New (class name "EOLFinder", one internal field, prototype method "add") and stores it in the file-level `constructor`. */ 157 | public: 158 | static void Init() { 159 | Local tpl = NanNew(EOLFinder::New); 160 | NanAssignPersistent(constructor, tpl); 161 | tpl->SetClassName(NanSymbol("EOLFinder")); 162 | tpl->InstanceTemplate()->SetInternalFieldCount(1); 163 | NODE_SET_PROTOTYPE_METHOD(tpl, "add", EOLFinder::Add); 164 | } 165 | }; 166 | /* Module entry point: initialises the template and sets module.exports to the function obtained from it, so the addon's export is the constructor itself. */ 167 | void Init(Handle exports, Handle module) { 168 | EOLFinder::Init(); 169 | v8::Local constructorHandle = 170 | NanNew(constructor); 171 | 172 | module->Set(NanSymbol("exports"), constructorHandle->GetFunction()); 173 | } 174 | 175 | NODE_MODULE(eol, Init) 176 | 
-------------------------------------------------------------------------------- /tests/native.js: -------------------------------------------------------------------------------- 1 | var eol = require("../build/Release/eol.node"); 2 | var mocha = require('mocha'); 3 | var should = require('should'); 4 | /* Mocha suite that drives the compiled addon directly: `eol` is used as a constructor taking a per-line callback, and chunks are fed through finder.add(). The `mocha` variable above is not referenced again in this file. */ 5 | describe('native', function() { 6 | /* One buffer, one LF-terminated line: the callback must receive the text without the LF. */ 7 | it('should handle single line in one buffer', function(cb) { 8 | 9 | var finder = new eol(function(buf) { 10 | var got = buf.toString('utf8'); 11 | got.should.equal('aaaaaaa'); 12 | cb(); 13 | }); 14 | finder.add(new Buffer("aaaaaaa\n", "utf8")); 15 | }); 16 | /* Two buffers, each a complete line: done is signalled on the second callback. */ 17 | it('should handle two lines one in each buffer', function(cb) { 18 | var count = 0; 19 | var finder = new eol(function(buf) { 20 | var got = buf.toString('utf8'); 21 | got.should.equal('aaaaaaa'); 22 | count++; 23 | if(count == 2) 24 | cb(); 25 | }); 26 | finder.add(new Buffer("aaaaaaa\n", "utf8")); 27 | finder.add(new Buffer("aaaaaaa\n", "utf8")); 28 | }); 29 | /* A single line split over four buffers; only the last one carries the LF, and the callback must see the concatenation. */ 30 | it('should handle new line spanned across buffers', function(cb) { 31 | var finder = new eol(function(buf) { 32 | var got = buf.toString('utf8'); 33 | got.should.equal('aaaaaaaaa1aaaaaaaaa2aaaaaaaaa3aaaaaaaa'); 34 | cb(); 35 | }); 36 | finder.add(new Buffer("aaaaaaaaa1", "utf8")) 37 | finder.add(new Buffer("aaaaaaaaa2", "utf8")) 38 | finder.add(new Buffer("aaaaaaaaa3", "utf8")) 39 | finder.add(new Buffer("aaaaaaaa\n", "utf8")) 40 | 41 | }); 42 | /* Spanned line followed by buffers that both end a line and start the next: the leftover 'ddd' of one buffer must be joined with the 'aa' of the following one. The final 'ddd' has no LF and is not part of the expected output. */ 43 | it('should handle mixed cases', function(cb) { 44 | var counter = 0; 45 | var result = ''; 46 | var finder = new eol(function(buf) { 47 | result += buf.toString('utf8') + '\n'; 48 | counter++; 49 | if(counter == 3) { 50 | result.should.equal('aaaaaaaaa1aaaaaaaaa2aaaaaaaaa3aaaaaaaa\naa\ndddaa\n'); 51 | cb(); 52 | } 53 | }); 54 | finder.add(new Buffer("aaaaaaaaa1", "utf8")) 55 | finder.add(new Buffer("aaaaaaaaa2", "utf8")) 56 | finder.add(new Buffer("aaaaaaaaa3", "utf8")) 57 | finder.add(new Buffer("aaaaaaaa\n", "utf8")) 58 | finder.add(new 
Buffer("aa\nddd", "utf8")) 59 | finder.add(new Buffer("aa\nddd", "utf8")) 60 | }); 61 | /* Several lines per buffer: the trailing 'a' of the first buffer and the leading 'kot' of the second must come out as 'akot'. The unterminated 'sdfdfs' tail is not part of the expected output. */ 62 | it('should handle multiple-lines in mixed cases', function(cb) { 63 | var counter = 0; 64 | var result = ''; 65 | var finder = new eol(function(buf) { 66 | result += buf.toString('utf8') + '\n'; 67 | counter++; 68 | if(counter == 6) { 69 | result.should.equal("ala\nma\nkota\nakot\nma\nale\n"); 70 | cb(); 71 | } 72 | }); 73 | finder.add(new Buffer("ala\nma\nkota\na", "utf8")); 74 | finder.add(new Buffer("kot\nma\nale\nsdfdfs", "utf8")); 75 | 76 | }); 77 | 78 | }); --------------------------------------------------------------------------------