├── README.md ├── clucene.js ├── package.json ├── src ├── Misc.h ├── StringBuffer.h ├── clucene_bindings.cpp └── repl_tchar.h ├── test ├── facebook.json ├── heavy.js └── index.js └── wscript /README.md: -------------------------------------------------------------------------------- 1 | OVERVIEW 2 | ========= 3 | node-clucene is a native Node.js module that wraps up the CLucene library. CLucene is a C++ port of the ubiquitous Java Lucene library. The Lucene ecosystem is vibrant, and lots of great code has come out of it. However, for those who do not wish to run a JVM-based information retrieval system, CLucene makes a lot of sense. 4 | 5 | CLucene is based on a port of Lucene 2.3. Unfortunately, many updates and features have been added to Lucene since then. The goal is to add features to this module as necessary, without rewriting it to map method-to-method with the current version of Lucene. 6 | 7 | Right now, adding a document with the same docId will *replace* the older document in the index that has the same docId. This is intentional, and makes document updating simple. In the future, this will probably be refactored into a separate updateDocument() method call, and addDocument() will revert to adding only. 8 | 9 | This library began as a project from Tyler Gillies (http://github.com/tjgillies/node-lucene). Mad props to him for getting this library bootstrapped and running! 
10 | 11 | 12 | HOW IT WORKS: 13 | ============ 14 | 15 | Indexing information into the index 16 | ------------------------------- 17 | ```javascript 18 | var cl = require('clucene').CLucene, 19 | clucene = new cl.Lucene(); 20 | 21 | var indexPath = './test.index', 22 | data = [ {'name': 'Eric Jennings', 'timestamp': '1293765885000'}, 23 | {'name': 'Thomas Anderson', 'timestamp': '129555555555555'} ]; 24 | 25 | var addItem = function(contact, index) { 26 | var doc = new cl.Document(), 27 | docId = index; 28 | 29 | doc.addField('name', contact.name, cl.STORE_YES|cl.INDEX_TOKENIZED); 30 | doc.addField('_type', 'contact', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 31 | doc.addField('timestamp', contact.timestamp, cl.STORE_YES|cl.INDEX_UNTOKENIZED); 32 | 33 | clucene.addDocument(docId, doc, indexPath, function(err, indexTime, docsReplaced) { 34 | if (err) { 35 | console.log('Error indexing document: ' + err); 36 | } 37 | 38 | console.log('Indexed document in ' + indexTime + ' ms'); 39 | 40 | if (docsReplaced > 0) { 41 | console.log('Updated ' + docsReplaced + ' existing document(s)'); 42 | } 43 | 44 | clucene.closeWriter(); 45 | }); 46 | } 47 | 48 | var i = 0, 49 | m = setInterval(function () { 50 | addItem(data[i], (i - 1).toString(10)); 51 | i += 1; 52 | if (i >= data.length) { 53 | clearInterval(m); 54 | } 55 | }, 500); 56 | ``` 57 | 58 | 59 | Querying information out of the index 60 | ------------------------------- 61 | ```javascript 62 | var cl = require('clucene').CLucene, 63 | clucene = new cl.Lucene(); 64 | 65 | var indexPath = './test.index'; 66 | 67 | var queryTerm = 'name:jenn*'; 68 | 69 | clucene.search(indexPath, queryTerm, function(err, results, searchTime) { 70 | if (err) { 71 | console.log('Search error: ' + err); 72 | return; 73 | } 74 | 75 | console.log('Search results: '); 76 | 77 | for (var i=0; i= 0.4.0" }, 12 | "repository": { 13 | "type": "git", 14 | "url": "http://github.com/erictj/node-clucene.git" 15 | }, 16 | "licenses": [ 17 | { 18 | "type": "MIT" 
19 | } 20 | ], 21 | "bugs": { 22 | "url" : "http://github.com/erictj/node-clucene/issues" 23 | }, 24 | "scripts": { 25 | "install": "node-waf configure build", 26 | "test": "node-waf configure build; nodeunit test/" 27 | }, 28 | "devDependencies": { 29 | "nodeunit": ">=0.3.1" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/Misc.h: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------------ 2 | * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 | * 4 | * Distributable under the terms of either the Apache License (Version 2.0) or 5 | * the GNU Lesser General Public License, as specified in the COPYING file. 6 | ------------------------------------------------------------------------------*/ 7 | #ifndef _lucene_util_Misc_H 8 | #define _lucene_util_Misc_H 9 | 10 | #include 11 | 12 | CL_NS_DEF(util) 13 | /** A class containing various functions. 14 | */ 15 | class CLUCENE_SHARED_EXPORT Misc{ 16 | static void zerr(int ret, std::string& err); 17 | public: 18 | static uint64_t currentTimeMillis(); 19 | static const TCHAR* replace_all( const TCHAR* val, const TCHAR* srch, const TCHAR* repl ); 20 | static bool dir_Exists(const char* path); 21 | static int64_t file_Size(const char* path); 22 | static int64_t filelength(int handle); 23 | static void sleep(const int ms); 24 | 25 | /** 26 | * Unlinks the given file, waits until dir_Exists is false. It makes maxAttempts 27 | * attempts to remove the file. If maxAttemps is less than 0 then unlimited 28 | * count of attempts is done. 29 | * Returns 1 if deleted and dir_Exists returns false 30 | * Returns 0 if deleted and dir_Exists returns still true 31 | * Returns -1 if file can not be deleted. 
32 | */ 33 | static int32_t file_Unlink(const char* path, int32_t maxAttempts = -1); 34 | 35 | static size_t ahashCode(const char* str); 36 | static size_t ahashCode(const char* str, size_t len); 37 | 38 | static TCHAR* join ( const TCHAR* a, const TCHAR* b, const TCHAR* c=NULL, const TCHAR* d=NULL,const TCHAR* e=NULL,const TCHAR* f=NULL ); 39 | static char* ajoin ( const char* a, const char* b, const char* c=NULL, const char* d=NULL,const char* e=NULL,const char* f=NULL ); 40 | 41 | static bool priv_isDotDir( const TCHAR* name ); 42 | //Creates a filename by concatenating Segment with ext and x 43 | static std::string segmentname(const char* segment, const char* ext, const int32_t x=-1 ); 44 | //Creates a filename in buffer by concatenating Segment with ext and x 45 | static void segmentname(char* buffer,int32_t bufferLen, const char* Segment, const char* ext, const int32_t x=-1); 46 | 47 | /** 48 | * Compares two strings, character by character, and returns the 49 | * first position where the two strings differ from one another. 50 | * 51 | * @param s1 The first string to compare 52 | * @param s1Len The length of the first string to compare 53 | * @param s2 The second string to compare 54 | * @param s2Len The length of the second string to compare 55 | * @return The first position where the two strings differ. 56 | */ 57 | static int32_t stringDifference(const TCHAR* s1, const int32_t s1Len, const TCHAR* s2, const int32_t s2Len); 58 | 59 | // In-place trimming for strings and words ("te st" will be returned by stringTrim, while wordTrim will return "te") 60 | // This is by design only meant for use with in-memory strings, and calling it like stringTrim(_T("test")) will 61 | // be erroneous 62 | static TCHAR* stringTrim(TCHAR* s); 63 | static TCHAR* wordTrim(TCHAR* s); 64 | 65 | static size_t longToBase( int64_t value, int32_t base, char* to ); //< length of to should be at least ((sizeof(unsigned long) << 3) + 1). 
returns actual length used 66 | static int64_t base36ToLong( const char* value ); 67 | 68 | static std::string toString(const int32_t value); 69 | static std::string toString(const int64_t value); 70 | static std::string toString(const _LUCENE_THREADID_TYPE value); 71 | static std::string toString(const bool value); 72 | static std::string toString(const float_t value); 73 | static std::string toString(const TCHAR* s, int32_t len=-1); 74 | 75 | #ifdef _UCS2 76 | static size_t whashCode(const wchar_t* str); 77 | static size_t whashCode(const wchar_t* str, size_t len); 78 | #define thashCode whashCode 79 | 80 | static char* _wideToChar(const wchar_t* s); 81 | static wchar_t* _charToWide(const char* s); 82 | 83 | static void _cpycharToWide(const char* s, wchar_t* d, size_t len); 84 | static void _cpywideToChar(const wchar_t* s, char* d, size_t len); 85 | #else 86 | #define thashCode ahashCode 87 | #endif 88 | 89 | /** List all files in dir. 90 | * @param bool fullPath True to return entire path 91 | */ 92 | static bool listFiles(const char* dir, std::vector& files, bool fullPath=false); 93 | 94 | /** uncompress the source stream into the dest stream. 95 | * Default CHUNK size is 1k 96 | */ 97 | static bool inflate(const uint8_t* source, size_t sourcelen, std::ostream& dest, std::string& err, int CHUNK=-1); 98 | /** compress the source stream into the dest stream. 
99 | * Default CHUNK size is 1k 100 | * Default level is Z_BEST_COMPRESSION 101 | */ 102 | static bool deflate(const uint8_t* source, size_t sourcelen, std::ostream& dest, std::string& err, int CHUNK=-1, int level=-1); 103 | }; 104 | 105 | CL_NS_END 106 | #endif 107 | -------------------------------------------------------------------------------- /src/StringBuffer.h: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------------ 2 | * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 | * 4 | * Distributable under the terms of either the Apache License (Version 2.0) or 5 | * the GNU Lesser General Public License, as specified in the COPYING file. 6 | ------------------------------------------------------------------------------*/ 7 | #ifndef _lucene_util_StringBuffer_ 8 | #define _lucene_util_StringBuffer_ 9 | 10 | CL_NS_DEF(util) 11 | class CLUCENE_SHARED_EXPORT StringBuffer{ 12 | public: 13 | ///Constructor. Allocates a buffer with the default length. 14 | StringBuffer(); 15 | ///Constructor. Allocates a buffer of length initSize + 1 16 | StringBuffer(const size_t initSize); 17 | ///Constructor. Creates an instance of Stringbuffer containing a copy of 18 | ///the string value 19 | StringBuffer(const TCHAR* value); 20 | ///Constructs a StringBuffer using another buffer. The StringBuffer can 21 | ///the be used to easily manipulate the buffer. 
22 | StringBuffer(TCHAR* buf,size_t maxlen, const bool consumeBuffer); 23 | ///Destructor 24 | virtual ~StringBuffer(); 25 | ///Clears the Stringbuffer and resets it to it default empty state 26 | void clear(); 27 | 28 | ///Appends a single character 29 | void appendChar(const TCHAR chr); 30 | ///Appends a copy of the string value 31 | void append(const TCHAR* value); 32 | ///Appends a copy of the string value 33 | void append(const TCHAR* value, size_t appendedLength); 34 | ///Appends an integer (after conversion to a character string) with a default radix of 10. Radixes lower than 10 not supported. 35 | void appendInt(const int64_t value, const int32_t _Radix = 10); 36 | ///Appends a float_t (after conversion to a character string) 37 | void appendFloat(const float_t value, const size_t digits); 38 | ///Appends a Lucene boost, formatted in the format used in the toString() functions; replaces JL's ToStringUtils::boost 39 | void appendBoost(const float_t boost); 40 | ///Appends a bool in the same way as java StringBuffer does (i.e. 
"true", "false" ) 41 | void appendBool(const bool value); 42 | ///Puts a copy of the string value in front of the current string in the StringBuffer 43 | void prepend(const TCHAR* value); 44 | ///Puts a copy of the string value in front of the current string in the StringBuffer 45 | void prepend(const TCHAR* value, size_t prependedLength); 46 | 47 | StringBuffer& operator<< (const TCHAR* value) 48 | { 49 | append(value); 50 | return *this; 51 | } 52 | StringBuffer& operator<< (const int64_t value) 53 | { 54 | appendInt(value); 55 | return *this; 56 | } 57 | 58 | void setCharAt(size_t pos, const TCHAR chr); 59 | TCHAR charAt(size_t pos); 60 | 61 | void insert(const size_t pos, TCHAR chr); 62 | void insert(const size_t pos, const TCHAR* chrs, size_t length = -1); 63 | void deleteCharAt(size_t pos); 64 | void deleteChars(size_t start, size_t end); 65 | 66 | void toLower(); 67 | bool substringEquals(size_t start, size_t end, const TCHAR* str, size_t length = -1) const; 68 | 69 | ///Contains the length of string in the StringBuffer 70 | ///Public so that analyzers can edit the length directly 71 | size_t len; 72 | ///Returns the length of the string in the StringBuffer 73 | size_t length() const; 74 | ///Returns a copy of the current string in the StringBuffer 75 | TCHAR* toString(); 76 | ///Returns a null terminated reference to the StringBuffer's text 77 | TCHAR* getBuffer(); 78 | /** Returns a null terminated reference to the StringBuffer's text 79 | * the StringBuffer's buffer is released so that the text doesn't need to be copied 80 | */ 81 | TCHAR* giveBuffer(); 82 | 83 | ///reserve a minimum amount of data for the buffer. 
84 | ///no change made if the buffer is already longer than length 85 | void reserve(const size_t length); 86 | private: 87 | ///A buffer that contains strings 88 | TCHAR* buffer; 89 | ///The length of the buffer 90 | size_t bufferLength; 91 | bool bufferOwner; 92 | 93 | ///Has the buffer grown to a minimum length of minLength or bigger and shifts the 94 | ///current string in buffer by skippingNInitialChars forward 95 | void growBuffer(const size_t minLength, const size_t skippingNInitialChars=0); 96 | 97 | }; 98 | CL_NS_END 99 | #endif 100 | -------------------------------------------------------------------------------- /src/clucene_bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | //Thanks bnoordhuis and jerrysv from #node.js 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include "Misc.h" 11 | #include "repl_tchar.h" 12 | #include "StringBuffer.h" 13 | 14 | using namespace node; 15 | using namespace v8; 16 | using namespace std; 17 | using namespace lucene::index; 18 | using namespace lucene::analysis; 19 | using namespace lucene::util; 20 | using namespace lucene::store; 21 | using namespace lucene::document; 22 | using namespace lucene::search; 23 | using namespace lucene::queryParser; 24 | 25 | const static int CL_MAX_DIR = 220; 26 | 27 | #define REQ_ARG_COUNT_AND_TYPE(I, TYPE) \ 28 | if (args.Length() < (I + 1) ) { \ 29 | return ThrowException(Exception::RangeError(String::New("A least " #I " arguments are required"))); \ 30 | } else if (!args[I]->Is##TYPE()) { \ 31 | return ThrowException(Exception::TypeError(String::New("Argument " #I " must be a " #TYPE))); \ 32 | } 33 | 34 | #define REQ_FUN_ARG(I, VAR) \ 35 | REQ_ARG_COUNT_AND_TYPE(I, Function) \ 36 | Local VAR = Local::Cast(args[I]); 37 | 38 | #define REQ_STR_ARG(I) REQ_ARG_COUNT_AND_TYPE(I, String) 39 | #define REQ_NUM_ARG(I) REQ_ARG_COUNT_AND_TYPE(I, Number) 40 | #define REQ_OBJ_ARG(I) 
REQ_ARG_COUNT_AND_TYPE(I, Object) 41 | 42 | #define REQ_OBJ_TYPE(OBJ, TYPE) \ 43 | if (!OBJ->GetConstructorName()->Equals(String::New(#TYPE))) { \ 44 | return ThrowException(Exception::TypeError(String::New("Expected a " #TYPE " type."))); \ 45 | } 46 | 47 | class LuceneDocument : public ObjectWrap { 48 | public: 49 | static void Initialize(v8::Handle target) { 50 | HandleScope scope; 51 | 52 | Local t = FunctionTemplate::New(New); 53 | 54 | t->InstanceTemplate()->SetInternalFieldCount(1); 55 | 56 | NODE_SET_PROTOTYPE_METHOD(t, "addField", AddField); 57 | NODE_SET_PROTOTYPE_METHOD(t, "clear", Clear); 58 | 59 | target->Set(String::NewSymbol("Document"), t->GetFunction()); 60 | } 61 | 62 | Document* document() { return &doc_; } 63 | 64 | void Ref() { ObjectWrap::Ref(); } 65 | void Unref() { ObjectWrap::Unref(); } 66 | 67 | protected: 68 | static Handle New(const Arguments& args) { 69 | HandleScope scope; 70 | 71 | LuceneDocument* newDoc = new LuceneDocument(); 72 | newDoc->Wrap(args.This()); 73 | 74 | return scope.Close(args.This()); 75 | } 76 | 77 | // args: 78 | // String* key 79 | // String* value 80 | // Integer flags 81 | static Handle AddField(const Arguments& args) { 82 | HandleScope scope; 83 | 84 | REQ_STR_ARG(0); 85 | REQ_STR_ARG(1); 86 | REQ_NUM_ARG(2); 87 | 88 | LuceneDocument* docWrapper = ObjectWrap::Unwrap(args.This()); 89 | 90 | TCHAR* key = STRDUP_AtoT(*String::Utf8Value(args[0])); 91 | TCHAR* value = STRDUP_AtoT(*String::Utf8Value(args[1])); 92 | 93 | try { 94 | Field* field = _CLNEW Field(key, value, args[2]->Int32Value()); 95 | delete key; 96 | delete value; 97 | docWrapper->document()->add(*field); 98 | } catch (CLuceneError& E) { 99 | delete key; 100 | delete value; 101 | return scope.Close(ThrowException(Exception::TypeError(String::New(E.what())))); 102 | } catch(...) 
{ 103 | delete key; 104 | delete value; 105 | return scope.Close(ThrowException(Exception::Error(String::New("Unknown internal error while adding field")))); 106 | } 107 | 108 | return scope.Close(Undefined()); 109 | } 110 | 111 | static Handle Clear(const Arguments& args) { 112 | HandleScope scope; 113 | 114 | LuceneDocument* docWrapper = ObjectWrap::Unwrap(args.This()); 115 | docWrapper->document()->clear(); 116 | 117 | return scope.Close(Undefined()); 118 | } 119 | 120 | LuceneDocument() : ObjectWrap() { 121 | } 122 | 123 | ~LuceneDocument() { 124 | } 125 | private: 126 | Document doc_; 127 | }; 128 | 129 | class Lucene : public ObjectWrap { 130 | 131 | static Persistent s_ct; 132 | 133 | private: 134 | int m_count; 135 | typedef std::map IndexReaderMap; 136 | IndexReaderMap readers_; 137 | typedef std::map FSDirectoryMap; 138 | FSDirectoryMap directories_; 139 | IndexWriter* writer_; 140 | lucene::analysis::standard::StandardAnalyzer* an_; 141 | 142 | private: 143 | IndexReader* get_reader(const std::string &index, std::string &error) { 144 | IndexReader* reader = 0; 145 | //printf("Index: %s\n", index.c_str()); 146 | try { 147 | IndexReaderMap::iterator it = readers_.find(index); 148 | if (it == readers_.end()) { 149 | //printf("Open: %s\n", index.c_str()); 150 | FSDirectory* directory = 0; 151 | FSDirectoryMap::iterator dir_it = directories_.find(index); 152 | if (dir_it == directories_.end()) { 153 | directory = FSDirectory::getDirectory(index.c_str()); 154 | directories_[index] = directory; 155 | } else { 156 | directory = dir_it->second; 157 | } 158 | reader = IndexReader::open(directory); 159 | } else { 160 | //printf("Reopen: %s\n", index.c_str()); 161 | reader = it->second; 162 | IndexReader* newreader = reader->reopen(); 163 | if (newreader != reader) { 164 | //printf("Newreader != reader: %s\n", index.c_str()); 165 | //reader->close(); 166 | _CLLDELETE(reader); 167 | reader = newreader; 168 | } 169 | } 170 | //printf("Finished opening: %s\n", 
index.c_str()); 171 | readers_[index] = reader; 172 | 173 | } catch (CLuceneError& E) { 174 | printf("get_reader Exception: %s\n", E.what()); 175 | error.assign(E.what()); 176 | } catch(...) { 177 | error = "Got an unknown exception \n"; 178 | printf("get_reader Exception:"); 179 | } 180 | return reader; 181 | } 182 | 183 | void close_reader(const std::string& index) { 184 | IndexReader* reader = 0; 185 | IndexReaderMap::iterator it = readers_.find(index); 186 | if (it != readers_.end()) { 187 | reader = it->second; 188 | reader->close(); 189 | _CLLDELETE(reader); 190 | reader = 0; 191 | readers_.erase(index); 192 | } 193 | } 194 | public: 195 | 196 | static void Init(Handle target) { 197 | HandleScope scope; 198 | 199 | Local t = FunctionTemplate::New(New); 200 | 201 | s_ct = Persistent::New(t); 202 | s_ct->InstanceTemplate()->SetInternalFieldCount(1); 203 | s_ct->SetClassName(String::NewSymbol("Lucene")); 204 | 205 | NODE_SET_PROTOTYPE_METHOD(s_ct, "addDocument", AddDocumentAsync); 206 | NODE_SET_PROTOTYPE_METHOD(s_ct, "deleteDocument", DeleteDocumentAsync); 207 | NODE_SET_PROTOTYPE_METHOD(s_ct, "deleteDocumentsByType", DeleteDocumentsByTypeAsync); 208 | NODE_SET_PROTOTYPE_METHOD(s_ct, "search", SearchAsync); 209 | NODE_SET_PROTOTYPE_METHOD(s_ct, "optimize", OptimizeAsync); 210 | NODE_SET_PROTOTYPE_METHOD(s_ct, "closeWriter", CloseWriter); 211 | 212 | target->Set(String::NewSymbol("Lucene"), s_ct->GetFunction()); 213 | } 214 | 215 | Lucene() : ObjectWrap(), m_count(0), writer_(0), an_(0) {} 216 | 217 | ~Lucene() { } 218 | 219 | static Handle New(const Arguments& args) { 220 | HandleScope scope; 221 | Lucene* lucene = new Lucene(); 222 | lucene->writer_ = 0; 223 | lucene->Wrap(args.This()); 224 | return scope.Close(args.This()); 225 | } 226 | 227 | struct index_baton_t { 228 | Lucene* lucene; 229 | LuceneDocument* doc; 230 | std::string docID; 231 | std::string index; 232 | Persistent callback; 233 | uint64_t indexTime; 234 | int32_t docCount; 235 | std::string 
error; 236 | }; 237 | 238 | static Handle CloseWriter(const Arguments& args) { 239 | HandleScope scope; 240 | 241 | REQ_OBJ_TYPE(args.This(), Lucene); 242 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 243 | 244 | if (lucene->writer_ != 0) { 245 | lucene->writer_->flush(); 246 | lucene->writer_->close(true); 247 | delete lucene->writer_; 248 | lucene->writer_ = 0; 249 | 250 | delete lucene->an_; 251 | lucene->an_ = 0; 252 | } 253 | //printf("Deleted index writer\n"); 254 | 255 | return scope.Close(Undefined()); 256 | } 257 | 258 | // args: 259 | // String* docID 260 | // Document* doc 261 | // String* indexPath 262 | static Handle AddDocumentAsync(const Arguments& args) { 263 | HandleScope scope; 264 | 265 | REQ_STR_ARG(0); 266 | REQ_OBJ_ARG(1); 267 | REQ_STR_ARG(2); 268 | REQ_FUN_ARG(3, callback); 269 | 270 | REQ_OBJ_TYPE(args.This(), Lucene); 271 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 272 | 273 | 274 | index_baton_t* baton = new index_baton_t; 275 | baton->lucene = lucene; 276 | baton->docID.assign(*v8::String::Utf8Value(args[0])); 277 | baton->doc = ObjectWrap::Unwrap(args[1]->ToObject()); 278 | baton->index.assign(*v8::String::Utf8Value(args[2])); 279 | baton->callback = Persistent::New(callback); 280 | baton->error.clear(); 281 | 282 | lucene->Ref(); 283 | baton->doc->Ref(); 284 | 285 | 286 | eio_custom(EIO_Index, EIO_PRI_DEFAULT, EIO_AfterIndex, baton); 287 | 288 | 289 | ev_ref(EV_DEFAULT_UC); 290 | 291 | return scope.Close(Undefined()); 292 | } 293 | 294 | 295 | static void EIO_Index(eio_req* req) { 296 | 297 | index_baton_t* baton = static_cast(req->data); 298 | 299 | 300 | 301 | try { 302 | bool needsCreation = true; 303 | std::string error; 304 | if (IndexReader::indexExists(baton->index.c_str())) { 305 | if (IndexReader::isLocked(baton->index.c_str())) { 306 | IndexReader::unlock(baton->index.c_str()); 307 | } 308 | needsCreation = false; 309 | } 310 | 311 | // We keep shared instances of the index modifiers because you can only have 
one per index 312 | if (baton->lucene->writer_ == 0) { 313 | baton->lucene->an_ = new lucene::analysis::standard::StandardAnalyzer; 314 | baton->lucene->writer_ = new IndexWriter(baton->index.c_str(), baton->lucene->an_, needsCreation); 315 | //printf("New index writer\n"); 316 | baton->lucene->writer_->setRAMBufferSizeMB(5); 317 | 318 | // To bypass a possible exception (we have no idea what we will be indexing...) 319 | baton->lucene->writer_->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE 320 | // Turn this off to make indexing faster; we'll turn it on later before optimizing 321 | baton->lucene->writer_->setUseCompoundFile(false); 322 | } 323 | 324 | uint64_t start = Misc::currentTimeMillis(); 325 | 326 | // replace document._id if it's also set in the document itself 327 | TCHAR key[CL_MAX_DIR]; 328 | STRCPY_AtoT(key, "_id", CL_MAX_DIR); 329 | TCHAR* value = STRDUP_AtoT(baton->docID.c_str()); 330 | baton->doc->document()->removeFields(key); 331 | Field* field = _CLNEW Field(key, value, Field::STORE_YES|Field::INDEX_UNTOKENIZED); 332 | baton->doc->document()->add(*field); 333 | 334 | Term* term = new Term(key, value); 335 | //_tprintf(_T("Fields: %S\n"), baton->doc->document()->toString()); 336 | //_tprintf(_T("Term k(%S) v(%S)\n"), key, value); 337 | baton->lucene->writer_->updateDocument(term, baton->doc->document()); 338 | _CLDECDELETE(term); 339 | 340 | delete value; 341 | 342 | // Make the index use as little files as possible, and optimize it 343 | 344 | //writer->optimize(); 345 | 346 | baton->lucene->close_reader(baton->index); 347 | /* 348 | baton->lucene->writer_->flush(); 349 | baton->lucene->writer_->close(true); 350 | delete baton->lucene->writer_; 351 | baton->lucene->writer_ = 0; 352 | */ 353 | 354 | //writer->close(); 355 | //delete writer; 356 | //writer = 0; 357 | 358 | baton->indexTime = (Misc::currentTimeMillis() - start); 359 | } catch (CLuceneError& E) { 360 | baton->error.assign(E.what()); 361 | } catch(...) 
{ 362 | baton->error = "Got an unknown exception"; 363 | } 364 | 365 | //(*(*baton->index), &an, false); 366 | return; 367 | } 368 | 369 | static int EIO_AfterIndex(eio_req* req) { 370 | HandleScope scope; 371 | index_baton_t* baton = static_cast(req->data); 372 | ev_unref(EV_DEFAULT_UC); 373 | baton->lucene->Unref(); 374 | baton->doc->Unref(); 375 | 376 | Handle argv[2]; 377 | 378 | if (!baton->error.empty()) { 379 | argv[0] = v8::String::New(baton->error.c_str()); 380 | argv[1] = Undefined(); 381 | } 382 | else { 383 | argv[0] = Undefined(); 384 | argv[1] = v8::Integer::NewFromUnsigned((uint32_t)baton->indexTime); 385 | } 386 | 387 | TryCatch tryCatch; 388 | 389 | baton->callback->Call(Context::GetCurrent()->Global(), 2, argv); 390 | 391 | if (tryCatch.HasCaught()) { 392 | FatalException(tryCatch); 393 | } 394 | 395 | baton->callback.Dispose(); 396 | delete baton; 397 | return 0; 398 | } 399 | 400 | 401 | struct indexdelete_baton_t { 402 | Lucene* lucene; 403 | v8::String::Utf8Value* docID; 404 | std::string index; 405 | Persistent callback; 406 | uint64_t indexTime; 407 | uint64_t docsDeleted; 408 | std::string error; 409 | }; 410 | 411 | // args: 412 | // String* docID 413 | // String* indexPath 414 | static Handle DeleteDocumentAsync(const Arguments& args) { 415 | HandleScope scope; 416 | 417 | REQ_STR_ARG(0); 418 | REQ_STR_ARG(1); 419 | REQ_FUN_ARG(2, callback); 420 | 421 | REQ_OBJ_TYPE(args.This(), Lucene); 422 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 423 | 424 | indexdelete_baton_t* baton = new indexdelete_baton_t; 425 | baton->lucene = lucene; 426 | baton->docID = new v8::String::Utf8Value(args[0]); 427 | baton->index = *v8::String::Utf8Value(args[1]); 428 | baton->callback = Persistent::New(callback); 429 | baton->error.clear(); 430 | 431 | lucene->Ref(); 432 | 433 | eio_custom(EIO_DeleteDocument, EIO_PRI_DEFAULT, EIO_AfterDeleteDocument, baton); 434 | ev_ref(EV_DEFAULT_UC); 435 | 436 | return scope.Close(Undefined()); 437 | } 438 | 439 | 440 | 
static void EIO_DeleteDocument(eio_req* req) { 441 | indexdelete_baton_t* baton = static_cast(req->data); 442 | 443 | lucene::analysis::standard::StandardAnalyzer an; 444 | 445 | IndexReader* reader = baton->lucene->get_reader(baton->index, baton->error); 446 | if (!baton->error.empty()) { 447 | return; 448 | } 449 | 450 | uint64_t start = Misc::currentTimeMillis(); 451 | 452 | TCHAR key[CL_MAX_DIR]; 453 | STRCPY_AtoT(key, "_id", CL_MAX_DIR); 454 | TCHAR value[CL_MAX_DIR]; 455 | STRCPY_AtoT(value, *(*baton->docID), CL_MAX_DIR); 456 | 457 | try { 458 | reader->deleteDocuments(new Term(key, value)); 459 | 460 | baton->indexTime = (Misc::currentTimeMillis() - start); 461 | baton->lucene->close_reader(baton->index); 462 | } catch (CLuceneError& E) { 463 | baton->error.assign(E.what()); 464 | } catch(...) { 465 | baton->error = "Got an unknown exception"; 466 | } 467 | //(*(*baton->index), &an, false); 468 | 469 | return; 470 | } 471 | 472 | static int EIO_AfterDeleteDocument(eio_req* req) { 473 | HandleScope scope; 474 | indexdelete_baton_t* baton = static_cast(req->data); 475 | ev_unref(EV_DEFAULT_UC); 476 | baton->lucene->Unref(); 477 | 478 | Handle argv[2]; 479 | 480 | if (!baton->error.empty()) { 481 | argv[0] = v8::String::New(baton->error.c_str()); 482 | argv[1] = Undefined(); 483 | } 484 | else { 485 | argv[0] = Undefined(); 486 | argv[1] = v8::Integer::NewFromUnsigned((uint32_t)baton->indexTime); 487 | } 488 | 489 | TryCatch tryCatch; 490 | 491 | baton->callback->Call(Context::GetCurrent()->Global(), 2, argv); 492 | 493 | if (tryCatch.HasCaught()) { 494 | FatalException(tryCatch); 495 | } 496 | 497 | baton->callback.Dispose(); 498 | delete baton; 499 | return 0; 500 | } 501 | 502 | struct indexdeletebytype_baton_t { 503 | Lucene* lucene; 504 | std::string type; 505 | std::string index; 506 | Persistent callback; 507 | uint64_t indexTime; 508 | std::string error; 509 | }; 510 | 511 | // args: 512 | // String* docID 513 | // String* indexPath 514 | static Handle 
DeleteDocumentsByTypeAsync(const Arguments& args) { 515 | HandleScope scope; 516 | 517 | REQ_STR_ARG(0); 518 | REQ_STR_ARG(1); 519 | REQ_FUN_ARG(2, callback); 520 | 521 | REQ_OBJ_TYPE(args.This(), Lucene); 522 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 523 | 524 | indexdeletebytype_baton_t* baton = new indexdeletebytype_baton_t; 525 | baton->lucene = lucene; 526 | baton->type = *v8::String::Utf8Value(args[0]); 527 | baton->index = *v8::String::Utf8Value(args[1]); 528 | baton->callback = Persistent::New(callback); 529 | baton->error.clear(); 530 | lucene->Ref(); 531 | 532 | eio_custom(EIO_DeleteDocumentsByType, EIO_PRI_DEFAULT, EIO_AfterDeleteDocumentsByType, baton); 533 | ev_ref(EV_DEFAULT_UC); 534 | 535 | return scope.Close(Undefined()); 536 | } 537 | 538 | 539 | static void EIO_DeleteDocumentsByType(eio_req* req) { 540 | indexdeletebytype_baton_t* baton = static_cast(req->data); 541 | 542 | lucene::analysis::standard::StandardAnalyzer an; 543 | 544 | try { 545 | IndexReader* reader = baton->lucene->get_reader(baton->index, baton->error); 546 | if (!baton->error.empty()) { 547 | return; 548 | } 549 | 550 | uint64_t start = Misc::currentTimeMillis(); 551 | 552 | TCHAR key[CL_MAX_DIR]; 553 | STRCPY_AtoT(key, "_type", CL_MAX_DIR); 554 | TCHAR value[CL_MAX_DIR]; 555 | STRCPY_AtoT(value, baton->type.c_str(), CL_MAX_DIR); 556 | reader->deleteDocuments(new Term(key, value)); 557 | 558 | baton->indexTime = (Misc::currentTimeMillis() - start); 559 | baton->lucene->close_reader(baton->index); 560 | } catch (CLuceneError& E) { 561 | baton->error.assign(E.what()); 562 | } catch(...) 
{ 563 | baton->error = "Got an unknown exception"; 564 | } 565 | 566 | return; 567 | } 568 | 569 | static int EIO_AfterDeleteDocumentsByType(eio_req* req) { 570 | HandleScope scope; 571 | indexdeletebytype_baton_t* baton = static_cast(req->data); 572 | ev_unref(EV_DEFAULT_UC); 573 | baton->lucene->Unref(); 574 | 575 | Handle argv[2]; 576 | 577 | if (!baton->error.empty()) { 578 | argv[0] = v8::String::New(baton->error.c_str()); 579 | argv[1] = Undefined(); 580 | } 581 | else { 582 | argv[0] = Undefined(); 583 | argv[1] = v8::Integer::NewFromUnsigned((uint32_t)baton->indexTime); 584 | } 585 | 586 | TryCatch tryCatch; 587 | 588 | baton->callback->Call(Context::GetCurrent()->Global(), 2, argv); 589 | 590 | if (tryCatch.HasCaught()) { 591 | FatalException(tryCatch); 592 | } 593 | 594 | baton->callback.Dispose(); 595 | delete baton; 596 | return 0; 597 | } 598 | 599 | 600 | struct search_field 601 | { 602 | search_field(const std::string& key_, const std::string& value_) : key(key_), value(value_) 603 | { } 604 | std::string key; 605 | std::string value; 606 | }; 607 | 608 | struct search_doc 609 | { 610 | float score; 611 | std::vector fields; 612 | }; 613 | 614 | struct search_baton_t 615 | { 616 | Lucene* lucene; 617 | std::string index; 618 | std::string search; 619 | uint64_t searchTime; 620 | std::vector docs; 621 | Persistent callback; 622 | std::string error; 623 | }; 624 | 625 | static Handle SearchAsync(const Arguments& args) { 626 | HandleScope scope; 627 | 628 | REQ_STR_ARG(0); 629 | REQ_STR_ARG(1); 630 | REQ_FUN_ARG(2, callback); 631 | 632 | REQ_OBJ_TYPE(args.This(), Lucene); 633 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 634 | 635 | search_baton_t* baton = new search_baton_t; 636 | baton->lucene = lucene; 637 | baton->index.assign(*v8::String::Utf8Value(args[0])); 638 | baton->search.assign(*v8::String::Utf8Value(args[1])); 639 | baton->callback = Persistent::New(callback); 640 | baton->error.clear(); 641 | 642 | lucene->Ref(); 643 | 644 | 
eio_custom(EIO_Search, EIO_PRI_DEFAULT, EIO_AfterSearch, baton); 645 | ev_ref(EV_DEFAULT_UC); 646 | 647 | return scope.Close(Undefined()); 648 | } 649 | 650 | static void EIO_Search(eio_req* req) 651 | { 652 | search_baton_t* baton = static_cast(req->data); 653 | uint64_t start = Misc::currentTimeMillis(); 654 | 655 | standard::StandardAnalyzer analyzer; 656 | IndexReader* reader = baton->lucene->get_reader(baton->index, baton->error); 657 | 658 | if (!baton->error.empty()) { 659 | return; 660 | } 661 | 662 | IndexSearcher s(reader); 663 | 664 | try { 665 | TCHAR* searchString = STRDUP_AtoT(baton->search.c_str()); 666 | Query* q = QueryParser::parse(searchString, _T("_id"), &analyzer); 667 | Hits* hits = s.search(q); 668 | free(searchString); 669 | // Build the result array 670 | for (size_t i=0; i < hits->length(); i++) { 671 | Document& doc(hits->doc(i)); 672 | // {"id":"ab34", "score":1.0} 673 | search_doc newDoc; 674 | newDoc.score = hits->score(i); 675 | 676 | Document::FieldsType* fields = const_cast(doc.getFields()); 677 | DocumentFieldEnumeration fieldEnum(fields->begin(), fields->end()); 678 | while (fieldEnum.hasMoreElements()) { 679 | Field* curField = fieldEnum.nextElement(); 680 | 681 | char* fieldName = STRDUP_TtoA(curField->name()); 682 | char* fieldValue = STRDUP_TtoA(curField->stringValue()); 683 | 684 | newDoc.fields.push_back(search_field(fieldName, fieldValue)); 685 | 686 | free(fieldName); 687 | free(fieldValue); 688 | } 689 | baton->docs.push_back(newDoc); 690 | } 691 | s.close(); 692 | _CLLDELETE(hits); 693 | _CLLDELETE(q); 694 | baton->searchTime = (Misc::currentTimeMillis() - start); 695 | } catch (CLuceneError& E) { 696 | baton->error.assign(E.what()); 697 | } catch(...) 
{ 698 | baton->error = "Got an unknown exception"; 699 | } 700 | 701 | return; 702 | } 703 | 704 | static int EIO_AfterSearch(eio_req* req) 705 | { 706 | HandleScope scope; 707 | search_baton_t* baton = static_cast(req->data); 708 | ev_unref(EV_DEFAULT_UC); 709 | baton->lucene->Unref(); 710 | 711 | Handle argv[3]; 712 | 713 | if (baton->error.empty()) { 714 | argv[0] = Null(); // Error arg, defaulting to no error 715 | 716 | Local resultArray = v8::Array::New(); 717 | for (uint32_t i = 0; i < baton->docs.size(); ++i) { 718 | search_doc& doc(baton->docs[i]); 719 | Local resultObject = Object::New(); 720 | for (uint32_t j = 0; j < doc.fields.size(); ++j) { 721 | search_field& field(doc.fields[j]); 722 | resultObject->Set(String::New(field.key.c_str()), String::New(field.value.c_str())); 723 | } 724 | resultObject->Set(String::New("score"), Number::New(doc.score)); 725 | resultArray->Set(i, resultObject); 726 | } 727 | 728 | argv[1] = resultArray; 729 | argv[2] = v8::Integer::NewFromUnsigned((uint32_t)baton->searchTime); 730 | } else { 731 | argv[0] = String::New(baton->error.c_str()); 732 | argv[1] = Null(); 733 | argv[2] = Null(); 734 | } 735 | 736 | TryCatch tryCatch; 737 | 738 | baton->callback->Call(Context::GetCurrent()->Global(), 3, argv); 739 | 740 | if (tryCatch.HasCaught()) { 741 | FatalException(tryCatch); 742 | } 743 | 744 | baton->callback.Dispose(); 745 | delete baton; 746 | 747 | return 0; 748 | } 749 | 750 | struct optimize_baton_t 751 | { 752 | Lucene* lucene; 753 | Persistent callback; 754 | std::string index; 755 | std::string error; 756 | }; 757 | 758 | static Handle OptimizeAsync(const Arguments& args) 759 | { 760 | HandleScope scope; 761 | 762 | REQ_STR_ARG(0); 763 | REQ_FUN_ARG(1, callback); 764 | 765 | REQ_OBJ_TYPE(args.This(), Lucene); 766 | Lucene* lucene = ObjectWrap::Unwrap(args.This()); 767 | 768 | optimize_baton_t* baton = new optimize_baton_t; 769 | baton->lucene = lucene; 770 | baton->callback = Persistent::New(callback); 771 | 
baton->index = *v8::String::Utf8Value(args[0]); 772 | baton->error.clear(); 773 | 774 | lucene->Ref(); 775 | 776 | eio_custom(EIO_Optimize, EIO_PRI_DEFAULT, EIO_AfterOptimize, baton); 777 | ev_ref(EV_DEFAULT_UC); 778 | 779 | return scope.Close(Undefined()); 780 | } 781 | 782 | static void EIO_Optimize(eio_req* req) 783 | { 784 | optimize_baton_t* baton = static_cast(req->data); 785 | 786 | try { 787 | 788 | baton->lucene->close_reader(baton->index); 789 | bool needsCreation = true; 790 | if (IndexReader::indexExists(baton->index.c_str())) { 791 | if (IndexReader::isLocked(baton->index.c_str())) { 792 | IndexReader::unlock(baton->index.c_str()); 793 | } 794 | needsCreation = false; 795 | } 796 | 797 | standard::StandardAnalyzer an; 798 | IndexWriter* writer = new IndexWriter(baton->index.c_str(), &an, needsCreation); 799 | writer->setUseCompoundFile(false); 800 | writer->optimize(); 801 | 802 | writer->close(); 803 | 804 | } catch (CLuceneError& E) { 805 | baton->error.assign(E.what()); 806 | } catch(...) 
{ 807 | baton->error = "Got an unknown exception"; 808 | } 809 | 810 | return; 811 | } 812 | 813 | static int EIO_AfterOptimize(eio_req* req) 814 | { 815 | HandleScope scope; 816 | 817 | optimize_baton_t* baton = static_cast(req->data); 818 | 819 | ev_unref(EV_DEFAULT_UC); 820 | baton->lucene->Unref(); 821 | 822 | Handle argv[1]; 823 | 824 | if (!baton->error.empty()) { 825 | argv[0] = v8::String::New(baton->error.c_str()); 826 | } 827 | else { 828 | argv[0] = Undefined(); 829 | } 830 | 831 | TryCatch tryCatch; 832 | 833 | baton->callback->Call(Context::GetCurrent()->Global(), 1, argv); 834 | 835 | if (tryCatch.HasCaught()) { 836 | FatalException(tryCatch); 837 | } 838 | 839 | baton->callback.Dispose(); 840 | delete baton; 841 | 842 | return 0; 843 | } 844 | }; 845 | 846 | Persistent Lucene::s_ct; 847 | 848 | extern "C" void init(Handle target) { 849 | Lucene::Init(target); 850 | LuceneDocument::Initialize(target); 851 | } 852 | 853 | NODE_MODULE(clucene_bindings, init); 854 | -------------------------------------------------------------------------------- /src/repl_tchar.h: -------------------------------------------------------------------------------- 1 | /*------------------------------------------------------------------------------ 2 | * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 | * 4 | * Distributable under the terms of either the Apache License (Version 2.0) or 5 | * the GNU Lesser General Public License, as specified in the COPYING file. 
6 | ------------------------------------------------------------------------------*/ 7 | #ifndef _REPL_TCHAR_H 8 | #define _REPL_TCHAR_H 9 | 10 | #ifndef _CL_HAVE_TCHAR_H 11 | #if defined(_UCS2) 12 | 13 | //note: descriptions with * in front have replacement functions 14 | 15 | //formatting functions 16 | #define _sntprintf swprintf //* make a formatted a string 17 | #define _tprintf wprintf //* print a formatted string 18 | 19 | //this one has no replacement functions yet, but it is only used in the tests 20 | #define _vsntprintf vsnwprintf //* print a formatted string using variable arguments 21 | 22 | //we are using the internal functions of the compiler here 23 | //if LUCENE_USE_INTERNAL_CHAR_FUNCTIONS is defined, thesse 24 | //will be replaced by internal functions 25 | #define _istalnum iswalnum //* alpha/numeric char check 26 | #define _istalpha iswalpha //* alpha char check 27 | #define _istspace iswspace //* space char check 28 | #define _istdigit iswdigit //* digit char check 29 | #define _totlower towlower //* convert char to lower case 30 | #define _totupper towupper //* convert char to lower case 31 | #define _tcslwr wcslwr //* convert string to lower case 32 | 33 | //these are the string handling functions 34 | //we may need to create wide-character/multi-byte replacements for these 35 | #define _tcscpy wcscpy //copy a string to another string 36 | #define _tcsncpy wcsncpy //copy a specified amount of one string to another string. 
37 | #define _tcscat wcscat //copy a string onto the end of the other string 38 | #define _tcsncat wcsncat 39 | #define _tcschr wcschr //find location of one character 40 | #define _tcsstr wcsstr //find location of a string 41 | #define _tcslen wcslen //get length of a string 42 | #define _tcscmp wcscmp //case sensitive compare two strings 43 | #define _tcsncmp wcsncmp //case sensitive compare two strings 44 | #define _tcscspn wcscspn //location of any of a set of character in a string 45 | 46 | //string compare 47 | #ifdef _CL_HAVE_FUNCTION_WCSICMP 48 | #define _tcsicmp wcsicmp //* case insensitive compare two string 49 | #else 50 | #define _tcsicmp wcscasecmp //* case insensitive compare two string 51 | #endif 52 | #if defined(_CL_HAVE_FUNCTION_WCSDUP) 53 | #define _tcsdup wcsdup 54 | #else 55 | #define _tcsdup lucene_wcsdup 56 | #endif 57 | 58 | //conversion functions 59 | #define _tcstod wcstod //convert a string to a double 60 | #define _tcstoi64 wcstoll //* convers a string to an 64bit bit integer 61 | #define _itot _i64tot 62 | #define _i64tot lltow //* converts a 64 bit integer to a string (with base) 63 | #else //if defined(_ASCII) 64 | 65 | //formatting functions 66 | #define _sntprintf snprintf 67 | #define _tprintf printf 68 | #define _vsntprintf vsnprintf 69 | 70 | //we are using the internal functions of the compiler here 71 | //if LUCENE_USE_INTERNAL_CHAR_FUNCTIONS is defined, thesse 72 | //will be replaced by internal functions 73 | #define _istalnum isalnum 74 | #define _istalpha isalpha 75 | #define _istspace isspace 76 | #define _istdigit isdigit 77 | #define _totlower tolower 78 | #define _totupper toupper 79 | #define _tcslwr strlwr 80 | 81 | //these are the string handling functions 82 | #define _tcscpy strcpy 83 | #define _tcsncpy strncpy 84 | #define _tcscat strcat 85 | #define _tcsncat strncat 86 | #define _tcschr strchr 87 | #define _tcsstr strstr 88 | #define _tcslen strlen 89 | #define _tcscmp strcmp 90 | #define _tcsncmp strncmp 91 | 
#define _tcsicmp strcasecmp 92 | #define _tcscspn strcspn 93 | #define _tcsdup strdup //string duplicate 94 | //converstion methods 95 | #define _tcstod strtod 96 | #define _tcstoi64 strtoll 97 | #define _itot _i64tot 98 | #define _i64tot lltoa 99 | 100 | #endif 101 | 102 | #else //HAVE_TCHAR_H 103 | #include 104 | 105 | //some tchar headers miss these... 106 | #ifndef _tcstoi64 107 | #if defined(_UCS2) 108 | #define _tcstoi64 wcstoll //* convers a string to an 64bit bit integer 109 | #else 110 | #define _tcstoi64 strtoll 111 | #endif 112 | #endif 113 | 114 | #endif //HAVE_TCHAR_H 115 | 116 | #ifndef _ttoi 117 | #define _ttoi(x) (int)_tcstoi64(x,NULL,10) 118 | #endif 119 | 120 | #ifndef _itot 121 | #define _itot(i, buf, radix) lucene_i64tot(i, buf, radix) 122 | #endif 123 | 124 | namespace std 125 | { 126 | #ifndef tstring 127 | #ifdef _UNICODE 128 | typedef wstring tstring; 129 | #else 130 | typedef string tstring; 131 | #endif 132 | #endif 133 | }; 134 | 135 | #define STRCPY_AtoA(target,src,len) strncpy(target,src,len) 136 | #define STRDUP_AtoA(x) strdup(x) 137 | 138 | #if defined(_UCS2) 139 | #define stringDuplicate(x) _tcsdup(x) 140 | 141 | #if defined(_CL_HAVE_FUNCTION_WCSDUP) 142 | #define STRDUP_WtoW wcsdup 143 | #else 144 | #define STRDUP_WtoW lucene_wcsdup 145 | #endif 146 | #define STRDUP_TtoT STRDUP_WtoW 147 | #define STRDUP_WtoT STRDUP_WtoW 148 | #define STRDUP_TtoW STRDUP_WtoW 149 | 150 | #define STRDUP_AtoW(x) CL_NS(util)::Misc::_charToWide(x) 151 | #define STRDUP_AtoT STRDUP_AtoW 152 | 153 | #define STRDUP_WtoA(x) CL_NS(util)::Misc::_wideToChar(x) 154 | #define STRDUP_TtoA STRDUP_WtoA 155 | 156 | #define STRCPY_WtoW(target,src,len) _tcsncpy(target,src,len) 157 | #define STRCPY_TtoW STRCPY_WtoW 158 | #define STRCPY_WtoT STRCPY_WtoW 159 | //#define _tcscpy STRCPY_WtoW 160 | 161 | #define STRCPY_AtoW(target,src,len) CL_NS(util)::Misc::_cpycharToWide(src,target,len) 162 | #define STRCPY_AtoT STRCPY_AtoW 163 | 164 | #define STRCPY_WtoA(target,src,len) 
CL_NS(util)::Misc::_cpywideToChar(src,target,len) 165 | #define STRCPY_TtoA STRCPY_WtoA 166 | #else 167 | #define stringDuplicate(x) strdup(x) 168 | #define STRDUP_AtoT STRDUP_AtoA 169 | #define STRDUP_TtoA STRDUP_AtoA 170 | #define STRDUP_TtoT STRDUP_AtoA 171 | 172 | #define STRDUP_WtoT(x) xxxxxxxxxxxxxxx //not possible 173 | #define STRCPY_WtoT(target,src,len) xxxxxxxxxxxxxxx //not possible 174 | 175 | #define STRCPY_AtoT STRCPY_AtoA 176 | #define STRCPY_TtoA STRCPY_AtoA 177 | //#define _tcscpy STRCPY_AtoA 178 | #endif 179 | 180 | 181 | #endif //_REPL_TCHAR_H 182 | -------------------------------------------------------------------------------- /test/facebook.json: -------------------------------------------------------------------------------- 1 | {"type":"photo/facebook","via":"synclet/facebook","timestamp":1314904878961,"action":"new","obj":{"source":"facebook_photo","type":"new","data":{"_id":"4e5fdb2e7629dda70fb4394f","id":"53312381820","from":{"name":"Thomas Muldowney","id":"709761820"},"picture":"http://photos-c.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/s709761820_1887967_2110.jpg","source":"http://a3.sphotos.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/n709761820_1887967_2110.jpg","height":453,"width":443,"images":[{"height":453,"width":443,"source":"http://a3.sphotos.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/n709761820_1887967_2110.jpg"},{"height":184,"width":180,"source":"http://photos-c.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/a709761820_1887967_2110.jpg"},{"height":130,"width":127,"source":"http://photos-c.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/s709761820_1887967_2110.jpg"},{"height":76,"width":75,"source":"http://photos-c.ak.fbcdn.net/photos-ak-snc1/v2136/83/108/709761820/t709761820_1887967_2110.jpg"}],"link":"http://www.facebook.com/photo.php?pid=1887967&id=709761820","icon":"http://static.ak.fbcdn.net/rsrc.php/v1/yz/r/StEh3RhPvjk.gif","created_time":1233685472,"position":1,"updated_time":1233685472}}} 
-------------------------------------------------------------------------------- /test/heavy.js: -------------------------------------------------------------------------------- 1 | var path = require('path'); 2 | var fs = require('fs'); 3 | var wrench = require('wrench'); 4 | var async = require('async'); 5 | 6 | var cl = require('../clucene').CLucene; 7 | var clucene = new cl.Lucene(); 8 | 9 | var indexPath = './heavy.index'; 10 | 11 | var testJson = JSON.parse(fs.readFileSync("./test/facebook.json")); 12 | 13 | if (path.existsSync(indexPath)) { 14 | wrench.rmdirSyncRecursive(indexPath); 15 | } 16 | 17 | var ctr = 0; 18 | var MAX_COUNTER = 200000; 19 | var pauseForRam = false; 20 | 21 | function nextTest(nextFn) { 22 | if (pauseForRam) { 23 | console.log("All done check the ram"); 24 | setTimeout(function() { nextFn.call(); }, 10000); 25 | } else { 26 | nextFn.call(); 27 | } 28 | } 29 | 30 | function searchOnePass() { 31 | ctr = 0; 32 | console.log("Doing invdividual searches"); 33 | function runStep() { 34 | if (ctr > MAX_COUNTER) { 35 | return nextTest(deletePass); 36 | } 37 | process.nextTick(function() { 38 | clucene.search(indexPath, '_id:"' + ctr + '"', function(err, results, searchTime) { 39 | ++ctr; 40 | runStep(); 41 | }) 42 | }); 43 | } 44 | runStep(); 45 | } 46 | 47 | function deletePass() { 48 | /* 49 | ctr = 0; 50 | console.log("Doing deletes"); 51 | function runStep() { 52 | if (ctr > MAX_COUNTER) { 53 | return nextTest(function() { console.log("All done"); }); 54 | } 55 | process.nextTick(function() { 56 | clucene.search(indexPath, 'json:"Muldowney"', function(err, results, searchTime) { 57 | ++ctr; 58 | runStep(); 59 | }) 60 | }); 61 | } 62 | runStep(); 63 | */ 64 | console.log("done"); setTimeout(function() {}, 360000); 65 | } 66 | 67 | 68 | function indexPass(updates) { 69 | console.log("Index docs" + (updates ? 
" with updates" : "")); 70 | var doc = new cl.Document(); 71 | ctr = 0; 72 | function nextStep() { ctr++; runAgain(); } 73 | function runAgain() { 74 | if (ctr > MAX_COUNTER) { 75 | delete doc; 76 | doc = null; 77 | clucene.closeWriter(); 78 | //nextTest(function() {console.log("Here"); if (!updates) indexPass(true); else searchOnePass(); }); 79 | nextTest(deletePass); 80 | return; 81 | } 82 | process.nextTick(function() { 83 | doc.clear(); 84 | if (ctr % 1000 == 0) console.log("Adding " + ctr); 85 | testJson.newField = ctr; 86 | var jsonStr = JSON.stringify(testJson); 87 | doc.addField("json", jsonStr, cl.STORE_NO|cl.INDEX_TOKENIZED); 88 | delete jsonStr; 89 | jsonStr = null; 90 | var ctrStr = String(ctr); 91 | doc.addField("baseId", ctrStr, cl.STORE_NO|cl.INDEX_UNTOKENIZED); 92 | delete ctrStr; 93 | ctrStr = null; 94 | var docId = "id" + ctr; 95 | clucene.addDocument(docId, doc, indexPath, nextStep); 96 | }); 97 | } 98 | runAgain(); 99 | } 100 | 101 | process.stdin.on("data", function(data) { 102 | if (data == "end") process.exit(0); 103 | console.log(data); 104 | }); 105 | 106 | if (process.argv.length >= 3) { 107 | pauseForRam = true; 108 | } 109 | 110 | // Our startup ram check stop 111 | console.log("Check start size"); 112 | nextTest(indexPass); 113 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | var path = require('path'); 2 | var fs = require('fs'); 3 | var wrench = require('wrench'); 4 | 5 | var cl = require('../clucene').CLucene; 6 | var clucene = new cl.Lucene(); 7 | 8 | var indexPath = './test.index'; 9 | 10 | exports['add new document'] = function (test) { 11 | if (path.existsSync(indexPath)) { 12 | wrench.rmdirSyncRecursive(indexPath); 13 | } 14 | 15 | var doc = new cl.Document(); 16 | var docId = '1'; 17 | 18 | doc.addField('name', 'Eric Jennings', cl.STORE_YES|cl.INDEX_TOKENIZED); 19 | doc.addField('_type', 
'contact', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 20 | doc.addField('timestamp', '1293765885000', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 21 | 22 | clucene.addDocument(docId, doc, indexPath, function(err, indexTime) { 23 | test.equal(err, null); 24 | test.ok(is('Number', indexTime)); 25 | clucene.closeWriter(); 26 | test.done(); 27 | }); 28 | }; 29 | 30 | exports['query newly-added document'] = function (test) { 31 | clucene.search(indexPath, '1', function(err, results, searchTime) { 32 | test.equal(err, null); 33 | test.ok(is('Array', results)); 34 | test.ok(is('Number', searchTime)); 35 | test.equal(results[0]._id, 1); 36 | test.equal(results[0].name, 'Eric Jennings'); 37 | test.equal(results[0]._type, 'contact'); 38 | test.equal(results[0].timestamp, '1293765885000'); 39 | test.done(); 40 | }); 41 | }; 42 | 43 | exports['update existing document'] = function (test) { 44 | var doc = new cl.Document(); 45 | var docId = '1'; 46 | 47 | doc.addField('name', 'Thomas Anderson', cl.STORE_YES|cl.INDEX_TOKENIZED); 48 | doc.addField('timestamp', '129555555555555', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 49 | 50 | clucene.addDocument(docId, doc, indexPath, function(err, indexTime) { 51 | test.equal(err, null); 52 | test.ok(is('Number', indexTime)); 53 | clucene.closeWriter(); 54 | test.done(); 55 | }); 56 | }; 57 | 58 | exports['query updated document'] = function (test) { 59 | clucene.search(indexPath, '1', function(err, results, searchTime) { 60 | test.equal(err, null); 61 | test.ok(is('Array', results)); 62 | test.ok(is('Number', searchTime)); 63 | test.equal(results[0]._id, 1); 64 | test.equal(results[0].name, 'Thomas Anderson'); 65 | test.equal(results[0].timestamp, '129555555555555'); 66 | test.done(); 67 | }); 68 | }; 69 | 70 | exports['query by full field name'] = function (test) { 71 | clucene.search(indexPath, 'name:"Thomas"', function(err, results, searchTime) { 72 | test.equal(err, null); 73 | test.ok(is('Array', results)); 74 | test.ok(is('Number', searchTime)); 75 | 
test.equal(results[0]._id, 1); 76 | test.equal(results[0].name, 'Thomas Anderson'); 77 | test.equal(results[0].timestamp, '129555555555555'); 78 | test.done(); 79 | }); 80 | }; 81 | 82 | exports['query by wildcard'] = function (test) { 83 | clucene.search(indexPath, 'name:Thom*', function(err, results, searchTime) { 84 | test.equal(err, null); 85 | test.ok(is('Array', results)); 86 | test.ok(is('Number', searchTime)); 87 | test.equal(results[0]._id, 1); 88 | test.equal(results[0].name, 'Thomas Anderson'); 89 | test.equal(results[0].timestamp, '129555555555555'); 90 | test.done(); 91 | }); 92 | }; 93 | 94 | exports['delete document'] = function (test) { 95 | clucene.deleteDocument('1', indexPath, function(err, indexTime) { 96 | test.equal(err, null); 97 | test.ok(is('Number', indexTime)); 98 | test.done(); 99 | }); 100 | }; 101 | 102 | exports['add doc1 for type test'] = function (test) { 103 | var doc = new cl.Document(); 104 | var docId = '10'; 105 | 106 | doc.addField('name', 'Eric Jennings', cl.STORE_YES|cl.INDEX_TOKENIZED); 107 | doc.addField('_type', 'contact', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 108 | doc.addField('timestamp', '1293765885000', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 109 | 110 | clucene.addDocument(docId, doc, indexPath, function(err, indexTime) { 111 | test.equal(err, null); 112 | test.ok(is('Number', indexTime)); 113 | test.done(); 114 | }); 115 | }; 116 | 117 | exports['add doc2 for type test'] = function (test) { 118 | var doc = new cl.Document(); 119 | var docId = '11'; 120 | 121 | doc.addField('name', 'asdfasdf Jennings', cl.STORE_YES|cl.INDEX_TOKENIZED); 122 | doc.addField('_type', 'contact', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 123 | doc.addField('timestamp', '1293765885000', cl.STORE_YES|cl.INDEX_UNTOKENIZED); 124 | 125 | clucene.addDocument(docId, doc, indexPath, function(err, indexTime) { 126 | test.equal(err, null); 127 | test.ok(is('Number', indexTime)); 128 | clucene.closeWriter(); 129 | test.done(); 130 | }); 131 | }; 132 | 133 | 
// Indexes a third 'contact' document (id '12') and closes the writer so the
// following search sees it.
exports['add doc3 for type test'] = function (test) {
    var contact = new cl.Document();

    var tokenized = cl.STORE_YES|cl.INDEX_TOKENIZED;
    contact.addField('name', 'asdssdf Jennings', tokenized);

    var untokenized = cl.STORE_YES|cl.INDEX_UNTOKENIZED;
    contact.addField('_type', 'contact', untokenized);

    untokenized = cl.STORE_YES|cl.INDEX_UNTOKENIZED;
    contact.addField('timestamp', '1293765885000', untokenized);

    function onIndexed(err, indexTime) {
        test.equal(err, null);
        test.ok(is('Number', indexTime));
        clucene.closeWriter();
        test.done();
    }

    clucene.addDocument('12', contact, indexPath, onIndexed);
};

// A search on _type:"contact" must now return exactly three documents.
exports['ensure 3 docs exist for type test'] = function (test) {
    function onResults(err, results, searchTime) {
        test.equal(err, null);
        test.ok(is('Array', results));
        test.ok(is('Number', searchTime));
        test.equal(results.length, 3);
        test.done();
    }

    clucene.search(indexPath, '_type:"contact"', onResults);
};

// Deleting by type reports no error and a numeric timing value.
exports['delete all docs of type'] = function (test) {
    function onDeleted(err, indexTime) {
        test.equal(err, null);
        test.ok(is('Number', indexTime));
        test.done();
    }

    clucene.deleteDocumentsByType('contact', indexPath, onDeleted);
};

// After the delete, the same _type query must come back empty.
exports['ensure deleted docs of type are all gone'] = function (test) {
    function onResults(err, results, searchTime) {
        test.equal(err, null);
        test.ok(is('Array', results));
        test.ok(is('Number', searchTime));
        test.equal(results.length, 0);
        test.done();
    }

    clucene.search(indexPath, '_type:"contact"', onResults);
};

// optimize() must complete without reporting an error.
exports['the index can be optimized'] = function(test) {
    function onOptimized(err) {
        test.equal(err, null);
        test.done();
    }

    clucene.optimize(indexPath, onOptimized);
};

/**
 * Checks the internal [[Class]] tag of a value.
 *
 * @param {string} type - expected tag, e.g. 'Number' or 'Array'
 * @param {*} obj - value to inspect
 * @returns {boolean} true when obj is neither undefined nor null and its
 *     Object.prototype.toString tag equals type
 */
function is(type, obj) {
    if (obj === undefined || obj === null) {
        return false;
    }
    var tag = Object.prototype.toString.call(obj);
    return tag.slice(8, -1) === type;
}
def set_options(opt):
    """Ask waf to load the command-line options of its compiler_cxx tool."""
    opt.tool_options("compiler_cxx")


def configure(conf):
    """Load the compiler_cxx and node_addon tools, then require clucene-core.

    The library check is mandatory and carries an install hint as its
    error message.
    """
    for tool_name in ("compiler_cxx", "node_addon"):
        conf.check_tool(tool_name)

    conf.check_cxx(
        lib="clucene-core",
        mandatory=True,
        errmsg="Please install CLucene from http://clucene.sourceforge.net"
    )


def build(bld):
    """Declare the addon task generator and set its flags, target, source and library."""
    addon = bld.new_task_gen("cxx", "shlib", "node_addon")

    # Applied in the same order as the original attribute assignments.
    settings = (
        ("cxxflags", ["-g", "-D_FILE_OFFSET_BITS=64", "-D_LARGEFILE_SOURCE", "-Wall"]),
        ("target", "clucene"),
        ("source", "src/clucene_bindings.cpp"),
        ("lib", "clucene-core"),
    )
    for attribute, value in settings:
        setattr(addon, attribute, value)