├── .gitignore ├── test └── performance │ ├── spell.png │ ├── suggest.png │ └── performance.ts ├── .gitmodules ├── tsconfig.json ├── hunspell ├── AUTHORS ├── src │ ├── hunspell │ │ ├── istrmgr.hxx │ │ ├── w_char.hxx │ │ ├── hunvisapi.h │ │ ├── hunvisapi.h.in │ │ ├── replist.hxx │ │ ├── strmgr.hxx │ │ ├── filemgr.hxx │ │ ├── baseaffix.hxx │ │ ├── langnum.hxx │ │ ├── dictmgr.hxx │ │ ├── Makefile.am │ │ ├── README │ │ ├── strmgr.cxx │ │ ├── htypes.hxx │ │ ├── filemgr.cxx │ │ ├── hunzip.hxx │ │ ├── phonet.hxx │ │ ├── license.hunspell │ │ ├── replist.cxx │ │ ├── makefile.mk │ │ ├── atypes.hxx │ │ ├── hashmgr.hxx │ │ ├── license.myspell │ │ ├── hunspell.h │ │ ├── suggestmgr.hxx │ │ ├── hunspell.dsp │ │ ├── affentry.hxx │ │ ├── dictmgr.cxx │ │ ├── hunspell.hxx │ │ ├── hunzip.cxx │ │ ├── csutil.hxx │ │ ├── phonet.cxx │ │ ├── affixmgr.hxx │ │ └── config.h │ └── win_api │ │ └── config.h ├── COPYING ├── readme.md ├── README.myspell ├── binding.gyp ├── AUTHORS.myspell └── README.hunspell ├── examples ├── 1_create_nodehun_instance.js ├── 13_get_version.js ├── 11_dictionary_encoding.js ├── dictionaries │ └── index.js ├── 12_word_characters.js ├── 5_stems.js ├── 6_generate.js ├── 2_spelling.js ├── 3_suggesting.js ├── 4_analyzing.js ├── 8_adding_words.js ├── 9_adding_words_with_example_affix.js ├── 10_removing_words.js └── 7_adding_dictionaries.js ├── binding.gyp ├── src ├── index.cc ├── Async │ ├── AddWorker.cc │ ├── RemoveWorker.cc │ ├── SpellWorker.cc │ ├── AddDictionaryWorker.cc │ ├── AddWithAffixWorker.cc │ ├── StemWorker.cc │ ├── AnalyzeWorker.cc │ ├── GenerateWorker.cc │ ├── SuggestWorker.cc │ └── Worker.cc ├── HunspellContext.h ├── Nodehun.d.ts ├── Nodehun.h └── Nodehun.cc ├── LICENSE.md ├── package.json ├── .github └── workflows │ └── Test.yml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .idea 4 | log.txt 5 | .vscode 6 | -------------------------------------------------------------------------------- /test/performance/spell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wulf/nodehun/main/test/performance/spell.png -------------------------------------------------------------------------------- /test/performance/suggest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wulf/nodehun/main/test/performance/suggest.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "hunspell"] 2 | path = hunspell 3 | url = https://github.com/wulf/hunspell-distributed.git 4 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES6", 4 | "types": [ 5 | "./src/Nodehun", 6 | "node", 7 | "mocha" 8 | ] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /hunspell/AUTHORS: -------------------------------------------------------------------------------- 1 | Author of Hunspell: 2 | Németh László nemeth (at) OpenOffice.org 3 | 4 | Hunspell based on OpenOffice.org's Myspell. 
MySpell's author: 5 | Kevin Hendricks kevin.hendricks (at) sympatico.ca 6 | -------------------------------------------------------------------------------- /examples/1_create_nodehun_instance.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | // ready to use :) 7 | -------------------------------------------------------------------------------- /examples/13_get_version.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | const version = nodehun.getVersion() 9 | console.log(version) // => undefined -------------------------------------------------------------------------------- /hunspell/src/hunspell/istrmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _ISTRMGR_HXX 2 | #define _ISTRMGR_HXX 3 | #include "license.hunspell" 4 | #include "license.myspell" 5 | 6 | class IStrMgr 7 | { 8 | public: 9 | virtual ~IStrMgr(){} 10 | virtual char * getline() = 0; 11 | virtual int getlinenum() = 0; 12 | }; 13 | #endif 14 | -------------------------------------------------------------------------------- /examples/11_dictionary_encoding.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | const encoding = nodehun.getDictionaryEncoding() 9 | console.log('Dictionary\'s encoding is:', encoding) // => 'UTF-8' 10 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/w_char.hxx: -------------------------------------------------------------------------------- 1 | #ifndef __WCHARHXX__ 2 | #define __WCHARHXX__ 3 | 4 | #ifndef GCC 5 | typedef struct { 6 | #else 7 | typedef struct __attribute__ ((packed)) { 8 | #endif 9 | unsigned char l; 10 | unsigned char h; 11 | } w_char; 12 | 13 | // two character arrays 14 | struct replentry { 15 | char * pattern; 16 | char * pattern2; 17 | bool start; 18 | bool end; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "Nodehun", 5 | "sources": [ "src/index.cc" ], 6 | "include_dirs": [" 2 | #include "Nodehun.cc" 3 | 4 | Napi::Object CreateObject(const Napi::CallbackInfo& info) { 5 | Napi::Env env = info.Env(); 6 | Napi::HandleScope scope(env); 7 | 8 | return Nodehun::NewInstance(info); 9 | } 10 | 11 | Napi::Object InitAll(Napi::Env env, Napi::Object exports) { 12 | Napi::Object new_exports = Napi::Function::New(env, CreateObject, "Nodehun"); 13 | return Nodehun::Init(env, new_exports); 14 | } 15 | 16 | NODE_API_MODULE(addon, InitAll) 17 | -------------------------------------------------------------------------------- /examples/12_word_characters.js: -------------------------------------------------------------------------------- 1 | const Nodehun = 
require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | const wordCharacters = nodehun.getWordCharacters() 9 | console.log('Dictionary\'s word characters are:', wordCharacters) // => 0123456789'.-’ 10 | 11 | const wordCharactersUTF16 = nodehun.getWordCharactersUTF16() 12 | console.log('Dictionary\'s word characters (UTF-16) are:', wordCharactersUTF16) // => '-.0123456789’ -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunvisapi.h: -------------------------------------------------------------------------------- 1 | #ifndef _HUNSPELL_VISIBILITY_H_ 2 | #define _HUNSPELL_VISIBILITY_H_ 3 | 4 | #if defined(HUNSPELL_STATIC) 5 | # define LIBHUNSPELL_DLL_EXPORTED 6 | #elif defined(_MSC_VER) 7 | # if defined(BUILDING_LIBHUNSPELL) 8 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) 9 | # else 10 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) 11 | # endif 12 | #elif BUILDING_LIBHUNSPELL && 1 13 | # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) 14 | #else 15 | # define LIBHUNSPELL_DLL_EXPORTED 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunvisapi.h.in: -------------------------------------------------------------------------------- 1 | #ifndef _HUNSPELL_VISIBILITY_H_ 2 | #define _HUNSPELL_VISIBILITY_H_ 3 | 4 | #if defined(HUNSPELL_STATIC) 5 | # define LIBHUNSPELL_DLL_EXPORTED 6 | #elif defined(_MSC_VER) 7 | # if defined(BUILDING_LIBHUNSPELL) 8 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) 9 | # else 10 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) 11 | # endif 12 | #elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@ 13 | # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) 14 | #else 15 | # define LIBHUNSPELL_DLL_EXPORTED 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/replist.hxx: -------------------------------------------------------------------------------- 1 | /* string replacement list class */ 2 | #ifndef _REPLIST_HXX_ 3 | #define _REPLIST_HXX_ 4 | 5 | #include "hunvisapi.h" 6 | 7 | #include "w_char.hxx" 8 | 9 | #undef near 10 | 11 | class LIBHUNSPELL_DLL_EXPORTED RepList 12 | { 13 | protected: 14 | replentry ** dat; 15 | int size; 16 | int pos; 17 | 18 | public: 19 | RepList(int n); 20 | ~RepList(); 21 | 22 | int get_pos(); 23 | int add(char * pat1, char * pat2); 24 | replentry * item(int n); 25 | int near(const char * word); 26 | int match(const char * word, int n); 27 | int conv(const char * word, char * dest); 28 | }; 29 | #endif 30 | -------------------------------------------------------------------------------- /hunspell/COPYING: -------------------------------------------------------------------------------- 1 | GPL 2.0/LGPL 2.1/MPL 1.1 tri-license 2 | 3 | The contents of this software may be used under the terms of 4 | the GNU General Public License Version 2 or later (the "GPL"), or 5 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL", 6 | see COPYING.LGPL) or (excepting the LGPLed GNU gettext library in the 7 | intl/ directory) the Mozilla Public License Version 1.1 or later 8 | (the "MPL", see COPYING.MPL). 
9 | 10 | Software distributed under these licenses is distributed on an "AS IS" basis, 11 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences 12 | for the specific language governing rights and limitations under the licenses. 13 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/strmgr.hxx: -------------------------------------------------------------------------------- 1 | /* file manager class - read lines of files [filename] OR [filename.hz] */ 2 | #ifndef _STRMGR_HXX_ 3 | #define _STRMGR_HXX_ 4 | 5 | #include "hunvisapi.h" 6 | 7 | #include "hunzip.hxx" 8 | #include "istrmgr.hxx" 9 | 10 | class LIBHUNSPELL_DLL_EXPORTED StrMgr : public IStrMgr 11 | { 12 | protected: 13 | char* st; 14 | size_t size; 15 | size_t index; 16 | int fail(const char * err,const char * par); 17 | int linenum; 18 | char in[BUFSIZE + 50]; // input buffer 19 | 20 | public: 21 | StrMgr(const char * str, const char * key = NULL); 22 | virtual ~StrMgr(); 23 | virtual char * getline(); 24 | virtual int getlinenum(); 25 | }; 26 | #endif 27 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/filemgr.hxx: -------------------------------------------------------------------------------- 1 | /* file manager class - read lines of files [filename] OR [filename.hz] */ 2 | #ifndef _FILEMGR_HXX_ 3 | #define _FILEMGR_HXX_ 4 | 5 | #include "hunvisapi.h" 6 | 7 | #include "hunzip.hxx" 8 | #include "istrmgr.hxx" 9 | #include 10 | 11 | class LIBHUNSPELL_DLL_EXPORTED FileMgr : public IStrMgr 12 | { 13 | protected: 14 | FILE * fin; 15 | Hunzip * hin; 16 | char in[BUFSIZE + 50]; // input buffer 17 | int fail(const char * err, const char * par); 18 | int linenum; 19 | 20 | public: 21 | FileMgr(const char * filename, const char * key = NULL); 22 | virtual ~FileMgr(); 23 | virtual char * getline(); 24 | virtual int getlinenum(); 25 | }; 26 | #endif 27 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/baseaffix.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _BASEAFF_HXX_ 2 | #define _BASEAFF_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | class LIBHUNSPELL_DLL_EXPORTED AffEntry 7 | { 8 | protected: 9 | char * appnd; 10 | char * strip; 11 | unsigned char appndl; 12 | unsigned char stripl; 13 | char numconds; 14 | char opts; 15 | unsigned short aflag; 16 | union { 17 | char conds[MAXCONDLEN]; 18 | struct { 19 | char conds1[MAXCONDLEN_1]; 20 | char * conds2; 21 | } l; 22 | } c; 23 | char * morphcode; 24 | unsigned short * contclass; 25 | short contclasslen; 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/langnum.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _LANGNUM_HXX_ 2 | #define _LANGNUM_HXX_ 3 | 4 | /* 5 | language numbers for language specific codes 6 | see http://l10n.openoffice.org/languages.html 7 | */ 8 | 9 | enum { 10 | LANG_ar=96, 11 | LANG_az=100, // custom number 12 | LANG_bg=41, 13 | LANG_ca=37, 14 | LANG_cs=42, 15 | LANG_da=45, 16 | LANG_de=49, 17 | LANG_el=30, 18 | LANG_en=01, 19 | LANG_es=34, 20 | LANG_eu=10, 21 | LANG_fr=02, 22 | LANG_gl=38, 23 | LANG_hr=78, 24 | LANG_hu=36, 25 | LANG_it=39, 26 | LANG_la=99, // custom number 27 | LANG_lv=101, // custom number 28 | LANG_nl=31, 29 | LANG_pl=48, 30 | LANG_pt=03, 31 | LANG_ru=07, 32 | LANG_sv=50, 33 | LANG_tr=90, 
34 | LANG_uk=80, 35 | LANG_xx=999 36 | }; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /examples/5_stems.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | /** 9 | * Async : using the promise 10 | */ 11 | nodehun 12 | .stem('telling') 13 | .then(stems => { 14 | console.log(stems) // => ['telling', 'tell'] 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | async function getStems() { 21 | const stems = await nodehun.stem('telling') 22 | console.log(stems) // => ['telling', 'tell'] 23 | } 24 | getStems() 25 | 26 | /** 27 | * Sync 28 | */ 29 | const stems = nodehun.stemSync('telling') 30 | console.log(stems) // => ['telling', 'tell'] 31 | -------------------------------------------------------------------------------- /examples/6_generate.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | /** 9 | * Async : using the promise 10 | */ 11 | nodehun 12 | .generate('telling', 'ran') 13 | .then(generate => { 14 | console.log(generate) // => ['told'] 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | async function generateExample() { 21 | const generate = await nodehun.generate('telling', 'ran') 22 | console.log(generate) // => ['told'] 23 | } 24 | generateExample() 25 | 26 | /** 27 | * Sync 28 | */ 29 | const generate = nodehun.generateSync('told', 'run') 30 | console.log(generate) // => ['tell'] 31 | -------------------------------------------------------------------------------- /examples/2_spelling.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | /** 9 | * Async : using the promise 10 | */ 11 | nodehun 12 | .spell('color') 13 | .then(isCorrect => { 14 | console.log('"color" is correct?', isCorrect) // => true 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | async function spell() { 21 | const isCorrect = await nodehun.spell('color') 22 | console.log('"color" is correct?', isCorrect) // => true 23 | } 24 | spell() 25 | 26 | /** 27 | * Sync 28 | */ 29 | const isCorrect = nodehun.spellSync('colour') 30 | console.log('"colour" is correct?', isCorrect) // => false 31 | -------------------------------------------------------------------------------- /src/Async/AddWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class AddWorker : public Worker { 6 | public: 7 | AddWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here 15 | context->lockWrite(); 16 | context->instance->add(word.c_str()); 17 | context->unlockWrite(); 18 | } 19 | 20 | void Resolve(Napi::Promise::Deferred const &deferred) { 21 | Napi::Env env = 
deferred.Env(); 22 | 23 | deferred.Resolve(env.Undefined()); 24 | } 25 | 26 | private: 27 | std::string word; 28 | }; -------------------------------------------------------------------------------- /src/Async/RemoveWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class RemoveWorker : public Worker { 6 | public: 7 | RemoveWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here. remove() modifies the dictionary, so take the write lock. 15 | context->lockWrite(); 16 | context->instance->remove(word.c_str()); 17 | context->unlockWrite(); 18 | } 19 | 20 | void Resolve(Napi::Promise::Deferred const &deferred) { 21 | Napi::Env env = deferred.Env(); 22 | 23 | deferred.Resolve(env.Undefined()); 24 | } 25 | 26 | private: 27 | std::string word; 28 | }; -------------------------------------------------------------------------------- /examples/3_suggesting.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | /** 9 | * Async : using the promise 10 | */ 11 | nodehun 12 | .suggest('color') 13 | .then(suggestions => { 14 | console.log(suggestions) // => null because 'color' is correct 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | async function suggest() { 21 | const suggestions = await nodehun.suggest('color') 22 | console.log(suggestions) // => null because 'color' is correct 23 | } 24 | suggest() 25 | 26 | /** 27 | * Sync 28 | */ 29 | const suggestions = nodehun.suggestSync('colour') 30 | console.log(suggestions) // => array of suggestions 31 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/dictmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _DICTMGR_HXX_ 2 | #define _DICTMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #define MAXDICTIONARIES 100 7 | #define MAXDICTENTRYLEN 1024 8 | 9 | struct dictentry { 10 | char * filename; 11 | char * lang; 12 | char * region; 13 | }; 14 | 15 | 16 | class LIBHUNSPELL_DLL_EXPORTED DictMgr 17 | { 18 | 19 | int numdict; 20 | dictentry * pdentry; 21 | 22 | public: 23 | 24 | DictMgr(const char * dictpath, const char * etype); 25 | ~DictMgr(); 26 | int get_list(dictentry** ppentry); 27 | 28 | private: 29 | int parse_file(const char * dictpath, const char * etype); 30 | char * mystrsep(char ** stringp, const char delim); 31 | char * mystrdup(const char * s); 32 | void mychomp(char * s); 33 | 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /src/Async/SpellWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class SpellWorker : public Worker { 6 | public: 7 | SpellWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here 15 | context->lockRead(); 16 | correct = context->instance->spell(word.c_str()); 17 | context->unlockRead(); 18 | } 19 | 20 | void Resolve(Napi::Promise::Deferred const &deferred)
{ 21 | Napi::Env env = deferred.Env(); 22 | 23 | deferred.Resolve(Napi::Boolean::New(env, correct)); 24 | } 25 | 26 | private: 27 | bool correct = false; 28 | std::string word; 29 | }; -------------------------------------------------------------------------------- /examples/4_analyzing.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | const nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 5 | 6 | 7 | 8 | /** 9 | * Async : using the promise 10 | */ 11 | nodehun 12 | .analyze('telling') 13 | .then(analysis => { 14 | console.log(analysis) // => [' st:telling ts:0', ' st:tell ts:0 al:told is:Vg'] 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | async function analyze() { 21 | const analysis = await nodehun.analyze('telling') 22 | console.log(analysis) // => [' st:telling ts:0', ' st:tell ts:0 al:told is:Vg'] 23 | } 24 | analyze() 25 | 26 | /** 27 | * Sync 28 | */ 29 | const analysis = nodehun.analyzeSync('telling') 30 | console.log(analysis) // => [' st:telling ts:0', ' st:tell ts:0 al:told is:Vg'] 31 | -------------------------------------------------------------------------------- /src/Async/AddDictionaryWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "Worker.cc" 5 | 6 | class AddDictionaryWorker : public Worker { 7 | public: 8 | AddDictionaryWorker( 9 | HunspellContext* context, 10 | Napi::Promise::Deferred d, 11 | std::string dictionary) 12 | : Worker(context, d), dictionary(std::move(dictionary)) {} 13 | 14 | void Execute() { 15 | // Worker thread; don't use N-API here 16 | context->lockWrite(); 17 | context->instance->add_dic(dictionary.c_str()); 18 | context->unlockWrite(); 19 | } 20 | 21 | void Resolve(Napi::Promise::Deferred const &deferred) { 22 | Napi::Env env = deferred.Env(); 23 | 24 | deferred.Resolve(env.Undefined()); 25 | } 26 | 27 | private: 28 | std::string dictionary; 29 | }; -------------------------------------------------------------------------------- /hunspell/src/hunspell/Makefile.am: -------------------------------------------------------------------------------- 1 | lib_LTLIBRARIES = libhunspell-1.3.la 2 | libhunspell_1_3_includedir = $(includedir)/hunspell 3 | libhunspell_1_3_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \ 4 | dictmgr.cxx hashmgr.cxx hunspell.cxx \ 5 | suggestmgr.cxx license.myspell license.hunspell \ 6 | phonet.cxx filemgr.cxx hunzip.cxx replist.cxx 7 | 8 | libhunspell_1_3_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \ 9 | csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \ 10 | suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \ 11 | phonet.hxx filemgr.hxx hunzip.hxx w_char.hxx replist.hxx \ 12 | hunvisapi.h 13 | 14 | libhunspell_1_3_la_DEPENDENCIES=utf_info.cxx 15 | libhunspell_1_3_la_LDFLAGS=-no-undefined 16 | 17 | AM_CXXFLAGS=$(CFLAG_VISIBILITY) -DBUILDING_LIBHUNSPELL 18 | 19 | EXTRA_DIST=hunspell.dsp makefile.mk README utf_info.cxx 20 | -------------------------------------------------------------------------------- /hunspell/readme.md: -------------------------------------------------------------------------------- 1 | Hunspell Distributed 2 | -------------------- 3 | This fork of hunspell is specifically refactored to make passing around file paths irrelevant: raw buffers are passed instead.
Additionally, all extraneous tools have been removed. This allows hunspell to run on a distributed system much more easily. 4 | 5 | Some History 6 | ------------ 7 | This fork of hunspell is significant in that a few core things about hunspell have been changed (some interface classes have been created, etc) to allow buffers of dictionaries rather than references to them to be passed around. The main project hasn't wanted these changes so this fork will keep them here. The project originally started so that hunspell could run distributed on node.js, and be able to read dictionaries that were stored on a database rather than on the local file system. This has worked well, but the time to split has come, as other projects have expressed interest in using this code. -------------------------------------------------------------------------------- /src/Async/AddWithAffixWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class AddWithAffixWorker : public Worker { 6 | public: 7 | AddWithAffixWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word, 11 | std::string example) 12 | : Worker(context, d), word(std::move(word)), example(std::move(example)) {} 13 | 14 | void Execute() { 15 | // Worker thread; don't use N-API here 16 | context->lockWrite(); 17 | context->instance->add_with_affix(word.c_str(), example.c_str()); 18 | context->unlockWrite(); 19 | } 20 | 21 | void Resolve(Napi::Promise::Deferred const &deferred) { 22 | Napi::Env env = deferred.Env(); 23 | 24 | deferred.Resolve(env.Undefined()); 25 | } 26 | 27 | private: 28 | std::string word; 29 | std::string example; 30 | }; -------------------------------------------------------------------------------- /hunspell/src/hunspell/README: -------------------------------------------------------------------------------- 1 | Hunspell spell checker and morphological analyser library 2 | 3 | Documentation, tests, examples: http://hunspell.sourceforge.net 4 | 5 | Author of Hunspell: 6 | László Németh (nemethl (at) gyorsposta.hu) 7 | 8 | Hunspell based on OpenOffice.org's Myspell. MySpell's author: 9 | Kevin Hendricks (kevin.hendricks (at) sympatico.ca) 10 | 11 | License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license 12 | 13 | The contents of this library may be used under the terms of 14 | the GNU General Public License Version 2 or later (the "GPL"), or 15 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL", 16 | see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License 17 | Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html). 18 | 19 | Software distributed under these licenses is distributed on an "AS IS" basis, 20 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences 21 | for the specific language governing rights and limitations under the licenses. 
22 | -------------------------------------------------------------------------------- /src/HunspellContext.h: -------------------------------------------------------------------------------- 1 | #ifndef HunspellContext_H 2 | #define HunspellContext_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | class HunspellContext { 10 | public: 11 | Hunspell* instance; 12 | 13 | HunspellContext(Hunspell* instance): instance(instance) { 14 | uv_rwlock_init(&rwLock); 15 | }; 16 | 17 | ~HunspellContext() { 18 | if (instance) { 19 | delete instance; 20 | instance = NULL; 21 | } 22 | 23 | uv_rwlock_destroy(&rwLock); 24 | } 25 | 26 | void lockRead() { 27 | uv_rwlock_rdlock(&rwLock); 28 | } 29 | 30 | void unlockRead() { 31 | uv_rwlock_rdunlock(&rwLock); 32 | } 33 | 34 | void lockWrite() { 35 | uv_rwlock_wrlock(&rwLock); 36 | } 37 | 38 | void unlockWrite() { 39 | uv_rwlock_wrunlock(&rwLock); 40 | } 41 | 42 | private: 43 | /* 44 | * The Hunspell instance is not thread safe, so we use a mutex 45 | * to manage asynchronous usage. 46 | */ 47 | uv_rwlock_t rwLock; 48 | }; 49 | 50 | #endif -------------------------------------------------------------------------------- /hunspell/src/hunspell/strmgr.cxx: -------------------------------------------------------------------------------- 1 | #include "license.hunspell" 2 | #include "license.myspell" 3 | 4 | 5 | #include "strmgr.hxx" 6 | #include 7 | #include 8 | #include 9 | 10 | int StrMgr::fail(const char * err, const char * par) { 11 | std::cout << err << "\n" << par; 12 | return -1; 13 | } 14 | 15 | StrMgr::StrMgr(const char * str, const char * key) { 16 | linenum = 0; 17 | index = 0; 18 | size = strlen(str); 19 | if(size > 0) 20 | st = strdup(str); 21 | if (!st) fail(MSG_OPEN, "Buffer allocation failed in StrMgr."); 22 | } 23 | 24 | StrMgr::~StrMgr() 25 | { 26 | if (st) 27 | free(st); 28 | } 29 | 30 | char * StrMgr::getline() { 31 | if(index >= size) 32 | return NULL; 33 | int i = 0, 34 | l = BUFSIZE -1; 35 | for(;index < size && st[index] != '\n' && st[index] != EOF && i <= l; index++, i++) 36 | in[i] = st[index]; 37 | in[i] = '\0'; 38 | index++; 39 | if(index < size) 40 | linenum++; 41 | return strdup(in); 42 | } 43 | 44 | int StrMgr::getlinenum() { 45 | return linenum; 46 | } 47 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/htypes.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _HTYPES_HXX_ 2 | #define _HTYPES_HXX_ 3 | 4 | #define ROTATE_LEN 5 5 | 6 | #define ROTATE(v,q) \ 7 | (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1)); 8 | 9 | // hentry options 10 | #define H_OPT (1 << 0) 11 | #define H_OPT_ALIASM (1 << 1) 12 | #define H_OPT_PHON (1 << 2) 13 | 14 | // see also csutil.hxx 15 | #define HENTRY_WORD(h) &(h->word[0]) 16 | 17 | // approx. number of user defined words 18 | #define USERWORD 1000 19 | 20 | struct hentry 21 | { 22 | unsigned char blen; // word length in bytes 23 | unsigned char clen; // word length in characters (different for UTF-8 enc.) 
24 | short alen; // length of affix flag vector 25 | unsigned short * astr; // affix flag vector 26 | struct hentry * next; // next word with same hash code 27 | struct hentry * next_homonym; // next homonym word (with same hash code) 28 | char var; // variable fields (only for special pronounciation yet) 29 | char word[1]; // variable-length word (8-bit or UTF-8 encoding) 30 | }; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /examples/8_adding_words.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | let nodehun 5 | 6 | /** 7 | * Async : using the promise 8 | */ 9 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 10 | console.log(nodehun.spellSync('colour')) // => false 11 | nodehun 12 | .add('colour') 13 | .then(() => { 14 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 21 | async function generate() { 22 | console.log(nodehun.spellSync('colour')) // => false 23 | await nodehun.add('colour') 24 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 25 | } 26 | generate() 27 | 28 | /** 29 | * Sync 30 | */ 31 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 32 | console.log(nodehun.spellSync('colour')) // => false 33 | nodehun.addSync('colour') 34 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 35 | -------------------------------------------------------------------------------- /src/Async/StemWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class StemWorker : public Worker { 6 | public: 7 | StemWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here 15 | context->lockRead(); 16 | length = this->context->instance->stem(&stems, word.c_str()); 17 | context->unlockRead(); 18 | } 19 | 20 | void Resolve(Napi::Promise::Deferred const &deferred) { 21 | Napi::Env env = deferred.Env(); 22 | 23 | Napi::Array array = Napi::Array::New(env, length); 24 | for (int i = 0; i < length; i++) { 25 | array.Set(i, Napi::String::New(env, stems[i])); 26 | } 27 | 28 | context->instance->free_list(&stems, length); 29 | 30 | deferred.Resolve(array); 31 | } 32 | 33 | private: 34 | int length = 0; 35 | std::string word; 36 | char** stems = NULL; 37 | }; -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Nathan Sweet, DataSphere Inc. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the 8 | Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 17 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 18 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 19 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /src/Async/AnalyzeWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class AnalyzeWorker : public Worker { 6 | public: 7 | AnalyzeWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here 15 | context->lockRead(); 16 | length = this->context->instance->analyze(&analysis, word.c_str()); 17 | context->unlockRead(); 18 | } 19 | 20 | void Resolve(Napi::Promise::Deferred const &deferred) { 21 | Napi::Env env = deferred.Env(); 22 | 23 | Napi::Array array = Napi::Array::New(env, length); 24 | for (int i = 0; i < length; i++) { 25 | array.Set(i, Napi::String::New(env, analysis[i])); 26 | } 27 | 28 | context->instance->free_list(&analysis, length); 29 | 30 | deferred.Resolve(array); 31 | } 32 | 33 | private: 34 | int length = 0; 35 | std::string word; 36 | char** analysis = NULL; 37 | }; -------------------------------------------------------------------------------- /examples/9_adding_words_with_example_affix.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | let nodehun 5 | 6 | /** 7 | * Async : using the promise 8 | */ 9 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 10 | console.log(nodehun.spellSync('colouring')) // => false 11 | nodehun 12 | .addWithAffix('colour', 'color') 13 | .then(() => { 14 | console.log(nodehun.spellSync('colouring')) // => true, no longer incorrect 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 21 | async function generate() { 22 | console.log(nodehun.spellSync('colouring')) // => false 23 | await nodehun.addWithAffix('colour', 'color') 24 | console.log(nodehun.spellSync('colouring')) // => true, no longer incorrect 25 | } 26 | generate() 27 | 28 | /** 29 | * Sync 30 | */ 31 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 32 | console.log(nodehun.spellSync('colouring')) // => false 33 | nodehun.addWithAffixSync('colour', 'color') 34 | 
console.log(nodehun.spellSync('colouring')) // => true, no longer incorrect 35 | -------------------------------------------------------------------------------- /examples/10_removing_words.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | let nodehun 5 | 6 | /** 7 | * Async : using the promise 8 | */ 9 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 10 | nodehun.addSync('colour') 11 | console.log(nodehun.spellSync('colour')) // => true 12 | nodehun 13 | .remove('colour') 14 | .then(() => { 15 | console.log(nodehun.spellSync('colour')) // => false, no longer correct 16 | }) 17 | 18 | /** 19 | * Async : using async/await 20 | */ 21 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 22 | nodehun.addSync('colour') 23 | async function generate() { 24 | console.log(nodehun.spellSync('colour')) // => true 25 | await nodehun.remove('colour') 26 | console.log(nodehun.spellSync('colour')) // => false, no longer correct 27 | } 28 | generate() 29 | 30 | /** 31 | * Sync 32 | */ 33 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 34 | nodehun.addSync('colour') 35 | console.log(nodehun.spellSync('colour')) // => true 36 | nodehun.removeSync('colour') 37 | console.log(nodehun.spellSync('colour')) // => false, no longer correct 38 | -------------------------------------------------------------------------------- /examples/7_adding_dictionaries.js: -------------------------------------------------------------------------------- 1 | const Nodehun = require('bindings')('Nodehun') 2 | const dictionaries = require('./dictionaries') 3 | 4 | let nodehun 5 | 6 | /** 7 | * Async : using the promise 8 | */ 9 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 10 | console.log(nodehun.spellSync('colour')) // => false 11 | nodehun 12 | .addDictionary(dictionaries.en_CA.dictionary) 13 | .then(() => { 14 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 15 | }) 16 | 17 | /** 18 | * Async : using async/await 19 | */ 20 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 21 | async function generate() { 22 | console.log(nodehun.spellSync('colour')) // => false 23 | await nodehun.addDictionary(dictionaries.en_CA.dictionary) 24 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 25 | } 26 | generate() 27 | 28 | /** 29 | * Sync 30 | */ 31 | nodehun = new Nodehun(dictionaries.en_US.affix, dictionaries.en_US.dictionary) 32 | console.log(nodehun.spellSync('colour')) // => false 33 | nodehun.addDictionarySync(dictionaries.en_CA.dictionary) 34 | console.log(nodehun.spellSync('colour')) // => true, no longer incorrect 35 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/filemgr.cxx: -------------------------------------------------------------------------------- 1 | #include "license.hunspell" 2 | #include "license.myspell" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "filemgr.hxx" 9 | 10 | int FileMgr::fail(const char * err, const char * par) { 11 | fprintf(stderr, err, par); 12 | return -1; 13 | } 14 | 15 | FileMgr::FileMgr(const char * file, const char * key) { 16 | linenum = 0; 17 | hin = NULL; 18 | fin = fopen(file, "r"); 19 | if (!fin) { 20 | // check hzipped file 21 | char * st = (char *) malloc(strlen(file) + 
strlen(HZIP_EXTENSION) + 1); 22 | if (st) { 23 | strcpy(st, file); 24 | strcat(st, HZIP_EXTENSION); 25 | hin = new Hunzip(st, key); 26 | free(st); 27 | } 28 | } 29 | if (!fin && !hin) fail(MSG_OPEN, file); 30 | } 31 | 32 | FileMgr::~FileMgr() 33 | { 34 | if (fin) fclose(fin); 35 | if (hin) delete hin; 36 | } 37 | 38 | char * FileMgr::getline() { 39 | const char * l; 40 | linenum++; 41 | if (fin) return fgets(in, BUFSIZE - 1, fin); 42 | if (hin && (l = hin->getline())) return strcpy(in, l); 43 | linenum--; 44 | return NULL; 45 | } 46 | 47 | int FileMgr::getlinenum() { 48 | return linenum; 49 | } 50 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunzip.hxx: -------------------------------------------------------------------------------- 1 | /* hunzip: file decompression for sorted dictionaries with optional encryption, 2 | * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */ 3 | 4 | #ifndef _HUNZIP_HXX_ 5 | #define _HUNZIP_HXX_ 6 | 7 | #include "hunvisapi.h" 8 | 9 | #include 10 | 11 | #define BUFSIZE 65536 12 | #define HZIP_EXTENSION ".hz" 13 | 14 | #define MSG_OPEN "error: %s: cannot open\n" 15 | #define MSG_FORMAT "error: %s: not in hzip format\n" 16 | #define MSG_MEMORY "error: %s: missing memory\n" 17 | #define MSG_KEY "error: %s: missing or bad password\n" 18 | 19 | struct bit { 20 | unsigned char c[2]; 21 | int v[2]; 22 | }; 23 | 24 | class LIBHUNSPELL_DLL_EXPORTED Hunzip 25 | { 26 | 27 | protected: 28 | char * filename; 29 | FILE * fin; 30 | int bufsiz, lastbit, inc, inbits, outc; 31 | struct bit * dec; // code table 32 | char in[BUFSIZE]; // input buffer 33 | char out[BUFSIZE + 1]; // Huffman-decoded buffer 34 | char line[BUFSIZE + 50]; // decoded line 35 | int getcode(const char * key); 36 | int getbuf(); 37 | int fail(const char * err, const char * par); 38 | 39 | public: 40 | Hunzip(const char * filename, const char * key = NULL); 41 | ~Hunzip(); 42 | const char * getline(); 43 | }; 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/Async/GenerateWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class GenerateWorker : public Worker { 6 | public: 7 | GenerateWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word, 11 | std::string example) 12 | : Worker(context, d), word(std::move(word)), example(std::move(example)) {} 13 | 14 | void Execute() { 15 | // Worker thread; don't use N-API here 16 | context->lockRead(); 17 | length = context->instance->generate( 18 | &generates, 19 | word.c_str(), 20 | example.c_str() 21 | ); 22 | context->unlockRead(); 23 | } 24 | 25 | void Resolve(Napi::Promise::Deferred const &deferred) { 26 | Napi::Env env = deferred.Env(); 27 | 28 | Napi::Array array = Napi::Array::New(env, length); 29 | for (int i = 0; i < length; i++) { 30 | array.Set(i, Napi::String::New(env, generates[i])); 31 | } 32 | 33 | context->instance->free_list(&generates, length); 34 | 35 | deferred.Resolve(array); 36 | } 37 | 38 | private: 39 | int length = 0; 40 | char** generates = NULL; 41 | std::string word; 42 | std::string example; 43 | }; -------------------------------------------------------------------------------- /src/Nodehun.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'nodehun' { 2 | export class Nodehun { 3 | constructor(affix: Buffer, dictionary: 
Buffer); 4 | 5 | spell(word: string): Promise; 6 | spellSync(word: string): boolean; 7 | 8 | suggest(word: string): Promise; 9 | suggestSync(word: string): string[] | null; 10 | 11 | analyze(word: string): Promise; 12 | analyzeSync(word: string): string[]; 13 | 14 | stem(word: string): Promise; 15 | stemSync(word: string): string[]; 16 | 17 | generate(word: string, example: string): Promise; 18 | generateSync(word: string, example: string): string[]; 19 | 20 | addDictionary(dictionary: Buffer): Promise; 21 | addDictionarySync(dictionary: Buffer): void; 22 | 23 | add(word: string): Promise; 24 | addSync(word: string): void; 25 | 26 | addWithAffix(word: string, example: string): Promise; 27 | addWithAffixSync(word: string, example: string): void; 28 | 29 | remove(word: string): Promise; 30 | removeSync(word: string): void; 31 | 32 | getDictionaryEncoding(): string | undefined; 33 | getWordCharacters(): string | undefined; 34 | getWordCharactersUTF16(): string | undefined; 35 | getVersion(): string | undefined; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/Async/SuggestWorker.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "Worker.cc" 4 | 5 | class SuggestWorker : public Worker { 6 | public: 7 | SuggestWorker( 8 | HunspellContext* context, 9 | Napi::Promise::Deferred d, 10 | std::string word) 11 | : Worker(context, d), word(std::move(word)) {} 12 | 13 | void Execute() { 14 | // Worker thread; don't use N-API here 15 | context->lockRead(); 16 | bool correct = context->instance->spell(word.c_str()); 17 | if (!correct) { 18 | length = this->context->instance->suggest(&suggestions, word.c_str()); 19 | } 20 | context->unlockRead(); 21 | } 22 | 23 | void Resolve(Napi::Promise::Deferred const &deferred) { 24 | Napi::Env env = deferred.Env(); 25 | 26 | if (length == -1) { 27 | deferred.Resolve(env.Null()); 28 | return; 29 | } 30 | 31 | Napi::Array array = Napi::Array::New(env, length); 32 | for (int i = 0; i < length; i++) { 33 | array.Set(i, Napi::String::New(env, suggestions[i])); 34 | } 35 | 36 | context->instance->free_list(&suggestions, length); 37 | 38 | deferred.Resolve(array); 39 | } 40 | 41 | private: 42 | int length = -1; 43 | std::string word; 44 | char** suggestions = NULL; 45 | }; -------------------------------------------------------------------------------- /src/Async/Worker.cc: -------------------------------------------------------------------------------- 1 | #ifndef Worker_cc 2 | #define Worker_cc 3 | 4 | // Thanks @jaubourg, @Superlokkus, and @greg9504. 
5 | // https://github.com/nodejs/node-addon-api/issues/231 6 | #include 7 | #include "../HunspellContext.h" 8 | 9 | class Worker : public Napi::AsyncWorker { 10 | public: 11 | Worker(HunspellContext* context, Napi::Promise::Deferred const &d, const char* resource_name) : AsyncWorker(get_fake_callback(d.Env()).Value(), resource_name), deferred(d), context(context) {} 12 | Worker(HunspellContext* context, Napi::Promise::Deferred const &d) : AsyncWorker(get_fake_callback(d.Env()).Value()), deferred(d), context(context) {} 13 | 14 | virtual void Resolve(Napi::Promise::Deferred const &deferred) = 0; 15 | 16 | void OnOK() override { 17 | Resolve(deferred); 18 | } 19 | 20 | void OnError(Napi::Error const &error) override { 21 | deferred.Reject(error.Value()); 22 | } 23 | 24 | protected: 25 | HunspellContext* context; 26 | 27 | private: 28 | static Napi::Value noop(Napi::CallbackInfo const &info) { 29 | return info.Env().Undefined(); 30 | } 31 | 32 | Napi::Reference const &get_fake_callback(Napi::Env const &env) { 33 | static Napi::Reference fake_callback 34 | = Napi::Reference::New(Napi::Function::New(env, noop), 1); 35 | fake_callback.SuppressDestruct(); 36 | 37 | return fake_callback; 38 | } 39 | 40 | Napi::Promise::Deferred deferred; 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nodehun", 3 | "version": "3.0.2", 4 | "description": "The Hunspell binding for nodejs that exposes as much of hunspell as possible and also adds new features. Hunspell is a first class spellcheck library used by Google, Apple, and Mozilla.", 5 | "license": "MIT", 6 | "keywords": [ 7 | "spell", 8 | "spellcheck", 9 | "spellchecker", 10 | "hunspell", 11 | "spell check", 12 | "spell checker", 13 | "check" 14 | ], 15 | "repository": "https://github.com/Wulf/nodehun", 16 | "bugs": "https://github.com/Wulf/nodehun/issues", 17 | "author": "Nathan Sweet ", 18 | "contributors": [ 19 | "Nathan Sweet ", 20 | "Haris Khan ", 21 | "Titus Wormer ", 22 | "Espen Hovlandsdal ", 23 | "Thomas Beverley " 24 | ], 25 | "main": "build/Release/Nodehun", 26 | "types": "src/Nodehun.d.ts", 27 | "gypfile": true, 28 | "dependencies": { 29 | "node-addon-api": "*" 30 | }, 31 | "devDependencies": { 32 | "@types/mocha": "^5.2.7", 33 | "@types/node": "^12.12.6", 34 | "bindings": "~1.2.1", 35 | "mocha": "^6.2.2", 36 | "ts-node": "^8.4.1", 37 | "typescript": "^3.7.2" 38 | }, 39 | "scripts": { 40 | "start": "npx nodemon -e ts,js,cc,h,gyp -x \"npm run build && npm run test\"", 41 | "start-test": "npx nodemon -e ts,js -x \"npm run test\"", 42 | "build": "node-gyp rebuild -j 8", 43 | "test": "ts-node --transpile-only node_modules/mocha/bin/mocha ./test/*.spec.ts", 44 | "performance-test": "echo \"This command only works with node version 11 or lower!\" && npm i chart.js@2.9.2 chartjs-node@1.7.1 nodehun@2.0.12 && ts-node test/performance/performance.ts" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/phonet.hxx: -------------------------------------------------------------------------------- 1 | /* phonetic.c - generic replacement aglogithms for phonetic transformation 2 | Copyright (C) 2000 Bjoern Jacke 3 | 4 | This library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License version 2.1 as published by the Free Software Foundation; 7 | 8 | This 
library is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public 14 | License along with this library; If not, see 15 | . 16 | 17 | Changelog: 18 | 19 | 2000-01-05 Bjoern Jacke 20 | Initial Release insprired by the article about phonetic 21 | transformations out of c't 25/1999 22 | 23 | 2007-07-26 Bjoern Jacke 24 | Released under MPL/GPL/LGPL tri-license for Hunspell 25 | 26 | 2007-08-23 Laszlo Nemeth 27 | Porting from Aspell to Hunspell using C-like structs 28 | */ 29 | 30 | #ifndef __PHONETHXX__ 31 | #define __PHONETHXX__ 32 | 33 | #define HASHSIZE 256 34 | #define MAXPHONETLEN 256 35 | #define MAXPHONETUTF8LEN (MAXPHONETLEN * 4) 36 | 37 | #include "hunvisapi.h" 38 | 39 | struct phonetable { 40 | char utf8; 41 | cs_info * lang; 42 | int num; 43 | char * * rules; 44 | int hash[HASHSIZE]; 45 | }; 46 | 47 | LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms); 48 | 49 | LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target, 50 | int len, phonetable & phone); 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/license.hunspell: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 | * 4 | * The contents of this file are subject to the Mozilla Public License Version 5 | * 1.1 (the "License"); you may not use this file except in compliance with 6 | * the License. You may obtain a copy of the License at 7 | * http://www.mozilla.org/MPL/ 8 | * 9 | * Software distributed under the License is distributed on an "AS IS" basis, 10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 | * for the specific language governing rights and limitations under the 12 | * License. 13 | * 14 | * The Original Code is Hunspell, based on MySpell. 15 | * 16 | * The Initial Developers of the Original Code are 17 | * Kevin Hendricks (MySpell) and Laszlo Nemeth (Hunspell). 18 | * Portions created by the Initial Developers are Copyright (C) 2002-2005 19 | * the Initial Developers. All Rights Reserved. 20 | * 21 | * Contributor(s): 22 | * David Einstein 23 | * Davide Prina 24 | * Giuseppe Modugno 25 | * Gianluca Turconi 26 | * Simon Brouwer 27 | * Noll Janos 28 | * Biro Arpad 29 | * Goldman Eleonora 30 | * Sarlos Tamas 31 | * Bencsath Boldizsar 32 | * Halacsy Peter 33 | * Dvornik Laszlo 34 | * Gefferth Andras 35 | * Nagy Viktor 36 | * Varga Daniel 37 | * Chris Halls 38 | * Rene Engelhard 39 | * Bram Moolenaar 40 | * Dafydd Jones 41 | * Harri Pitkanen 42 | * Andras Timar 43 | * Tor Lillqvist 44 | * 45 | * Alternatively, the contents of this file may be used under the terms of 46 | * either the GNU General Public License Version 2 or later (the "GPL"), or 47 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 48 | * in which case the provisions of the GPL or the LGPL are applicable instead 49 | * of those above. 
If you wish to allow use of your version of this file only 50 | * under the terms of either the GPL or the LGPL, and not to allow others to 51 | * use your version of this file under the terms of the MPL, indicate your 52 | * decision by deleting the provisions above and replace them with the notice 53 | * and other provisions required by the GPL or the LGPL. If you do not delete 54 | * the provisions above, a recipient may use your version of this file under 55 | * the terms of any one of the MPL, the GPL or the LGPL. 56 | * 57 | * ***** END LICENSE BLOCK ***** */ 58 | -------------------------------------------------------------------------------- /hunspell/README.myspell: -------------------------------------------------------------------------------- 1 | MySpell is a simple spell checker that uses affix 2 | compression and is modelled after the spell checker 3 | ispell. 4 | 5 | MySpell was written to explore how affix compression 6 | can be implemented. 7 | 8 | The Main features of MySpell are: 9 | 10 | 1. written in C++ to make it easier to interface with 11 | Pspell, OpenOffice, AbiWord, etc 12 | 13 | 2. it is stateless, uses no static variables and 14 | should be completely reentrant with almost no 15 | ifdefs 16 | 17 | 3. it tries to be as compatible with ispell to 18 | the extent it can. It can read slightly modified 19 | versions of munched ispell dictionaries (and it 20 | comes with a munched english wordlist borrowed from 21 | Kevin Atkinson's excellent Aspell. 22 | 23 | 4. it uses a heavily modified aff file format that 24 | can be derived from ispell aff files but uses 25 | the iso-8859-X character sets only 26 | 27 | 5. it is simple with *lots* of comments that 28 | describes how the affixes are stored 29 | and tested for (based on the approach used by 30 | ispell). 31 | 32 | 6. it supports improved suggestions with replacement 33 | tables and ngram-scoring based mechanisms in addition 34 | to the main suggestion mechanisms 35 | 36 | 7. like ispell it has a BSD license (and no 37 | advertising clause) 38 | 39 | But ... it has *no* support for adding words 40 | to a personal dictionary, *no* support for converting 41 | between various text encodings, and *no* command line 42 | interface (it is purely meant to be a library). 43 | 44 | It can not (in any way) replace all of the functionality 45 | of ispell or aspell/pspell. It is meant as a learning 46 | tool for understanding affix compression and for 47 | being used by front ends like OpenOffice, Abiword, etc. 48 | 49 | MySpell has been tested under Linux and Solaris 50 | and has the world's simplest Makefile and no 51 | configure support. 52 | 53 | It does come with a simple example program that 54 | spell checks some words and returns suggestions. 55 | 56 | To build a static library and an example 57 | program under Linux simply type: 58 | 59 | tar -zxvf myspell.tar.gz 60 | cd myspell2 61 | make 62 | 63 | To run the example program: 64 | ./example ./en_US.aff ./en_US.dic checkme.lst 65 | 66 | Please play around with it and let me know 67 | what you think. 68 | 69 | Please see the file CONTRIBUTORS for more info. 
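To make the affix-compression idea above concrete, a minimal dictionary pair in this ispell-derived format could look like the following (an illustrative sketch only, not a file shipped with this distribution). The affix file declares one suffix class S:

    SFX S Y 1
    SFX S 0 s .

and the word list (whose first line is the entry count) attaches that class to its entries:

    2
    cat/S
    dog/S

A single entry such as cat/S then covers both "cat" and "cats": the rule (strip nothing, append "s", any ending) is applied at lookup time instead of storing every inflected form.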
70 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/replist.cxx: -------------------------------------------------------------------------------- 1 | #include "license.hunspell" 2 | #include "license.myspell" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "replist.hxx" 9 | #include "csutil.hxx" 10 | 11 | RepList::RepList(int n) { 12 | dat = (replentry **) malloc(sizeof(replentry *) * n); 13 | if (dat == 0) size = 0; else size = n; 14 | pos = 0; 15 | } 16 | 17 | RepList::~RepList() 18 | { 19 | for (int i = 0; i < pos; i++) { 20 | free(dat[i]->pattern); 21 | free(dat[i]->pattern2); 22 | free(dat[i]); 23 | } 24 | free(dat); 25 | } 26 | 27 | int RepList::get_pos() { 28 | return pos; 29 | } 30 | 31 | replentry * RepList::item(int n) { 32 | return dat[n]; 33 | } 34 | 35 | int RepList::near(const char * word) { 36 | int p1 = 0; 37 | int p2 = pos; 38 | while ((p2 - p1) > 1) { 39 | int m = (p1 + p2) / 2; 40 | int c = strcmp(word, dat[m]->pattern); 41 | if (c <= 0) { 42 | if (c < 0) p2 = m; else p1 = p2 = m; 43 | } else p1 = m; 44 | } 45 | return p1; 46 | } 47 | 48 | int RepList::match(const char * word, int n) { 49 | if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern); 50 | return 0; 51 | } 52 | 53 | int RepList::add(char * pat1, char * pat2) { 54 | if (pos >= size || pat1 == NULL || pat2 == NULL) return 1; 55 | replentry * r = (replentry *) malloc(sizeof(replentry)); 56 | if (r == NULL) return 1; 57 | r->pattern = mystrrep(pat1, "_", " "); 58 | r->pattern2 = mystrrep(pat2, "_", " "); 59 | r->start = false; 60 | r->end = false; 61 | dat[pos++] = r; 62 | for (int i = pos - 1; i > 0; i--) { 63 | r = dat[i]; 64 | if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) { 65 | dat[i] = dat[i - 1]; 66 | dat[i - 1] = r; 67 | } else break; 68 | } 69 | return 0; 70 | } 71 | 72 | int RepList::conv(const char * word, char * dest) { 73 | int stl = 0; 74 | int change = 0; 75 | for (size_t i = 0; i < strlen(word); i++) { 76 | int n = near(word + i); 77 | int l = match(word + i, n); 78 | if (l) { 79 | strcpy(dest + stl, dat[n]->pattern2); 80 | stl += strlen(dat[n]->pattern2); 81 | i += l - 1; 82 | change = 1; 83 | } else dest[stl++] = word[i]; 84 | } 85 | dest[stl] = '\0'; 86 | return change; 87 | } 88 | -------------------------------------------------------------------------------- /src/Nodehun.h: -------------------------------------------------------------------------------- 1 | #ifndef Nodehun_H 2 | #define Nodehun_H 3 | 4 | #include 5 | #include "HunspellContext.h" 6 | 7 | class Nodehun : public Napi::ObjectWrap { 8 | public: 9 | static Napi::Object Init(Napi::Env env, Napi::Object exports); 10 | static Napi::Object NewInstance(const Napi::CallbackInfo& info); 11 | Nodehun(const Napi::CallbackInfo& info); 12 | ~Nodehun(); 13 | 14 | private: 15 | static Napi::FunctionReference constructor; 16 | HunspellContext* context; 17 | 18 | // (dictionary: Buffer) => void 19 | Napi::Value addDictionary(const Napi::CallbackInfo& info); 20 | Napi::Value addDictionarySync(const Napi::CallbackInfo& info); 21 | 22 | // (word: string) => boolean 23 | Napi::Value spell(const Napi::CallbackInfo& info); 24 | Napi::Value spellSync(const Napi::CallbackInfo& info); 25 | 26 | // (word: string) => string[] | null 27 | Napi::Value suggest(const Napi::CallbackInfo& info); 28 | Napi::Value suggestSync(const Napi::CallbackInfo& info); 29 | 30 | // (word: string) => string[] 31 | Napi::Value analyze(const Napi::CallbackInfo& info); 
32 | Napi::Value analyzeSync (const Napi::CallbackInfo& info); 33 | 34 | // (word: string) => string[] 35 | Napi::Value stem(const Napi::CallbackInfo& info); 36 | Napi::Value stemSync(const Napi::CallbackInfo& info); 37 | 38 | // (word: string, example: string) => string[] 39 | Napi::Value generate(const Napi::CallbackInfo& info); 40 | Napi::Value generateSync(const Napi::CallbackInfo& info); 41 | 42 | // (word: string) => void 43 | Napi::Value add(const Napi::CallbackInfo& info); 44 | Napi::Value addSync(const Napi::CallbackInfo& info); 45 | 46 | // (word: string, example: string) => void 47 | Napi::Value addWithAffix(const Napi::CallbackInfo& info); 48 | Napi::Value addWithAffixSync(const Napi::CallbackInfo& info); 49 | 50 | // (word: string) => void 51 | Napi::Value remove(const Napi::CallbackInfo& info); 52 | Napi::Value removeSync(const Napi::CallbackInfo& info); 53 | 54 | // () => string | undefined 55 | Napi::Value getDictionaryEncoding(const Napi::CallbackInfo& info); 56 | // () => string | undefined 57 | Napi::Value getWordCharacters(const Napi::CallbackInfo& info); 58 | // () => string | undefined 59 | Napi::Value getWordCharactersUTF16(const Napi::CallbackInfo& info); 60 | // () => string | undefined 61 | Napi::Value getVersion(const Napi::CallbackInfo& info); 62 | }; 63 | 64 | #endif -------------------------------------------------------------------------------- /.github/workflows/Test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | test-linux-macos: 11 | name: Test on ${{ matrix.os }} with Node ${{ matrix.node-version }} 12 | runs-on: ${{ matrix.os }} 13 | 14 | strategy: 15 | matrix: 16 | os: [ubuntu-latest, macos-latest] 17 | node-version: ["lts/*"] 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v4 22 | with: 23 | submodules: recursive 24 | 25 | - name: Setup Node.js 26 | uses: actions/setup-node@v4 27 | with: 28 | node-version: ${{ matrix.node-version }} 29 | 30 | - name: Setup Python 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: "3.x" 34 | 35 | - name: Install build tools (Linux) 36 | if: runner.os == 'Linux' 37 | run: | 38 | sudo apt-get update 39 | sudo apt-get install -y build-essential 40 | 41 | - name: Install dependencies 42 | run: | 43 | npm install 44 | 45 | - name: Display versions 46 | run: | 47 | node --version 48 | npm --version 49 | python --version || python3 --version 50 | 51 | - name: Run tests 52 | run: npm test 53 | 54 | test-windows: 55 | name: Test on Windows with Node ${{ matrix.node-version }} (${{ matrix.arch }}) 56 | runs-on: windows-latest 57 | 58 | strategy: 59 | matrix: 60 | node-version: ["lts/*"] 61 | arch: ["x64"] 62 | include: 63 | - node-version: "22" 64 | arch: "x86" 65 | 66 | steps: 67 | - name: Configure git line endings 68 | run: git config --global core.autocrlf input 69 | 70 | - name: Checkout code 71 | uses: actions/checkout@v4 72 | with: 73 | submodules: recursive 74 | 75 | - name: Setup Node.js 76 | uses: actions/setup-node@v4 77 | with: 78 | node-version: ${{ matrix.node-version }} 79 | architecture: ${{ matrix.arch }} 80 | 81 | - name: Setup Python 82 | uses: actions/setup-python@v5 83 | with: 84 | python-version: "3.x" 85 | 86 | - name: Install dependencies 87 | run: npm install 88 | 89 | - name: Display versions 90 | run: | 91 | node --version 92 | npm --version 93 | python --version 94 | 95 | - name: Run tests 96 | run: npm test 97 
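Note: the Nodehun.h header listed above only declares the wrapper methods; their bodies live in src/Nodehun.cc and the src/Async/*Worker.cc files, which are not reproduced in this dump. Purely as an illustration of how a synchronous node-addon-api wrapper of this shape is commonly written, here is a hypothetical sketch of a spellSync body. It is not the repository's actual code: the HunspellContext field name (hunspell) and the argument check are invented for illustration, while the method signature and the `context` member come from the Nodehun.h listing above.

    // Hypothetical sketch only -- not the repository's actual Nodehun.cc.
    // Assumes HunspellContext exposes its wrapped Hunspell instance as a
    // member named `hunspell` (an assumption; the real field may differ).
    #include <string>
    #include <napi.h>
    #include "Nodehun.h"

    Napi::Value Nodehun::spellSync(const Napi::CallbackInfo& info) {
      Napi::Env env = info.Env();
      if (info.Length() < 1 || !info[0].IsString()) {
        Napi::TypeError::New(env, "expected a word (string)").ThrowAsJavaScriptException();
        return env.Undefined();
      }
      std::string word = info[0].As<Napi::String>().Utf8Value();
      // Hunspell::spell() returns 0 for a misspelled word, non-zero otherwise.
      bool ok = context->hunspell->spell(word.c_str()) != 0;
      return Napi::Boolean::New(env, ok);
    }

The asynchronous counterparts declared in the header (spell, suggest, and so on) are serviced by the worker classes under src/Async/; the sketch above covers only the synchronous path.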
| -------------------------------------------------------------------------------- /hunspell/src/hunspell/makefile.mk: -------------------------------------------------------------------------------- 1 | #************************************************************************* 2 | # Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 | # 4 | # The contents of this file are subject to the Mozilla Public License Version 5 | # 1.1 (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # http://www.mozilla.org/MPL/ 8 | # 9 | # Software distributed under the License is distributed on an "AS IS" basis, 10 | # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 | # for the specific language governing rights and limitations under the 12 | # License. 13 | # 14 | # Alternatively, the contents of this file may be used under the terms of 15 | # either the GNU General Public License Version 2 or later (the "GPL"), or 16 | # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 17 | # in which case the provisions of the GPL or the LGPL are applicable instead 18 | # of those above. If you wish to allow use of your version of this file only 19 | # under the terms of either the GPL or the LGPL, and not to allow others to 20 | # use your version of this file under the terms of the MPL, indicate your 21 | # decision by deleting the provisions above and replace them with the notice 22 | # and other provisions required by the GPL or the LGPL. If you do not delete 23 | # the provisions above, a recipient may use your version of this file under 24 | # the terms of any one of the MPL, the GPL or the LGPL. 25 | # 26 | #************************************************************************* 27 | 28 | PRJ = ../../../../../.. 
29 | 30 | PRJNAME = hunspell 31 | TARGET = hunspell 32 | LIBTARGET=YES 33 | EXTERNAL_WARNINGS_NOT_ERRORS := TRUE 34 | UWINAPILIB= 35 | 36 | #----- Settings --------------------------------------------------------- 37 | 38 | .INCLUDE : settings.mk 39 | 40 | # --- Files -------------------------------------------------------- 41 | 42 | CFLAGS+=-I..$/..$/ 43 | CDEFS+=-DOPENOFFICEORG 44 | 45 | SLOFILES= \ 46 | $(SLO)$/affentry.obj \ 47 | $(SLO)$/affixmgr.obj \ 48 | $(SLO)$/dictmgr.obj \ 49 | $(SLO)$/csutil.obj \ 50 | $(SLO)$/hashmgr.obj \ 51 | $(SLO)$/suggestmgr.obj \ 52 | $(SLO)$/phonet.obj \ 53 | $(SLO)$/hunzip.obj \ 54 | $(SLO)$/filemgr.obj \ 55 | $(SLO)$/replist.obj \ 56 | $(SLO)$/hunspell.obj 57 | 58 | LIB1TARGET= $(SLB)$/lib$(TARGET).lib 59 | LIB1ARCHIV= $(LB)/lib$(TARGET).a 60 | LIB1OBJFILES= $(SLOFILES) 61 | 62 | # --- Targets ------------------------------------------------------ 63 | 64 | .INCLUDE : target.mk 65 | 66 | -------------------------------------------------------------------------------- /hunspell/binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | 'target_defaults': { 3 | 'default_configuration': 'Debug', 4 | 'configurations': { 5 | 'Debug': { 6 | 'defines': [ 'DEBUG', '_DEBUG' ], 7 | 'msvs_settings': { 8 | 'VCCLCompilerTool': { 9 | 'RuntimeLibrary': 1, # static debug 10 | }, 11 | }, 12 | }, 13 | 'Release': { 14 | 'defines': [ 'NDEBUG' ], 15 | 'msvs_settings': { 16 | 'VCCLCompilerTool': { 17 | 'RuntimeLibrary': 0, # static release 18 | }, 19 | }, 20 | } 21 | }, 22 | 'msvs_settings': { 23 | 'VCCLCompilerTool': { 24 | }, 25 | 'VCLibrarianTool': { 26 | }, 27 | 'VCLinkerTool': { 28 | 'GenerateDebugInformation': 'true', 29 | }, 30 | }, 31 | 'conditions': [ 32 | ['OS == "win"', { 33 | 'defines': [ 34 | 'WIN32' 35 | ], 36 | }] 37 | ], 38 | }, 39 | 40 | 'targets': [ 41 | { 42 | 'target_name': 'hunspell', 43 | 'type': 'static_library', 44 | 'include_dirs': [ 'src/hunspell' ], 45 | 'defines': [ 'HUNSPELL_STATIC' ], 46 | 'direct_dependent_settings': { 47 | 'include_dirs': [ 'src/hunspell' ], 48 | 'defines': [ 'HUNSPELL_STATIC' ], 49 | }, 50 | 'cflags': [ '-O3' ], 51 | 'sources': [ 52 | 'src/hunspell/affentry.cxx', 53 | 'src/hunspell/affentry.hxx', 54 | 'src/hunspell/affixmgr.cxx', 55 | 'src/hunspell/affixmgr.hxx', 56 | 'src/hunspell/atypes.hxx', 57 | 'src/hunspell/baseaffix.hxx', 58 | 'src/hunspell/csutil.cxx', 59 | 'src/hunspell/csutil.hxx', 60 | 'src/hunspell/dictmgr.cxx', 61 | 'src/hunspell/dictmgr.hxx', 62 | 'src/hunspell/filemgr.cxx', 63 | 'src/hunspell/filemgr.hxx', 64 | 'src/hunspell/hashmgr.cxx', 65 | 'src/hunspell/hashmgr.hxx', 66 | 'src/hunspell/htypes.hxx', 67 | 'src/hunspell/hunspell.cxx', 68 | 'src/hunspell/hunspell.hxx', 69 | 'src/hunspell/hunzip.cxx', 70 | 'src/hunspell/hunzip.hxx', 71 | 'src/hunspell/istrmgr.hxx', 72 | 'src/hunspell/langnum.hxx', 73 | 'src/hunspell/phonet.cxx', 74 | 'src/hunspell/phonet.hxx', 75 | 'src/hunspell/replist.cxx', 76 | 'src/hunspell/replist.hxx', 77 | 'src/hunspell/strmgr.cxx', 78 | 'src/hunspell/strmgr.hxx', 79 | 'src/hunspell/suggestmgr.cxx', 80 | 'src/hunspell/suggestmgr.hxx', 81 | 'src/hunspell/w_char.hxx' 82 | ], 83 | 'conditions': [ 84 | ['OS=="win"', { 85 | 'include_dirs': [ 'src/win_api' ], 86 | 'sources': [ 87 | 'src/win_api/config.h', 88 | ], 89 | }, { 90 | 'sources': [ 91 | 'src/hunspell/config.h', 92 | ], 93 | }], 94 | ] 95 | } 96 | ] 97 | } -------------------------------------------------------------------------------- /hunspell/src/hunspell/atypes.hxx: 
-------------------------------------------------------------------------------- 1 | #ifndef _ATYPES_HXX_ 2 | #define _ATYPES_HXX_ 3 | 4 | #ifndef HUNSPELL_WARNING 5 | #include 6 | #ifdef HUNSPELL_WARNING_ON 7 | #define HUNSPELL_WARNING fprintf 8 | #else 9 | // empty inline function to switch off warnings (instead of the C99 standard variadic macros) 10 | static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {} 11 | #endif 12 | #endif 13 | 14 | // HUNSTEM def. 15 | #define HUNSTEM 16 | 17 | #include "hashmgr.hxx" 18 | #include "w_char.hxx" 19 | 20 | #define SETSIZE 256 21 | #define CONTSIZE 65536 22 | #define MAXWORDLEN 100 23 | #define MAXWORDUTF8LEN 256 24 | 25 | // affentry options 26 | #define aeXPRODUCT (1 << 0) 27 | #define aeUTF8 (1 << 1) 28 | #define aeALIASF (1 << 2) 29 | #define aeALIASM (1 << 3) 30 | #define aeLONGCOND (1 << 4) 31 | 32 | // compound options 33 | #define IN_CPD_NOT 0 34 | #define IN_CPD_BEGIN 1 35 | #define IN_CPD_END 2 36 | #define IN_CPD_OTHER 3 37 | 38 | // info options 39 | #define SPELL_COMPOUND (1 << 0) 40 | #define SPELL_FORBIDDEN (1 << 1) 41 | #define SPELL_ALLCAP (1 << 2) 42 | #define SPELL_NOCAP (1 << 3) 43 | #define SPELL_INITCAP (1 << 4) 44 | #define SPELL_ORIGCAP (1 << 5) 45 | #define SPELL_WARN (1 << 6) 46 | 47 | #define MAXLNLEN 8192 48 | 49 | #define MINCPDLEN 3 50 | #define MAXCOMPOUND 10 51 | #define MAXCONDLEN 20 52 | #define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *)) 53 | 54 | #define MAXACC 1000 55 | 56 | #define FLAG unsigned short 57 | #define FLAG_NULL 0x00 58 | #define FREE_FLAG(a) a = 0 59 | 60 | #define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) 61 | 62 | struct affentry 63 | { 64 | char * strip; 65 | char * appnd; 66 | unsigned char stripl; 67 | unsigned char appndl; 68 | char numconds; 69 | char opts; 70 | unsigned short aflag; 71 | unsigned short * contclass; 72 | short contclasslen; 73 | union { 74 | char conds[MAXCONDLEN]; 75 | struct { 76 | char conds1[MAXCONDLEN_1]; 77 | char * conds2; 78 | } l; 79 | } c; 80 | char * morphcode; 81 | }; 82 | 83 | struct guessword { 84 | char * word; 85 | bool allow; 86 | char * orig; 87 | }; 88 | 89 | struct mapentry { 90 | char ** set; 91 | int len; 92 | }; 93 | 94 | struct flagentry { 95 | FLAG * def; 96 | int len; 97 | }; 98 | 99 | struct patentry { 100 | char * pattern; 101 | char * pattern2; 102 | char * pattern3; 103 | FLAG cond; 104 | FLAG cond2; 105 | }; 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hashmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _HASHMGR_HXX_ 2 | #define _HASHMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include 7 | 8 | #include "htypes.hxx" 9 | #include "istrmgr.hxx" 10 | 11 | enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; 12 | 13 | class LIBHUNSPELL_DLL_EXPORTED HashMgr 14 | { 15 | int tablesize; 16 | struct hentry ** tableptr; 17 | int userword; 18 | flag flag_mode; 19 | int complexprefixes; 20 | int utf8; 21 | unsigned short forbiddenword; 22 | int langnum; 23 | char * enc; 24 | char * lang; 25 | struct cs_info * csconv; 26 | char * ignorechars; 27 | unsigned short * ignorechars_utf16; 28 | int ignorechars_utf16_len; 29 | int numaliasf; // flag vector `compression' with aliases 30 | unsigned short ** aliasf; 31 | unsigned short * aliasflen; 32 | int numaliasm; // morphological desciption `compression' with aliases 33 | char ** aliasm; 34 | 35 | 36 | public: 37 | 
HashMgr(const char * tpath, const char * apath, const char * key = NULL, bool notpath = false); 38 | HashMgr(const char * tpath, const char * apath, bool notpath = false); 39 | ~HashMgr(); 40 | 41 | struct hentry * lookup(const char *) const; 42 | int hash(const char *) const; 43 | struct hentry * walk_hashtable(int & col, struct hentry * hp) const; 44 | 45 | int add(const char * word); 46 | int add_with_affix(const char * word, const char * pattern); 47 | int remove(const char * word); 48 | int decode_flags(unsigned short ** result, char * flags, IStrMgr * af); 49 | unsigned short decode_flag(const char * flag); 50 | char * encode_flag(unsigned short flag); 51 | int is_aliasf(); 52 | int get_aliasf(int index, unsigned short ** fvec, IStrMgr * af); 53 | int is_aliasm(); 54 | char * get_aliasm(int index); 55 | 56 | private: 57 | void Init(const char * tstr, const char * astr,const char * key, bool notpath); 58 | int get_clen_and_captype(const char * word, int wbl, int * captype); 59 | int load_tables(const char * tpath, const char * key, bool notpath); 60 | int add_word(const char * word, int wbl, int wcl, unsigned short * ap, 61 | int al, const char * desc, bool onlyupcase); 62 | int load_config(const char * affpath, const char * key, bool notpath); 63 | int parse_aliasf(char * line, IStrMgr * af); 64 | int add_hidden_capitalized_word(char * word, int wbl, int wcl, 65 | unsigned short * flags, int al, char * dp, int captype); 66 | int parse_aliasm(char * line, IStrMgr * af); 67 | int remove_forbidden_flag(const char * word); 68 | 69 | }; 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /hunspell/AUTHORS.myspell: -------------------------------------------------------------------------------- 1 | Developer Credits: 2 | 3 | Special credit and thanks go to ispell's creator Geoff Kuenning. 4 | Ispell affix compression code was used as the basis for the 5 | affix code used in MySpell. Specifically Geoff's use of a 6 | conds[] array that makes it easy to check if the conditions 7 | required for a particular affix are present was very 8 | ingenious! Kudos to Geoff. Very nicely done. 9 | BTW: ispell is available under a BSD style license 10 | from Geoff Kuennings ispell website: 11 | http://www.cs.ucla.edu/ficus-members/geoff/ispell.html 12 | 13 | 14 | Kevin Hendricks is the original 15 | author and now maintainer of the MySpell codebase. Recent 16 | additions include ngram support, and related character maps 17 | to help improve and create suggestions for very poorly 18 | spelled words. 19 | 20 | Please send any and all contributions or improvements 21 | to him or to dev@lingucomponent.openoffice.org. 22 | 23 | 24 | David Einstein (Deinst@world.std.com) developed an almost 25 | complete rewrite of MySpell for use by the Mozilla project. 26 | David and I are now working on parallel development tracks 27 | to help our respective projects (Mozilla and OpenOffice.org) 28 | and we will maintain full affix file and dictionary file 29 | compatibility and work on merging our versions of MySpell 30 | back into a single tree. David has been a significant help 31 | in improving MySpell. 32 | 33 | 34 | Németh László is the author of 35 | the Hungarian dictionary and he developed and contributed 36 | extensive changes to MySpell including ... 37 | * code to support compound words in MySpell 38 | * fixed numerous problems with encoding case conversion tables. 
39 | * designed/developed replacement tables to improve suggestions 40 | * changed affix file parsing to trees to greatly speed loading 41 | * removed the need for malloc/free pairs in suffix_check which 42 | speeds up spell checking in suffix rich languages by 20% 43 | 44 | Davide Prina , Giuseppe Modugno 45 | , Gianluca Turconi 46 | all from the it_IT OpenOffice.org team performed an 47 | extremely detailed code review of MySpell and generated 48 | fixes for bugs, leaks, and speedup improvements. 49 | 50 | Simon Brouwer for fixes and enhancements 51 | that have greatly improved MySpell auggestions 52 | * n-gram suggestions for an initcap word have an init. cap. 53 | * fix for too many n-gram suggestions from specialized dictionary, 54 | * fix for long suggestions rather than close ones in case of 55 | dictionaries with many compound words (kompuuter) 56 | * optionally disabling split-word suggestions (controlled 57 | by NOSPLITSUGS line in affix file) 58 | 59 | 60 | Special Thanks to all others who have either contributed ideas or 61 | testing for MySpell 62 | 63 | 64 | Thanks, 65 | 66 | Kevin Hendricks 67 | kevin.hendricks@sympatico.ca 68 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/license.myspell: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 3 | * And Contributors. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * 3. All modifications to the source code must be clearly marked as 17 | * such. Binary redistributions based on modified source code 18 | * must be clearly marked as modified versions in the documentation 19 | * and/or other materials provided with the distribution. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 25 | * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 | * SUCH DAMAGE. 33 | * 34 | * 35 | * NOTE: A special thanks and credit goes to Geoff Kuenning 36 | * the creator of ispell. MySpell's affix algorithms were 37 | * based on those of ispell which should be noted is 38 | * copyright Geoff Kuenning et.al. and now available 39 | * under a BSD style license. 
For more information on ispell 40 | * and affix compression in general, please see: 41 | * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html 42 | * (the home page for ispell) 43 | * 44 | * An almost complete rewrite of MySpell for use by 45 | * the Mozilla project has been developed by David Einstein 46 | * (Deinst@world.std.com). David and I are now 47 | * working on parallel development tracks to help 48 | * our respective projects (Mozilla and OpenOffice.org 49 | * and we will maintain full affix file and dictionary 50 | * file compatibility and work on merging our versions 51 | * of MySpell back into a single tree. David has been 52 | * a significant help in improving MySpell. 53 | * 54 | * Special thanks also go to La'szlo' Ne'meth 55 | * who is the author of the 56 | * Hungarian dictionary and who developed and contributed 57 | * the code to support compound words in MySpell 58 | * and fixed numerous problems with the encoding 59 | * case conversion tables. 60 | * 61 | */ 62 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunspell.h: -------------------------------------------------------------------------------- 1 | #ifndef _MYSPELLMGR_H_ 2 | #define _MYSPELLMGR_H_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct Hunhandle Hunhandle; 11 | 12 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath); 13 | 14 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_notpath(const char * affpath, const char * dpath); 15 | 16 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,const char * key); 17 | 18 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key_notpath(const char * affpath, const char * dpath,const char * key); 19 | 20 | LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell); 21 | 22 | /* spell(word) - spellcheck word 23 | * output: 0 = bad word, not 0 = good word 24 | */ 25 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *); 26 | 27 | LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell); 28 | 29 | /* suggest(suggestions, word) - search suggestions 30 | * input: pointer to an array of strings pointer and the (bad) word 31 | * array of strings pointer (here *slst) may not be initialized 32 | * output: number of suggestions in string array, and suggestions in 33 | * a newly allocated array of strings (*slts will be NULL when number 34 | * of suggestion equals 0.) 35 | */ 36 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word); 37 | 38 | /* morphological functions */ 39 | 40 | /* analyze(result, word) - morphological analysis of the word */ 41 | 42 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word); 43 | 44 | /* stem(result, word) - stemmer function */ 45 | 46 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word); 47 | 48 | /* stem(result, analysis, n) - get stems from a morph. 
analysis 49 | * example: 50 | * char ** result, result2; 51 | * int n1 = Hunspell_analyze(result, "words"); 52 | * int n2 = Hunspell_stem2(result2, result, n1); 53 | */ 54 | 55 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n); 56 | 57 | /* generate(result, word, word2) - morphological generation by example(s) */ 58 | 59 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word, 60 | const char * word2); 61 | 62 | /* generate(result, word, desc, n) - generation by morph. description(s) 63 | * example: 64 | * char ** result; 65 | * char * affix = "is:plural"; // description depends from dictionaries, too 66 | * int n = Hunspell_generate2(result, "word", &affix, 1); 67 | * for (int i = 0; i < n; i++) printf("%s\n", result[i]); 68 | */ 69 | 70 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, 71 | char** desc, int n); 72 | 73 | /* functions for run-time modification of the dictionary */ 74 | 75 | /* add word to the run-time dictionary */ 76 | 77 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word); 78 | 79 | /* add word to the run-time dictionary with affix flags of 80 | * the example (a dictionary word): Hunspell will recognize 81 | * affixed forms of the new word, too. 82 | */ 83 | 84 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example); 85 | 86 | /* remove word from the run-time dictionary */ 87 | 88 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word); 89 | 90 | /* free suggestion lists */ 91 | 92 | LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n); 93 | 94 | #ifdef __cplusplus 95 | } 96 | #endif 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/suggestmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _SUGGESTMGR_HXX_ 2 | #define _SUGGESTMGR_HXX_ 3 | 4 | #define MAXSWL 100 5 | #define MAXSWUTF8L (MAXSWL * 4) 6 | #define MAX_ROOTS 100 7 | #define MAX_WORDS 100 8 | #define MAX_GUESS 200 9 | #define MAXNGRAMSUGS 4 10 | #define MAXPHONSUGS 2 11 | #define MAXCOMPOUNDSUGS 3 12 | 13 | // timelimit: max ~1/4 sec (process time on Linux) for a time consuming function 14 | #define TIMELIMIT (CLOCKS_PER_SEC >> 2) 15 | #define MINTIMER 100 16 | #define MAXPLUSTIMER 100 17 | 18 | #define NGRAM_LONGER_WORSE (1 << 0) 19 | #define NGRAM_ANY_MISMATCH (1 << 1) 20 | #define NGRAM_LOWERING (1 << 2) 21 | #define NGRAM_WEIGHTED (1 << 3) 22 | 23 | #include "hunvisapi.h" 24 | 25 | #include "atypes.hxx" 26 | #include "affixmgr.hxx" 27 | #include "hashmgr.hxx" 28 | #include "langnum.hxx" 29 | #include 30 | 31 | enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; 32 | 33 | class LIBHUNSPELL_DLL_EXPORTED SuggestMgr 34 | { 35 | char * ckey; 36 | int ckeyl; 37 | w_char * ckey_utf; 38 | 39 | char * ctry; 40 | int ctryl; 41 | w_char * ctry_utf; 42 | 43 | AffixMgr* pAMgr; 44 | int maxSug; 45 | struct cs_info * csconv; 46 | int utf8; 47 | int langnum; 48 | int nosplitsugs; 49 | int maxngramsugs; 50 | int maxcpdsugs; 51 | int complexprefixes; 52 | 53 | 54 | public: 55 | SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr); 56 | ~SuggestMgr(); 57 | 58 | int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug); 59 | int ngsuggest(char ** wlst, char * word, int ns, 
HashMgr** pHMgr, int md); 60 | int suggest_auto(char*** slst, const char * word, int nsug); 61 | int suggest_stems(char*** slst, const char * word, int nsug); 62 | int suggest_pos_stems(char*** slst, const char * word, int nsug); 63 | 64 | char * suggest_morph(const char * word); 65 | char * suggest_gen(char ** pl, int pln, char * pattern); 66 | char * suggest_morph_for_spelling_error(const char * word); 67 | 68 | private: 69 | int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest, 70 | int * timer, clock_t * timelimit); 71 | int checkword(const char *, int, int, int *, clock_t *); 72 | int check_forbidden(const char *, int); 73 | 74 | int capchars(char **, const char *, int, int); 75 | int replchars(char**, const char *, int, int); 76 | int doubletwochars(char**, const char *, int, int); 77 | int forgotchar(char **, const char *, int, int); 78 | int swapchar(char **, const char *, int, int); 79 | int longswapchar(char **, const char *, int, int); 80 | int movechar(char **, const char *, int, int); 81 | int extrachar(char **, const char *, int, int); 82 | int badcharkey(char **, const char *, int, int); 83 | int badchar(char **, const char *, int, int); 84 | int twowords(char **, const char *, int, int); 85 | int fixstems(char **, const char *, int); 86 | 87 | int capchars_utf(char **, const w_char *, int wl, int, int); 88 | int doubletwochars_utf(char**, const w_char *, int wl, int, int); 89 | int forgotchar_utf(char**, const w_char *, int wl, int, int); 90 | int extrachar_utf(char**, const w_char *, int wl, int, int); 91 | int badcharkey_utf(char **, const w_char *, int wl, int, int); 92 | int badchar_utf(char **, const w_char *, int wl, int, int); 93 | int swapchar_utf(char **, const w_char *, int wl, int, int); 94 | int longswapchar_utf(char **, const w_char *, int, int, int); 95 | int movechar_utf(char **, const w_char *, int, int, int); 96 | 97 | int mapchars(char**, const char *, int, int); 98 | int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *); 99 | int ngram(int n, char * s1, const char * s2, int opt); 100 | int mystrlen(const char * word); 101 | int leftcommonsubstring(char * s1, const char * s2); 102 | int commoncharacterpositions(char * s1, const char * s2, int * is_swap); 103 | void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n); 104 | void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result); 105 | int lcslen(const char * s, const char* s2); 106 | char * suggest_hentry_gen(hentry * rv, char * pattern); 107 | 108 | }; 109 | 110 | #endif 111 | 112 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunspell.dsp: -------------------------------------------------------------------------------- 1 | # Microsoft Developer Studio Project File - Name="hunspell" - Package Owner=<4> 2 | # Microsoft Developer Studio Generated Build File, Format Version 6.00 3 | # ** DO NOT EDIT ** 4 | 5 | # TARGTYPE "Win32 (x86) Static Library" 0x0104 6 | 7 | CFG=hunspell - Win32 Debug 8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE, 9 | !MESSAGE use the Export Makefile command and run 10 | !MESSAGE 11 | !MESSAGE NMAKE /f "hunspell.mak". 12 | !MESSAGE 13 | !MESSAGE You can specify a configuration when running NMAKE 14 | !MESSAGE by defining the macro CFG on the command line. 
For example: 15 | !MESSAGE 16 | !MESSAGE NMAKE /f "hunspell.mak" CFG="hunspell - Win32 Debug" 17 | !MESSAGE 18 | !MESSAGE Possible choices for configuration are: 19 | !MESSAGE 20 | !MESSAGE "hunspell - Win32 Release" (based on "Win32 (x86) Static Library") 21 | !MESSAGE "hunspell - Win32 Debug" (based on "Win32 (x86) Static Library") 22 | !MESSAGE 23 | 24 | # Begin Project 25 | # PROP AllowPerConfigDependencies 0 26 | # PROP Scc_ProjName "" 27 | # PROP Scc_LocalPath "" 28 | CPP=cl.exe 29 | RSC=rc.exe 30 | 31 | !IF "$(CFG)" == "hunspell - Win32 Release" 32 | 33 | # PROP BASE Use_MFC 0 34 | # PROP BASE Use_Debug_Libraries 0 35 | # PROP BASE Output_Dir "Release" 36 | # PROP BASE Intermediate_Dir "Release" 37 | # PROP BASE Target_Dir "" 38 | # PROP Use_MFC 0 39 | # PROP Use_Debug_Libraries 0 40 | # PROP Output_Dir "Release" 41 | # PROP Intermediate_Dir "Release" 42 | # PROP Target_Dir "" 43 | # ADD BASE CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c 44 | # ADD CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c 45 | # ADD BASE RSC /l 0x40e /d "NDEBUG" 46 | # ADD RSC /l 0x40e /d "NDEBUG" 47 | BSC32=bscmake.exe 48 | # ADD BASE BSC32 /nologo 49 | # ADD BSC32 /nologo 50 | LIB32=link.exe -lib 51 | # ADD BASE LIB32 /nologo 52 | # ADD LIB32 /nologo 53 | 54 | !ELSEIF "$(CFG)" == "hunspell - Win32 Debug" 55 | 56 | # PROP BASE Use_MFC 0 57 | # PROP BASE Use_Debug_Libraries 1 58 | # PROP BASE Output_Dir "Debug" 59 | # PROP BASE Intermediate_Dir "Debug" 60 | # PROP BASE Target_Dir "" 61 | # PROP Use_MFC 0 62 | # PROP Use_Debug_Libraries 1 63 | # PROP Output_Dir "Debug" 64 | # PROP Intermediate_Dir "Debug" 65 | # PROP Target_Dir "" 66 | # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c 67 | # ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c 68 | # ADD BASE RSC /l 0x40e /d "_DEBUG" 69 | # ADD RSC /l 0x40e /d "_DEBUG" 70 | BSC32=bscmake.exe 71 | # ADD BASE BSC32 /nologo 72 | # ADD BSC32 /nologo 73 | LIB32=link.exe -lib 74 | # ADD BASE LIB32 /nologo 75 | # ADD LIB32 /nologo 76 | 77 | !ENDIF 78 | 79 | # Begin Target 80 | 81 | # Name "hunspell - Win32 Release" 82 | # Name "hunspell - Win32 Debug" 83 | # Begin Group "Source Files" 84 | 85 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" 86 | # Begin Source File 87 | 88 | SOURCE=.\affentry.cxx 89 | # End Source File 90 | # Begin Source File 91 | 92 | SOURCE=.\affixmgr.cxx 93 | # End Source File 94 | # Begin Source File 95 | 96 | SOURCE=.\csutil.cxx 97 | # End Source File 98 | # Begin Source File 99 | 100 | SOURCE=.\dictmgr.cxx 101 | # End Source File 102 | # Begin Source File 103 | 104 | SOURCE=.\hashmgr.cxx 105 | # End Source File 106 | # Begin Source File 107 | 108 | SOURCE=.\hunspell.cxx 109 | # End Source File 110 | # Begin Source File 111 | 112 | SOURCE=.\suggestmgr.cxx 113 | # End Source File 114 | # End Group 115 | # Begin Group "Header Files" 116 | 117 | # PROP Default_Filter "h;hpp;hxx;hm;inl" 118 | # Begin Source File 119 | 120 | SOURCE=.\affentry.hxx 121 | # End Source File 122 | # Begin Source File 123 | 124 | SOURCE=.\affixmgr.hxx 125 | # End Source File 126 | # Begin Source File 127 | 128 | SOURCE=.\atypes.hxx 129 | # End Source File 130 | # Begin Source File 131 | 132 | SOURCE=.\baseaffix.hxx 133 | # End Source File 134 | # Begin Source File 135 | 136 | SOURCE=.\csutil.hxx 137 | # End Source File 138 | # Begin Source File 139 | 140 | SOURCE=.\dictmgr.hxx 141 | # End Source File 142 | # 
Begin Source File 143 | 144 | SOURCE=.\hashmgr.hxx 145 | # End Source File 146 | # Begin Source File 147 | 148 | SOURCE=.\htypes.hxx 149 | # End Source File 150 | # Begin Source File 151 | 152 | SOURCE=.\istrmgr.hxx 153 | # End Source File 154 | # Begin Source File 155 | 156 | SOURCE=.\langnum.hxx 157 | # End Source File 158 | # Begin Source File 159 | 160 | SOURCE=.\hunspell.hxx 161 | # End Source File 162 | # Begin Source File 163 | 164 | SOURCE=.\suggestmgr.hxx 165 | # End Source File 166 | # End Group 167 | # End Target 168 | # End Project 169 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/affentry.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _AFFIX_HXX_ 2 | #define _AFFIX_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include "atypes.hxx" 7 | #include "baseaffix.hxx" 8 | #include "affixmgr.hxx" 9 | 10 | /* A Prefix Entry */ 11 | 12 | class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry 13 | { 14 | AffixMgr* pmyMgr; 15 | 16 | PfxEntry * next; 17 | PfxEntry * nexteq; 18 | PfxEntry * nextne; 19 | PfxEntry * flgnxt; 20 | 21 | public: 22 | 23 | PfxEntry(AffixMgr* pmgr, affentry* dp ); 24 | ~PfxEntry(); 25 | 26 | inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); } 27 | struct hentry * checkword(const char * word, int len, char in_compound, 28 | const FLAG needflag = FLAG_NULL); 29 | 30 | struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = 0); 31 | 32 | char * check_morph(const char * word, int len, char in_compound, 33 | const FLAG needflag = FLAG_NULL); 34 | 35 | char * check_twosfx_morph(const char * word, int len, 36 | char in_compound, const FLAG needflag = FLAG_NULL); 37 | 38 | inline FLAG getFlag() { return aflag; } 39 | inline const char * getKey() { return appnd; } 40 | char * add(const char * word, int len); 41 | 42 | inline short getKeyLen() { return appndl; } 43 | 44 | inline const char * getMorph() { return morphcode; } 45 | 46 | inline const unsigned short * getCont() { return contclass; } 47 | inline short getContLen() { return contclasslen; } 48 | 49 | inline PfxEntry * getNext() { return next; } 50 | inline PfxEntry * getNextNE() { return nextne; } 51 | inline PfxEntry * getNextEQ() { return nexteq; } 52 | inline PfxEntry * getFlgNxt() { return flgnxt; } 53 | 54 | inline void setNext(PfxEntry * ptr) { next = ptr; } 55 | inline void setNextNE(PfxEntry * ptr) { nextne = ptr; } 56 | inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; } 57 | inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; } 58 | 59 | inline char * nextchar(char * p); 60 | inline int test_condition(const char * st); 61 | }; 62 | 63 | 64 | 65 | 66 | /* A Suffix Entry */ 67 | 68 | class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry 69 | { 70 | AffixMgr* pmyMgr; 71 | char * rappnd; 72 | 73 | SfxEntry * next; 74 | SfxEntry * nexteq; 75 | SfxEntry * nextne; 76 | SfxEntry * flgnxt; 77 | 78 | SfxEntry * l_morph; 79 | SfxEntry * r_morph; 80 | SfxEntry * eq_morph; 81 | 82 | public: 83 | 84 | SfxEntry(AffixMgr* pmgr, affentry* dp ); 85 | ~SfxEntry(); 86 | 87 | inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); } 88 | struct hentry * checkword(const char * word, int len, int optflags, 89 | PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, 90 | // const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT); 91 | const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG 
badflag = 0); 92 | 93 | struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = 0); 94 | 95 | char * check_twosfx_morph(const char * word, int len, int optflags, 96 | PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 97 | struct hentry * get_next_homonym(struct hentry * he); 98 | struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx, 99 | const FLAG cclass, const FLAG needflag); 100 | 101 | 102 | inline FLAG getFlag() { return aflag; } 103 | inline const char * getKey() { return rappnd; } 104 | char * add(const char * word, int len); 105 | 106 | 107 | inline const char * getMorph() { return morphcode; } 108 | 109 | inline const unsigned short * getCont() { return contclass; } 110 | inline short getContLen() { return contclasslen; } 111 | inline const char * getAffix() { return appnd; } 112 | 113 | inline short getKeyLen() { return appndl; } 114 | 115 | inline SfxEntry * getNext() { return next; } 116 | inline SfxEntry * getNextNE() { return nextne; } 117 | inline SfxEntry * getNextEQ() { return nexteq; } 118 | 119 | inline SfxEntry * getLM() { return l_morph; } 120 | inline SfxEntry * getRM() { return r_morph; } 121 | inline SfxEntry * getEQM() { return eq_morph; } 122 | inline SfxEntry * getFlgNxt() { return flgnxt; } 123 | 124 | inline void setNext(SfxEntry * ptr) { next = ptr; } 125 | inline void setNextNE(SfxEntry * ptr) { nextne = ptr; } 126 | inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; } 127 | inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; } 128 | 129 | inline char * nextchar(char * p); 130 | inline int test_condition(const char * st, const char * begin); 131 | 132 | }; 133 | 134 | #endif 135 | 136 | 137 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/dictmgr.cxx: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "dictmgr.hxx" 8 | 9 | DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) 10 | { 11 | // load list of etype entries 12 | pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry)); 13 | if (pdentry) { 14 | if (parse_file(dictpath, etype)) { 15 | numdict = 0; 16 | // no dictionary.lst found is okay 17 | } 18 | } 19 | } 20 | 21 | 22 | DictMgr::~DictMgr() 23 | { 24 | dictentry * pdict = NULL; 25 | if (pdentry) { 26 | pdict = pdentry; 27 | for (int i=0;ilang) { 29 | free(pdict->lang); 30 | pdict->lang = NULL; 31 | } 32 | if (pdict->region) { 33 | free(pdict->region); 34 | pdict->region=NULL; 35 | } 36 | if (pdict->filename) { 37 | free(pdict->filename); 38 | pdict->filename = NULL; 39 | } 40 | pdict++; 41 | } 42 | free(pdentry); 43 | pdentry = NULL; 44 | pdict = NULL; 45 | } 46 | numdict = 0; 47 | } 48 | 49 | 50 | // read in list of etype entries and build up structure to describe them 51 | int DictMgr::parse_file(const char * dictpath, const char * etype) 52 | { 53 | 54 | int i; 55 | char line[MAXDICTENTRYLEN+1]; 56 | dictentry * pdict = pdentry; 57 | 58 | // open the dictionary list file 59 | FILE * dictlst; 60 | dictlst = fopen(dictpath,"r"); 61 | if (!dictlst) { 62 | return 1; 63 | } 64 | 65 | // step one is to parse the dictionary list building up the 66 | // descriptive structures 67 | 68 | // read in each line ignoring any that dont start with etype 69 | while (fgets(line,MAXDICTENTRYLEN,dictlst)) { 70 | mychomp(line); 71 | 72 | /* parse in a dictionary entry */ 73 | if 
(strncmp(line,etype,4) == 0) { 74 | if (numdict < MAXDICTIONARIES) { 75 | char * tp = line; 76 | char * piece; 77 | i = 0; 78 | while ((piece=mystrsep(&tp,' '))) { 79 | if (*piece != '\0') { 80 | switch(i) { 81 | case 0: break; 82 | case 1: pdict->lang = mystrdup(piece); break; 83 | case 2: if (strcmp (piece, "ANY") == 0) 84 | pdict->region = mystrdup(""); 85 | else 86 | pdict->region = mystrdup(piece); 87 | break; 88 | case 3: pdict->filename = mystrdup(piece); break; 89 | default: break; 90 | } 91 | i++; 92 | } 93 | free(piece); 94 | } 95 | if (i == 4) { 96 | numdict++; 97 | pdict++; 98 | } else { 99 | switch (i) { 100 | case 3: 101 | free(pdict->region); 102 | pdict->region=NULL; 103 | case 2: //deliberate fallthrough 104 | free(pdict->lang); 105 | pdict->lang=NULL; 106 | default: 107 | break; 108 | } 109 | fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line); 110 | fflush(stderr); 111 | } 112 | } 113 | } 114 | } 115 | fclose(dictlst); 116 | return 0; 117 | } 118 | 119 | // return text encoding of dictionary 120 | int DictMgr::get_list(dictentry ** ppentry) 121 | { 122 | *ppentry = pdentry; 123 | return numdict; 124 | } 125 | 126 | 127 | 128 | // strip strings into token based on single char delimiter 129 | // acts like strsep() but only uses a delim char and not 130 | // a delim string 131 | 132 | char * DictMgr::mystrsep(char ** stringp, const char delim) 133 | { 134 | char * rv = NULL; 135 | char * mp = *stringp; 136 | size_t n = strlen(mp); 137 | if (n > 0) { 138 | char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n); 139 | if (dp) { 140 | *stringp = dp+1; 141 | size_t nc = dp - mp; 142 | rv = (char *) malloc(nc+1); 143 | if (rv) { 144 | memcpy(rv,mp,nc); 145 | *(rv+nc) = '\0'; 146 | } 147 | } else { 148 | rv = (char *) malloc(n+1); 149 | if (rv) { 150 | memcpy(rv, mp, n); 151 | *(rv+n) = '\0'; 152 | *stringp = mp + n; 153 | } 154 | } 155 | } 156 | return rv; 157 | } 158 | 159 | 160 | // replaces strdup with ansi version 161 | char * DictMgr::mystrdup(const char * s) 162 | { 163 | char * d = NULL; 164 | if (s) { 165 | int sl = strlen(s)+1; 166 | d = (char *) malloc(sl); 167 | if (d) memcpy(d,s,sl); 168 | } 169 | return d; 170 | } 171 | 172 | 173 | // remove cross-platform text line end characters 174 | void DictMgr:: mychomp(char * s) 175 | { 176 | int k = strlen(s); 177 | if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; 178 | if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; 179 | } 180 | 181 | -------------------------------------------------------------------------------- /hunspell/README.hunspell: -------------------------------------------------------------------------------- 1 | About Hunspell 2 | -------------- 3 | 4 | Hunspell is a spell checker and morphological analyzer library and program 5 | designed for languages with rich morphology and complex word compounding or 6 | character encoding. Hunspell interfaces: Ispell-like terminal interface 7 | using Curses library, Ispell pipe interface, OpenOffice.org UNO module. 8 | 9 | Hunspell's code base comes from the OpenOffice.org MySpell 10 | (http://lingucomponent.openoffice.org/MySpell-3.zip). See README.MYSPELL, 11 | AUTHORS.MYSPELL and license.myspell files. 12 | Hunspell is designed to eventually replace Myspell in OpenOffice.org. 
13 | 14 | Main features of Hunspell spell checker and morphological analyzer: 15 | 16 | - Unicode support (affix rules work only with the first 65535 Unicode characters) 17 | 18 | - Morphological analysis (in custom item and arrangement style) and stemming 19 | 20 | - Max. 65535 affix classes and twofold affix stripping (for agglutinative 21 | languages, like Azeri, Basque, Estonian, Finnish, Hungarian, Turkish, etc.) 22 | 23 | - Support complex compoundings (for example, Hungarian and German) 24 | 25 | - Support language specific features (for example, special casing of 26 | Azeri and Turkish dotted i, or German sharp s) 27 | 28 | - Handle conditional affixes, circumfixes, fogemorphemes, 29 | forbidden words, pseudoroots and homonyms. 30 | 31 | - Free software (LGPL, GPL, MPL tri-license) 32 | 33 | Compiling on Unix/Linux 34 | ----------------------- 35 | 36 | ./configure 37 | make 38 | make install 39 | 40 | For dictionary development, use the --with-warnings option of configure. 41 | 42 | For interactive user interface of Hunspell executable, use the --with-ui option. 43 | 44 | The developer packages you need to compile Hunspell's interface: 45 | 46 | glibc-devel 47 | 48 | optional developer packages: 49 | 50 | ncurses (need for --with-ui) 51 | readline (for fancy input line editing, 52 | configure parameter: --with-readline) 53 | locale and gettext (but you can also use the 54 | --with-included-gettext configure parameter) 55 | 56 | Hunspell distribution uses new Autoconf (2.59) and Automake (1.9). 57 | 58 | Compiling on Windows 59 | -------------------- 60 | 61 | 1. Compiling with Windows SDK 62 | 63 | Download the free Windows SDK of Microsoft, open a command prompt 64 | window and cd into hunspell/src/win_api. Use the following command 65 | to compile hunspell: 66 | 67 | vcbuild 68 | 69 | 2. Compiling in Cygwin environment 70 | 71 | Download and install Cygwin environment for Windows with the following 72 | extra packages: 73 | 74 | make 75 | gcc-g++ development package 76 | mingw development package (for cygwin.dll free native Windows compilation) 77 | ncurses, readline (for user interface) 78 | iconv (character conversion) 79 | 80 | 2.1. Cygwin1.dll dependent compiling 81 | 82 | Open a Cygwin shell, cd into the hunspell root directory: 83 | 84 | ./configure 85 | make 86 | make install 87 | 88 | For dictionary development, use the --with-warnings option of configure. 89 | 90 | For interactive user interface of Hunspell executable, use the --with-ui option. 91 | 92 | readline configure parameter: --with-readline (for fancy input line editing) 93 | 94 | 1.2. 
Cygwin1.dll free compiling 95 | 96 | Open a Cygwin shell, cd into the hunspell/src/win_api and 97 | 98 | make -f Makefile.cygwin 99 | 100 | Testing 101 | ------- 102 | 103 | Testing Hunspell (see tests in tests/ subdirectory): 104 | 105 | make check 106 | 107 | or with Valgrind debugger: 108 | 109 | make check 110 | VALGRIND=[Valgrind_tool] make check 111 | 112 | For example: 113 | 114 | make check 115 | VALGRIND=memcheck make check 116 | 117 | Documentation 118 | ------------- 119 | 120 | features and dictionary format: 121 | man 4 hunspell 122 | 123 | man hunspell 124 | hunspell -h 125 | http://hunspell.sourceforge.net 126 | 127 | Usage 128 | ----- 129 | 130 | The src/tools dictionary contains ten executables after compiling 131 | (or some of them are in the src/win_api): 132 | 133 | affixcompress: dictionary generation from large (millions of words) vocabularies 134 | analyze: example of spell checking, stemming and morphological analysis 135 | chmorph: example of automatic morphological generation and conversion 136 | example: example of spell checking and suggestion 137 | hunspell: main program for spell checking and others (see manual) 138 | hunzip: decompressor of hzip format 139 | hzip: compressor of hzip format 140 | makealias: alias compression (Hunspell only, not back compatible with MySpell) 141 | munch: dictionary generation from vocabularies (it needs an affix file, too). 142 | unmunch: list all recognized words of a MySpell dictionary 143 | wordforms: word generation (Hunspell version of unmunch) 144 | 145 | After compiling and installing (see INSTALL) you can 146 | run the Hunspell spell checker (compiled with user interface) 147 | with a Hunspell or Myspell dictionary: 148 | 149 | hunspell -d en_US text.txt 150 | 151 | or without interface: 152 | 153 | hunspell 154 | hunspell -d en_UK -l 164 | 165 | Linking with Hunspell static library: 166 | g++ -lhunspell example.cxx 167 | 168 | Dictionaries 169 | ------------ 170 | 171 | Myspell & Hunspell dictionaries: 172 | http://wiki.services.openoffice.org/wiki/Dictionaries 173 | 174 | Aspell dictionaries (need some conversion): 175 | ftp://ftp.gnu.org/gnu/aspell/dict 176 | Conversion steps: see relevant feature request at http://hunspell.sf.net. 
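To make the "Linking with Hunspell static library: g++ -lhunspell example.cxx" step in the Usage section above concrete, here is a minimal example.cxx sketch against the C++ API declared in hunspell.hxx (listed further below). The en_US.aff/en_US.dic paths and the sample word are placeholders; the calls themselves (spell, suggest, free_list) follow the signatures in that header, with spell() returning 0 for a bad word.

    // Minimal sketch of using the Hunspell C++ API (see hunspell.hxx below).
    // The .aff/.dic paths are placeholders for a real dictionary pair.
    #include <cstdio>
    #include "hunspell.hxx"

    int main() {
      Hunspell h("en_US.aff", "en_US.dic");
      const char* word = "recieve";
      if (h.spell(word) == 0) {            // 0 = bad word, non-zero = good word
        char** slst = NULL;
        int n = h.suggest(&slst, word);    // allocates the suggestion array
        for (int i = 0; i < n; i++) printf("%s\n", slst[i]);
        h.free_list(&slst, n);             // release it again
      }
      return 0;
    }

Compile it as described above, e.g. g++ -lhunspell example.cxx, after installing the library.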
177 | 178 | László Németh 179 | nemeth at OOo 180 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunspell.hxx: -------------------------------------------------------------------------------- 1 | #include "hunvisapi.h" 2 | 3 | #include "hashmgr.hxx" 4 | #include "affixmgr.hxx" 5 | #include "suggestmgr.hxx" 6 | #include "langnum.hxx" 7 | 8 | #define SPELL_XML "" 9 | 10 | #define MAXDIC 20 11 | #define MAXSUGGESTION 15 12 | #define MAXSHARPS 5 13 | 14 | #define HUNSPELL_OK (1 << 0) 15 | #define HUNSPELL_OK_WARN (1 << 1) 16 | 17 | #ifndef _MYSPELLMGR_HXX_ 18 | #define _MYSPELLMGR_HXX_ 19 | 20 | class LIBHUNSPELL_DLL_EXPORTED Hunspell 21 | { 22 | AffixMgr* pAMgr; 23 | HashMgr* pHMgr[MAXDIC]; 24 | int maxdic; 25 | SuggestMgr* pSMgr; 26 | bool isnotpath; 27 | char * affixpath; 28 | char * encoding; 29 | struct cs_info * csconv; 30 | int langnum; 31 | int utf8; 32 | int complexprefixes; 33 | char** wordbreak; 34 | 35 | public: 36 | 37 | /* Hunspell(aff, dic) - constructor of Hunspell class 38 | * input: path of affix file and dictionary file 39 | */ 40 | 41 | Hunspell(const char * affpath, const char * dpath, const char * key = NULL, bool notpath = false); 42 | Hunspell(const char * affpath, const char * dpath, bool notpath = false); 43 | ~Hunspell(); 44 | 45 | /* load extra dictionaries (only dic files) */ 46 | int add_dic(const char * dpath, const char * key = NULL); 47 | 48 | /* spell(word) - spellcheck word 49 | * output: 0 = bad word, not 0 = good word 50 | * 51 | * plus output: 52 | * info: information bit array, fields: 53 | * SPELL_COMPOUND = a compound word 54 | * SPELL_FORBIDDEN = an explicit forbidden word 55 | * root: root (stem), when input is a word with affix(es) 56 | */ 57 | 58 | int spell(const char * word, int * info = NULL, char ** root = NULL); 59 | 60 | /* suggest(suggestions, word) - search suggestions 61 | * input: pointer to an array of strings pointer and the (bad) word 62 | * array of strings pointer (here *slst) may not be initialized 63 | * output: number of suggestions in string array, and suggestions in 64 | * a newly allocated array of strings (*slts will be NULL when number 65 | * of suggestion equals 0.) 66 | */ 67 | 68 | int suggest(char*** slst, const char * word); 69 | 70 | /* deallocate suggestion lists */ 71 | 72 | void free_list(char *** slst, int n); 73 | 74 | char * get_dic_encoding(); 75 | 76 | /* morphological functions */ 77 | 78 | /* analyze(result, word) - morphological analysis of the word */ 79 | 80 | int analyze(char*** slst, const char * word); 81 | 82 | /* stem(result, word) - stemmer function */ 83 | 84 | int stem(char*** slst, const char * word); 85 | 86 | /* stem(result, analysis, n) - get stems from a morph. analysis 87 | * example: 88 | * char ** result, result2; 89 | * int n1 = analyze(&result, "words"); 90 | * int n2 = stem(&result2, result, n1); 91 | */ 92 | 93 | int stem(char*** slst, char ** morph, int n); 94 | 95 | /* generate(result, word, word2) - morphological generation by example(s) */ 96 | 97 | int generate(char*** slst, const char * word, const char * word2); 98 | 99 | /* generate(result, word, desc, n) - generation by morph. 
description(s) 100 | * example: 101 | * char ** result; 102 | * char * affix = "is:plural"; // description depends from dictionaries, too 103 | * int n = generate(&result, "word", &affix, 1); 104 | * for (int i = 0; i < n; i++) printf("%s\n", result[i]); 105 | */ 106 | 107 | int generate(char*** slst, const char * word, char ** desc, int n); 108 | 109 | /* functions for run-time modification of the dictionary */ 110 | 111 | /* add word to the run-time dictionary */ 112 | 113 | int add(const char * word); 114 | 115 | /* add word to the run-time dictionary with affix flags of 116 | * the example (a dictionary word): Hunspell will recognize 117 | * affixed forms of the new word, too. 118 | */ 119 | 120 | int add_with_affix(const char * word, const char * example); 121 | 122 | /* remove word from the run-time dictionary */ 123 | 124 | int remove(const char * word); 125 | 126 | /* other */ 127 | 128 | /* get extra word characters definied in affix file for tokenization */ 129 | const char * get_wordchars(); 130 | unsigned short * get_wordchars_utf16(int * len); 131 | 132 | struct cs_info * get_csconv(); 133 | const char * get_version(); 134 | 135 | int get_langnum() const; 136 | 137 | /* experimental and deprecated functions */ 138 | 139 | #ifdef HUNSPELL_EXPERIMENTAL 140 | /* suffix is an affix flag string, similarly in dictionary files */ 141 | int put_word_suffix(const char * word, const char * suffix); 142 | char * morph_with_correction(const char * word); 143 | 144 | /* spec. suggestions */ 145 | int suggest_auto(char*** slst, const char * word); 146 | int suggest_pos_stems(char*** slst, const char * word); 147 | #endif 148 | 149 | private: 150 | void Init(const char * affpath, const char * dpath, const char * key, bool notpath); 151 | int cleanword(char *, const char *, int * pcaptype, int * pabbrev); 152 | int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev); 153 | void mkinitcap(char *); 154 | int mkinitcap2(char * p, w_char * u, int nc); 155 | int mkinitsmall2(char * p, w_char * u, int nc); 156 | void mkallcap(char *); 157 | int mkallcap2(char * p, w_char * u, int nc); 158 | void mkallsmall(char *); 159 | int mkallsmall2(char * p, w_char * u, int nc); 160 | struct hentry * checkword(const char *, int * info, char **root); 161 | char * sharps_u8_l1(char * dest, char * source); 162 | hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root); 163 | int is_keepcase(const hentry * rv); 164 | int insert_sug(char ***slst, char * word, int ns); 165 | void cat_result(char * result, char * st); 166 | char * stem_description(const char * desc); 167 | int spellml(char*** slst, const char * word); 168 | int get_xml_par(char * dest, const char * par, int maxl); 169 | const char * get_xml_pos(const char * s, const char * attr); 170 | int get_xml_list(char ***slst, char * list, const char * tag); 171 | int check_xml_par(const char * q, const char * attr, const char * value); 172 | 173 | }; 174 | 175 | #endif 176 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/hunzip.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "hunzip.hxx" 6 | 7 | #define CODELEN 65536 8 | #define BASEBITREC 5000 9 | 10 | #define UNCOMPRESSED '\002' 11 | #define MAGIC "hz0" 12 | #define MAGIC_ENCRYPT "hz1" 13 | #define MAGICLEN (sizeof(MAGIC) - 1) 14 | 15 | int Hunzip::fail(const char * err, const char * par) { 16 
| fprintf(stderr, err, par); 17 | return -1; 18 | } 19 | 20 | Hunzip::Hunzip(const char * file, const char * key) { 21 | bufsiz = 0; 22 | lastbit = 0; 23 | inc = 0; 24 | outc = 0; 25 | dec = NULL; 26 | fin = NULL; 27 | filename = (char *) malloc(strlen(file) + 1); 28 | if (filename) strcpy(filename, file); 29 | if (getcode(key) == -1) bufsiz = -1; 30 | else bufsiz = getbuf(); 31 | } 32 | 33 | int Hunzip::getcode(const char * key) { 34 | unsigned char c[2]; 35 | int i, j, n, p; 36 | int allocatedbit = BASEBITREC; 37 | const char * enc = key; 38 | 39 | if (!filename) return -1; 40 | 41 | fin = fopen(filename, "rb"); 42 | if (!fin) return -1; 43 | 44 | // read magic number 45 | if ((fread(in, 1, 3, fin) < MAGICLEN) 46 | || !(strncmp(MAGIC, in, MAGICLEN) == 0 || 47 | strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { 48 | return fail(MSG_FORMAT, filename); 49 | } 50 | 51 | // check encryption 52 | if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { 53 | unsigned char cs; 54 | if (!key) return fail(MSG_KEY, filename); 55 | if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); 56 | for (cs = 0; *enc; enc++) cs ^= *enc; 57 | if (cs != c[0]) return fail(MSG_KEY, filename); 58 | enc = key; 59 | } else key = NULL; 60 | 61 | // read record count 62 | if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); 63 | 64 | if (key) { 65 | c[0] ^= *enc; 66 | if (*(++enc) == '\0') enc = key; 67 | c[1] ^= *enc; 68 | } 69 | 70 | n = ((int) c[0] << 8) + c[1]; 71 | dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit)); 72 | if (!dec) return fail(MSG_MEMORY, filename); 73 | dec[0].v[0] = 0; 74 | dec[0].v[1] = 0; 75 | 76 | // read codes 77 | for (i = 0; i < n; i++) { 78 | unsigned char l; 79 | if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); 80 | if (key) { 81 | if (*(++enc) == '\0') enc = key; 82 | c[0] ^= *enc; 83 | if (*(++enc) == '\0') enc = key; 84 | c[1] ^= *enc; 85 | } 86 | if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); 87 | if (key) { 88 | if (*(++enc) == '\0') enc = key; 89 | l ^= *enc; 90 | } 91 | if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename); 92 | if (key) for (j = 0; j <= l/8; j++) { 93 | if (*(++enc) == '\0') enc = key; 94 | in[j] ^= *enc; 95 | } 96 | p = 0; 97 | for (j = 0; j < l; j++) { 98 | int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0; 99 | int oldp = p; 100 | p = dec[p].v[b]; 101 | if (p == 0) { 102 | lastbit++; 103 | if (lastbit == allocatedbit) { 104 | allocatedbit += BASEBITREC; 105 | dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit)); 106 | } 107 | dec[lastbit].v[0] = 0; 108 | dec[lastbit].v[1] = 0; 109 | dec[oldp].v[b] = lastbit; 110 | p = lastbit; 111 | } 112 | } 113 | dec[p].c[0] = c[0]; 114 | dec[p].c[1] = c[1]; 115 | } 116 | return 0; 117 | } 118 | 119 | Hunzip::~Hunzip() 120 | { 121 | if (dec) free(dec); 122 | if (fin) fclose(fin); 123 | if (filename) free(filename); 124 | } 125 | 126 | int Hunzip::getbuf() { 127 | int p = 0; 128 | int o = 0; 129 | do { 130 | if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8; 131 | for (; inc < inbits; inc++) { 132 | int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 
1 : 0; 133 | int oldp = p; 134 | p = dec[p].v[b]; 135 | if (p == 0) { 136 | if (oldp == lastbit) { 137 | fclose(fin); 138 | fin = NULL; 139 | // add last odd byte 140 | if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1]; 141 | return o; 142 | } 143 | out[o++] = dec[oldp].c[0]; 144 | out[o++] = dec[oldp].c[1]; 145 | if (o == BUFSIZE) return o; 146 | p = dec[p].v[b]; 147 | } 148 | } 149 | inc = 0; 150 | } while (inbits == BUFSIZE * 8); 151 | return fail(MSG_FORMAT, filename); 152 | } 153 | 154 | const char * Hunzip::getline() { 155 | char linebuf[BUFSIZE]; 156 | int l = 0, eol = 0, left = 0, right = 0; 157 | if (bufsiz == -1) return NULL; 158 | while (l < bufsiz && !eol) { 159 | linebuf[l++] = out[outc]; 160 | switch (out[outc]) { 161 | case '\t': break; 162 | case 31: { // escape 163 | if (++outc == bufsiz) { 164 | bufsiz = getbuf(); 165 | outc = 0; 166 | } 167 | linebuf[l - 1] = out[outc]; 168 | break; 169 | } 170 | case ' ': break; 171 | default: if (((unsigned char) out[outc]) < 47) { 172 | if (out[outc] > 32) { 173 | right = out[outc] - 31; 174 | if (++outc == bufsiz) { 175 | bufsiz = getbuf(); 176 | outc = 0; 177 | } 178 | } 179 | if (out[outc] == 30) left = 9; else left = out[outc]; 180 | linebuf[l-1] = '\n'; 181 | eol = 1; 182 | } 183 | } 184 | if (++outc == bufsiz) { 185 | outc = 0; 186 | bufsiz = fin ? getbuf(): -1; 187 | } 188 | } 189 | if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1); 190 | else linebuf[l] = '\0'; 191 | strcpy(line + left, linebuf); 192 | return line; 193 | } 194 | -------------------------------------------------------------------------------- /hunspell/src/win_api/config.h: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP 4 | systems. This function is required for `alloca.c' support on those systems. 5 | */ 6 | #define CRAY_STACKSEG_END 1 7 | 8 | /* Define to 1 if using `alloca.c'. */ 9 | #define C_ALLOCA 1 10 | 11 | /* Define to 1 if translation of program messages to the user's native 12 | language is requested. */ 13 | #undef ENABLE_NLS 14 | 15 | /* Define to 1 if you have `alloca', as a function or macro. */ 16 | #define HAVE_ALLOCA 1 17 | 18 | /* Define to 1 if you have and it should be used (not on Ultrix). 19 | */ 20 | #define HAVE_ALLOCA_H 1 21 | 22 | /* Define to 1 if you have the header file. */ 23 | #define HAVE_ARGZ_H 1 24 | 25 | /* "Define if you have the header" */ 26 | #undef HAVE_CURSES_H 27 | 28 | /* Define if the GNU dcgettext() function is already present or preinstalled. 29 | */ 30 | #define HAVE_DCGETTEXT 1 31 | 32 | /* Define to 1 if you have the header file. */ 33 | #define HAVE_DLFCN_H 1 34 | 35 | /* Define to 1 if you have the header file. */ 36 | #define HAVE_ERROR_H 1 37 | 38 | /* Define to 1 if you have the header file. */ 39 | #define HAVE_FCNTL_H 1 40 | 41 | /* Define to 1 if you have the `feof_unlocked' function. */ 42 | #define HAVE_FEOF_UNLOCKED 1 43 | 44 | /* Define to 1 if you have the `fgets_unlocked' function. */ 45 | #define HAVE_FGETS_UNLOCKED 1 46 | 47 | /* Define to 1 if you have the `getcwd' function. */ 48 | #define HAVE_GETCWD 1 49 | 50 | /* Define to 1 if you have the `getc_unlocked' function. */ 51 | #define HAVE_GETC_UNLOCKED 1 52 | 53 | /* Define to 1 if you have the `getegid' function. */ 54 | #define HAVE_GETEGID 1 55 | 56 | /* Define to 1 if you have the `geteuid' function. 
*/ 57 | #define HAVE_GETEUID 1 58 | 59 | /* Define to 1 if you have the `getgid' function. */ 60 | #define HAVE_GETGID 1 61 | 62 | /* Define to 1 if you have the `getpagesize' function. */ 63 | #define HAVE_GETPAGESIZE 1 64 | 65 | /* Define if the GNU gettext() function is already present or preinstalled. */ 66 | #define HAVE_GETTEXT 1 67 | 68 | /* Define to 1 if you have the `getuid' function. */ 69 | #define HAVE_GETUID 1 70 | 71 | /* Define if you have the iconv() function. */ 72 | #undef HAVE_ICONV 73 | 74 | /* Define to 1 if you have the header file. */ 75 | #define HAVE_INTTYPES_H 1 76 | 77 | /* Define if you have and nl_langinfo(CODESET). */ 78 | #define HAVE_LANGINFO_CODESET 1 79 | 80 | /* Define if your file defines LC_MESSAGES. */ 81 | #define HAVE_LC_MESSAGES 1 82 | 83 | /* Define to 1 if you have the header file. */ 84 | #define HAVE_LIBINTL_H 1 85 | 86 | /* Define to 1 if you have the header file. */ 87 | #define HAVE_LIMITS_H 1 88 | 89 | /* Define to 1 if you have the header file. */ 90 | #define HAVE_LOCALE_H 1 91 | 92 | /* Define to 1 if you have the `memchr' function. */ 93 | #define HAVE_MEMCHR 1 94 | 95 | /* Define to 1 if you have the header file. */ 96 | #define HAVE_MEMORY_H 1 97 | 98 | /* Define to 1 if you have the `mempcpy' function. */ 99 | #define HAVE_MEMPCPY 1 100 | 101 | /* Define to 1 if you have a working `mmap' system call. */ 102 | #define HAVE_MMAP 1 103 | 104 | /* Define to 1 if you have the `munmap' function. */ 105 | #define HAVE_MUNMAP 1 106 | 107 | /* "Define if you have the header" */ 108 | #define HAVE_NCURSESW_H 1 109 | 110 | /* Define to 1 if you have the header file. */ 111 | #define HAVE_NL_TYPES_H 1 112 | 113 | /* Define to 1 if you have the `putenv' function. */ 114 | #define HAVE_PUTENV 1 115 | 116 | /* "Define if you have fancy command input editing with Readline" */ 117 | #undef HAVE_READLINE 118 | 119 | /* Define to 1 if you have the `setenv' function. */ 120 | #define HAVE_SETENV 1 121 | 122 | /* Define to 1 if you have the `setlocale' function. */ 123 | #define HAVE_SETLOCALE 1 124 | 125 | /* Define to 1 if you have the header file. */ 126 | #define HAVE_STDDEF_H 1 127 | 128 | /* Define to 1 if you have the header file. */ 129 | #define HAVE_STDINT_H 1 130 | 131 | /* Define to 1 if you have the header file. */ 132 | #define HAVE_STDLIB_H 1 133 | 134 | /* Define to 1 if you have the `stpcpy' function. */ 135 | #define HAVE_STPCPY 1 136 | 137 | /* Define to 1 if you have the `strcasecmp' function. */ 138 | #define HAVE_STRCASECMP 1 139 | 140 | /* Define to 1 if you have the `strchr' function. */ 141 | #define HAVE_STRCHR 1 142 | 143 | /* Define to 1 if you have the `strdup' function. */ 144 | #define HAVE_STRDUP 1 145 | 146 | /* Define to 1 if you have the header file. */ 147 | #define HAVE_STRINGS_H 1 148 | 149 | /* Define to 1 if you have the header file. */ 150 | #define HAVE_STRING_H 1 151 | 152 | /* Define to 1 if you have the `strstr' function. */ 153 | #define HAVE_STRSTR 1 154 | 155 | /* Define to 1 if you have the `strtoul' function. */ 156 | #define HAVE_STRTOUL 1 157 | 158 | /* Define to 1 if you have the header file. */ 159 | #define HAVE_SYS_PARAM_H 1 160 | 161 | /* Define to 1 if you have the header file. */ 162 | #define HAVE_SYS_STAT_H 1 163 | 164 | /* Define to 1 if you have the header file. */ 165 | #define HAVE_SYS_TYPES_H 1 166 | 167 | /* Define to 1 if you have the `tsearch' function. */ 168 | #define HAVE_TSEARCH 1 169 | 170 | /* Define to 1 if you have the header file. 
*/ 171 | #define HAVE_UNISTD_H 1 172 | 173 | /* Define to 1 if you have the `__argz_count' function. */ 174 | #define HAVE___ARGZ_COUNT 1 175 | 176 | /* Define to 1 if you have the `__argz_next' function. */ 177 | #define HAVE___ARGZ_NEXT 1 178 | 179 | /* Define to 1 if you have the `__argz_stringify' function. */ 180 | #define HAVE___ARGZ_STRINGIFY 1 181 | 182 | /* "Define if you use exterimental functions" */ 183 | #undef HUNSPELL_EXPERIMENTAL 184 | 185 | /* "Define if you need warning messages" */ 186 | #define HUNSPELL_WARNING_ON 187 | 188 | /* Define as const if the declaration of iconv() needs const. */ 189 | #define ICONV_CONST 1 190 | 191 | /* Name of package */ 192 | #define PACKAGE 193 | 194 | /* Define to the address where bug reports for this package should be sent. */ 195 | #define PACKAGE_BUGREPORT 196 | 197 | /* Define to the full name of this package. */ 198 | #define PACKAGE_NAME 199 | 200 | /* Define to the full name and version of this package. */ 201 | #define PACKAGE_STRING 202 | 203 | /* Define to the one symbol short name of this package. */ 204 | #define PACKAGE_TARNAME 205 | 206 | /* Define to the version of this package. */ 207 | #define PACKAGE_VERSION "1.3.3" 208 | #define VERSION "1.3.3" 209 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/csutil.hxx: -------------------------------------------------------------------------------- 1 | #ifndef __CSUTILHXX__ 2 | #define __CSUTILHXX__ 3 | 4 | #include "hunvisapi.h" 5 | 6 | // First some base level utility routines 7 | 8 | #include 9 | #include "w_char.hxx" 10 | #include "htypes.hxx" 11 | 12 | #ifdef MOZILLA_CLIENT 13 | #include "nscore.h" // for mozalloc headers 14 | #endif 15 | 16 | // casing 17 | #define NOCAP 0 18 | #define INITCAP 1 19 | #define ALLCAP 2 20 | #define HUHCAP 3 21 | #define HUHINITCAP 4 22 | 23 | // default encoding and keystring 24 | #define SPELL_ENCODING "ISO8859-1" 25 | #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" 26 | 27 | // default morphological fields 28 | #define MORPH_STEM "st:" 29 | #define MORPH_ALLOMORPH "al:" 30 | #define MORPH_POS "po:" 31 | #define MORPH_DERI_PFX "dp:" 32 | #define MORPH_INFL_PFX "ip:" 33 | #define MORPH_TERM_PFX "tp:" 34 | #define MORPH_DERI_SFX "ds:" 35 | #define MORPH_INFL_SFX "is:" 36 | #define MORPH_TERM_SFX "ts:" 37 | #define MORPH_SURF_PFX "sp:" 38 | #define MORPH_FREQ "fr:" 39 | #define MORPH_PHON "ph:" 40 | #define MORPH_HYPH "hy:" 41 | #define MORPH_PART "pa:" 42 | #define MORPH_FLAG "fl:" 43 | #define MORPH_HENTRY "_H:" 44 | #define MORPH_TAG_LEN strlen(MORPH_STEM) 45 | 46 | #define MSEP_FLD ' ' 47 | #define MSEP_REC '\n' 48 | #define MSEP_ALT '\v' 49 | 50 | // default flags 51 | #define DEFAULTFLAGS 65510 52 | #define FORBIDDENWORD 65510 53 | #define ONLYUPCASEFLAG 65511 54 | 55 | // convert UTF-16 characters to UTF-8 56 | LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); 57 | 58 | // convert UTF-8 characters to UTF-16 59 | LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src); 60 | 61 | // sort 2-byte vector 62 | LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end); 63 | 64 | // binary search in 2-byte vector 65 | LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right); 66 | 67 | // remove end of line char(s) 68 | LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s); 69 | 70 | // duplicate string 71 | LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const 
char * s); 72 | 73 | // strcat for limited length destination string 74 | LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max); 75 | 76 | // duplicate reverse of string 77 | LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s); 78 | 79 | // parse into tokens with char delimiter 80 | LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim); 81 | // parse into tokens with char delimiter 82 | LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim); 83 | 84 | // parse into tokens with char delimiter 85 | LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *); 86 | 87 | // append s to ends of every lines in text 88 | LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s); 89 | 90 | // tokenize into lines with new line 91 | LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar); 92 | 93 | // tokenize into lines with new line and uniq in place 94 | LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar); 95 | LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar); 96 | 97 | // change oldchar to newchar in place 98 | LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc); 99 | 100 | // reverse word 101 | LIBHUNSPELL_DLL_EXPORTED int reverseword(char *); 102 | 103 | // reverse word 104 | LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *); 105 | 106 | // remove duplicates 107 | LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n); 108 | 109 | // free character array list 110 | LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n); 111 | 112 | // character encoding information 113 | struct cs_info { 114 | unsigned char ccase; 115 | unsigned char clower; 116 | unsigned char cupper; 117 | }; 118 | 119 | LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl(); 120 | LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl(); 121 | LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum); 122 | LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum); 123 | LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c); 124 | 125 | LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es); 126 | 127 | // get language identifiers of language codes 128 | LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang); 129 | 130 | // get characters of the given 8bit encoding with lower- and uppercase forms 131 | LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc); 132 | 133 | // convert null terminated string to all caps using encoding 134 | LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding); 135 | 136 | // convert null terminated string to all little using encoding 137 | LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding); 138 | 139 | // convert null terminated string to have initial capital using encoding 140 | LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding); 141 | 142 | // convert null terminated string to all caps 143 | LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv); 144 | 145 | // convert null terminated string to all little 146 | LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv); 147 | 148 | // convert null terminated string to have initial capital 149 | LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv); 150 | 151 | // 
convert first nc characters of UTF-8 string to little 152 | LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum); 153 | 154 | // convert first nc characters of UTF-8 string to capital 155 | LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum); 156 | 157 | // get type of capitalization 158 | LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *); 159 | 160 | // get type of capitalization (UTF-8) 161 | LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum); 162 | 163 | // strip all ignored characters in the string 164 | LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); 165 | 166 | // strip all ignored characters in the string 167 | LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars); 168 | 169 | LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln); 170 | 171 | LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16, 172 | int * out_utf16_len, int utf8, int ln); 173 | 174 | LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r); 175 | LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var); 176 | 177 | LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t); 178 | 179 | LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph); 180 | 181 | // conversion function for protected memory 182 | LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source); 183 | 184 | // conversion function for protected memory 185 | LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s); 186 | 187 | // hash entry macros 188 | LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h) 189 | { 190 | char *ret; 191 | if (!h->var) 192 | ret = NULL; 193 | else if (h->var & H_OPT_ALIASM) 194 | ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 195 | else 196 | ret = HENTRY_WORD(h) + h->blen + 1; 197 | return ret; 198 | } 199 | 200 | // NULL-free version for warning-free OOo build 201 | LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h) 202 | { 203 | const char *ret; 204 | if (!h->var) 205 | ret = ""; 206 | else if (h->var & H_OPT_ALIASM) 207 | ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 208 | else 209 | ret = HENTRY_WORD(h) + h->blen + 1; 210 | return ret; 211 | } 212 | 213 | LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p) 214 | { 215 | return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); 216 | } 217 | 218 | #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) 219 | 220 | #endif 221 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/phonet.cxx: -------------------------------------------------------------------------------- 1 | /* phonetic.c - generic replacement aglogithms for phonetic transformation 2 | Copyright (C) 2000 Bjoern Jacke 3 | 4 | This library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License version 2.1 as published by the Free Software Foundation; 7 | 8 | This library is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | Lesser General Public License for more details. 
12 | 13 | You should have received a copy of the GNU Lesser General Public 14 | License along with this library; If not, see 15 | . 16 | 17 | Changelog: 18 | 19 | 2000-01-05 Bjoern Jacke 20 | Initial Release insprired by the article about phonetic 21 | transformations out of c't 25/1999 22 | 23 | 2007-07-26 Bjoern Jacke 24 | Released under MPL/GPL/LGPL tri-license for Hunspell 25 | 26 | 2007-08-23 Laszlo Nemeth 27 | Porting from Aspell to Hunspell using C-like structs 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "csutil.hxx" 36 | #include "phonet.hxx" 37 | 38 | void init_phonet_hash(phonetable & parms) 39 | { 40 | int i, k; 41 | 42 | for (i = 0; i < HASHSIZE; i++) { 43 | parms.hash[i] = -1; 44 | } 45 | 46 | for (i = 0; parms.rules[i][0] != '\0'; i += 2) { 47 | /** set hash value **/ 48 | k = (unsigned char) parms.rules[i][0]; 49 | 50 | if (parms.hash[k] < 0) { 51 | parms.hash[k] = i; 52 | } 53 | } 54 | } 55 | 56 | // like strcpy but safe if the strings overlap 57 | // but only if dest < src 58 | static inline void strmove(char * dest, char * src) { 59 | while (*src) 60 | *dest++ = *src++; 61 | *dest = '\0'; 62 | } 63 | 64 | static int myisalpha(char ch) { 65 | if ((unsigned char) ch < 128) return isalpha(ch); 66 | return 1; 67 | } 68 | 69 | /* phonetic transcription algorithm */ 70 | /* see: http://aspell.net/man-html/Phonetic-Code.html */ 71 | /* convert string to uppercase before this call */ 72 | int phonet (const char * inword, char * target, 73 | int len, 74 | phonetable & parms) 75 | { 76 | /** Do phonetic transformation. **/ 77 | /** "len" = length of "inword" incl. '\0'. **/ 78 | 79 | /** result: >= 0: length of "target" **/ 80 | /** otherwise: error **/ 81 | 82 | int i,j,k=0,n,p,z; 83 | int k0,n0,p0=-333,z0; 84 | char c, c0; 85 | const char * s; 86 | typedef unsigned char uchar; 87 | char word[MAXPHONETUTF8LEN + 1]; 88 | if (len == -1) len = strlen(inword); 89 | if (len > MAXPHONETUTF8LEN) return 0; 90 | strcpy(word, inword); 91 | 92 | /** check word **/ 93 | i = j = z = 0; 94 | while ((c = word[i]) != '\0') { 95 | n = parms.hash[(uchar) c]; 96 | z0 = 0; 97 | 98 | if (n >= 0) { 99 | /** check all rules for the same letter **/ 100 | while (parms.rules[n][0] == c) { 101 | 102 | /** check whole string **/ 103 | k = 1; /** number of found letters **/ 104 | p = 5; /** default priority **/ 105 | s = parms.rules[n]; 106 | s++; /** important for (see below) "*(s-1)" **/ 107 | 108 | while (*s != '\0' && word[i+k] == *s 109 | && !isdigit ((unsigned char) *s) && strchr ("(-<^$", *s) == NULL) { 110 | k++; 111 | s++; 112 | } 113 | if (*s == '(') { 114 | /** check letters in "(..)" **/ 115 | if (myisalpha(word[i+k]) // ...could be implied? 116 | && strchr(s+1, word[i+k]) != NULL) { 117 | k++; 118 | while (*s != ')') 119 | s++; 120 | s++; 121 | } 122 | } 123 | p0 = (int) *s; 124 | k0 = k; 125 | while (*s == '-' && k > 1) { 126 | k--; 127 | s++; 128 | } 129 | if (*s == '<') 130 | s++; 131 | if (isdigit ((unsigned char) *s)) { 132 | /** determine priority **/ 133 | p = *s - '0'; 134 | s++; 135 | } 136 | if (*s == '^' && *(s+1) == '^') 137 | s++; 138 | 139 | if (*s == '\0' 140 | || (*s == '^' 141 | && (i == 0 || ! myisalpha(word[i-1])) 142 | && (*(s+1) != '$' 143 | || (! myisalpha(word[i+k0]) ))) 144 | || (*s == '$' && i > 0 145 | && myisalpha(word[i-1]) 146 | && (! 
myisalpha(word[i+k0]) ))) 147 | { 148 | /** search for followup rules, if: **/ 149 | /** parms.followup and k > 1 and NO '-' in searchstring **/ 150 | c0 = word[i+k-1]; 151 | n0 = parms.hash[(uchar) c0]; 152 | 153 | // if (parms.followup && k > 1 && n0 >= 0 154 | if (k > 1 && n0 >= 0 155 | && p0 != (int) '-' && word[i+k] != '\0') { 156 | /** test follow-up rule for "word[i+k]" **/ 157 | while (parms.rules[n0][0] == c0) { 158 | 159 | /** check whole string **/ 160 | k0 = k; 161 | p0 = 5; 162 | s = parms.rules[n0]; 163 | s++; 164 | while (*s != '\0' && word[i+k0] == *s 165 | && ! isdigit((unsigned char) *s) && strchr("(-<^$",*s) == NULL) { 166 | k0++; 167 | s++; 168 | } 169 | if (*s == '(') { 170 | /** check letters **/ 171 | if (myisalpha(word[i+k0]) 172 | && strchr (s+1, word[i+k0]) != NULL) { 173 | k0++; 174 | while (*s != ')' && *s != '\0') 175 | s++; 176 | if (*s == ')') 177 | s++; 178 | } 179 | } 180 | while (*s == '-') { 181 | /** "k0" gets NOT reduced **/ 182 | /** because "if (k0 == k)" **/ 183 | s++; 184 | } 185 | if (*s == '<') 186 | s++; 187 | if (isdigit ((unsigned char) *s)) { 188 | p0 = *s - '0'; 189 | s++; 190 | } 191 | 192 | if (*s == '\0' 193 | /** *s == '^' cuts **/ 194 | || (*s == '$' && ! myisalpha(word[i+k0]))) 195 | { 196 | if (k0 == k) { 197 | /** this is just a piece of the string **/ 198 | n0 += 2; 199 | continue; 200 | } 201 | 202 | if (p0 < p) { 203 | /** priority too low **/ 204 | n0 += 2; 205 | continue; 206 | } 207 | /** rule fits; stop search **/ 208 | break; 209 | } 210 | n0 += 2; 211 | } /** End of "while (parms.rules[n0][0] == c0)" **/ 212 | 213 | if (p0 >= p && parms.rules[n0][0] == c0) { 214 | n += 2; 215 | continue; 216 | } 217 | } /** end of follow-up stuff **/ 218 | 219 | /** replace string **/ 220 | s = parms.rules[n+1]; 221 | p0 = (parms.rules[n][0] != '\0' 222 | && strchr (parms.rules[n]+1,'<') != NULL) ? 
1:0; 223 | if (p0 == 1 && z == 0) { 224 | /** rule with '<' is used **/ 225 | if (j > 0 && *s != '\0' 226 | && (target[j-1] == c || target[j-1] == *s)) { 227 | j--; 228 | } 229 | z0 = 1; 230 | z = 1; 231 | k0 = 0; 232 | while (*s != '\0' && word[i+k0] != '\0') { 233 | word[i+k0] = *s; 234 | k0++; 235 | s++; 236 | } 237 | if (k > k0) 238 | strmove (&word[0]+i+k0, &word[0]+i+k); 239 | 240 | /** new "actual letter" **/ 241 | c = word[i]; 242 | } 243 | else { /** no '<' rule used **/ 244 | i += k - 1; 245 | z = 0; 246 | while (*s != '\0' 247 | && *(s+1) != '\0' && j < len) { 248 | if (j == 0 || target[j-1] != *s) { 249 | target[j] = *s; 250 | j++; 251 | } 252 | s++; 253 | } 254 | /** new "actual letter" **/ 255 | c = *s; 256 | if (parms.rules[n][0] != '\0' 257 | && strstr (parms.rules[n]+1, "^^") != NULL) { 258 | if (c != '\0') { 259 | target[j] = c; 260 | j++; 261 | } 262 | strmove (&word[0], &word[0]+i+1); 263 | i = 0; 264 | z0 = 1; 265 | } 266 | } 267 | break; 268 | } /** end of follow-up stuff **/ 269 | n += 2; 270 | } /** end of while (parms.rules[n][0] == c) **/ 271 | } /** end of if (n >= 0) **/ 272 | if (z0 == 0) { 273 | // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0' 274 | // && (!parms.collapse_result || j == 0 || target[j-1] != c)){ 275 | if (k && !p0 && j < len && c != '\0' 276 | && (1 || j == 0 || target[j-1] != c)){ 277 | /** condense only double letters **/ 278 | target[j] = c; 279 | ///printf("\n setting \n"); 280 | j++; 281 | } 282 | 283 | i++; 284 | z = 0; 285 | k=0; 286 | } 287 | } /** end of while ((c = word[i]) != '\0') **/ 288 | 289 | target[j] = '\0'; 290 | return (j); 291 | 292 | } /** end of function "phonet" **/ 293 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/affixmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _AFFIXMGR_HXX_ 2 | #define _AFFIXMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include 7 | 8 | #include "atypes.hxx" 9 | #include "baseaffix.hxx" 10 | #include "hashmgr.hxx" 11 | #include "phonet.hxx" 12 | #include "replist.hxx" 13 | #include "istrmgr.hxx" 14 | 15 | // check flag duplication 16 | #define dupSFX (1 << 0) 17 | #define dupPFX (1 << 1) 18 | 19 | class PfxEntry; 20 | class SfxEntry; 21 | 22 | class LIBHUNSPELL_DLL_EXPORTED AffixMgr 23 | { 24 | 25 | PfxEntry * pStart[SETSIZE]; 26 | SfxEntry * sStart[SETSIZE]; 27 | PfxEntry * pFlag[SETSIZE]; 28 | SfxEntry * sFlag[SETSIZE]; 29 | HashMgr * pHMgr; 30 | HashMgr ** alldic; 31 | int * maxdic; 32 | char * keystring; 33 | char * trystring; 34 | char * encoding; 35 | struct cs_info * csconv; 36 | int utf8; 37 | int complexprefixes; 38 | FLAG compoundflag; 39 | FLAG compoundbegin; 40 | FLAG compoundmiddle; 41 | FLAG compoundend; 42 | FLAG compoundroot; 43 | FLAG compoundforbidflag; 44 | FLAG compoundpermitflag; 45 | int checkcompounddup; 46 | int checkcompoundrep; 47 | int checkcompoundcase; 48 | int checkcompoundtriple; 49 | int simplifiedtriple; 50 | FLAG forbiddenword; 51 | FLAG nosuggest; 52 | FLAG nongramsuggest; 53 | FLAG needaffix; 54 | int cpdmin; 55 | int numrep; 56 | replentry * reptable; 57 | RepList * iconvtable; 58 | RepList * oconvtable; 59 | int nummap; 60 | mapentry * maptable; 61 | int numbreak; 62 | char ** breaktable; 63 | int numcheckcpd; 64 | patentry * checkcpdtable; 65 | int simplifiedcpd; 66 | int numdefcpd; 67 | flagentry * defcpdtable; 68 | phonetable * phone; 69 | int maxngramsugs; 70 | int maxcpdsugs; 71 | int maxdiff; 72 | int onlymaxdiff; 73 | int 
nosplitsugs; 74 | int sugswithdots; 75 | int cpdwordmax; 76 | int cpdmaxsyllable; 77 | char * cpdvowels; 78 | w_char * cpdvowels_utf16; 79 | int cpdvowels_utf16_len; 80 | char * cpdsyllablenum; 81 | const char * pfxappnd; // BUG: not stateless 82 | const char * sfxappnd; // BUG: not stateless 83 | FLAG sfxflag; // BUG: not stateless 84 | char * derived; // BUG: not stateless 85 | SfxEntry * sfx; // BUG: not stateless 86 | PfxEntry * pfx; // BUG: not stateless 87 | int checknum; 88 | char * wordchars; 89 | unsigned short * wordchars_utf16; 90 | int wordchars_utf16_len; 91 | char * ignorechars; 92 | unsigned short * ignorechars_utf16; 93 | int ignorechars_utf16_len; 94 | char * version; 95 | char * lang; 96 | int langnum; 97 | FLAG lemma_present; 98 | FLAG circumfix; 99 | FLAG onlyincompound; 100 | FLAG keepcase; 101 | FLAG forceucase; 102 | FLAG warn; 103 | int forbidwarn; 104 | FLAG substandard; 105 | int checksharps; 106 | int fullstrip; 107 | 108 | int havecontclass; // boolean variable 109 | char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) 110 | 111 | public: 112 | 113 | AffixMgr(const char * affpath, HashMgr** ptr, int * md,const char * key = NULL, bool notpath = false); 114 | AffixMgr(const char * affpath, HashMgr** ptr, int * md, bool notpath = false); 115 | ~AffixMgr(); 116 | struct hentry * affix_check(const char * word, int len, 117 | const unsigned short needflag = (unsigned short) 0, 118 | char in_compound = IN_CPD_NOT); 119 | struct hentry * prefix_check(const char * word, int len, 120 | char in_compound, const FLAG needflag = FLAG_NULL); 121 | inline int isSubset(const char * s1, const char * s2); 122 | struct hentry * prefix_check_twosfx(const char * word, int len, 123 | char in_compound, const FLAG needflag = FLAG_NULL); 124 | inline int isRevSubset(const char * s1, const char * end_of_s2, int len); 125 | struct hentry * suffix_check(const char * word, int len, int sfxopts, 126 | PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, 127 | const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, 128 | char in_compound = IN_CPD_NOT); 129 | struct hentry * suffix_check_twosfx(const char * word, int len, 130 | int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 131 | 132 | char * affix_check_morph(const char * word, int len, 133 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 134 | char * prefix_check_morph(const char * word, int len, 135 | char in_compound, const FLAG needflag = FLAG_NULL); 136 | char * suffix_check_morph (const char * word, int len, int sfxopts, 137 | PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, 138 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 139 | 140 | char * prefix_check_twosfx_morph(const char * word, int len, 141 | char in_compound, const FLAG needflag = FLAG_NULL); 142 | char * suffix_check_twosfx_morph(const char * word, int len, 143 | int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); 144 | 145 | char * morphgen(char * ts, int wl, const unsigned short * ap, 146 | unsigned short al, char * morph, char * targetmorph, int level); 147 | 148 | int expand_rootword(struct guessword * wlst, int maxn, const char * ts, 149 | int wl, const unsigned short * ap, unsigned short al, char * bad, 150 | int, char *); 151 | 152 | short get_syllable (const char * word, int wlen); 153 | int cpdrep_check(const char * word, int len); 154 | int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2, 155 | const char affixed); 156 | int defcpd_check(hentry 
*** words, short wnum, hentry * rv, 157 | hentry ** rwords, char all); 158 | int cpdcase_check(const char * word, int len); 159 | inline int candidate_check(const char * word, int len); 160 | void setcminmax(int * cmin, int * cmax, const char * word, int len); 161 | struct hentry * compound_check(const char * word, int len, short wordnum, 162 | short numsyllable, short maxwordnum, short wnum, hentry ** words, 163 | char hu_mov_rule, char is_sug, int * info); 164 | 165 | int compound_check_morph(const char * word, int len, short wordnum, 166 | short numsyllable, short maxwordnum, short wnum, hentry ** words, 167 | char hu_mov_rule, char ** result, char * partresult); 168 | 169 | struct hentry * lookup(const char * word); 170 | int get_numrep() const; 171 | struct replentry * get_reptable() const; 172 | RepList * get_iconvtable() const; 173 | RepList * get_oconvtable() const; 174 | struct phonetable * get_phonetable() const; 175 | int get_nummap() const; 176 | struct mapentry * get_maptable() const; 177 | int get_numbreak() const; 178 | char ** get_breaktable() const; 179 | char * get_encoding(); 180 | int get_langnum() const; 181 | char * get_key_string(); 182 | char * get_try_string() const; 183 | const char * get_wordchars() const; 184 | unsigned short * get_wordchars_utf16(int * len) const; 185 | char * get_ignore() const; 186 | unsigned short * get_ignore_utf16(int * len) const; 187 | int get_compound() const; 188 | FLAG get_compoundflag() const; 189 | FLAG get_compoundbegin() const; 190 | FLAG get_forbiddenword() const; 191 | FLAG get_nosuggest() const; 192 | FLAG get_nongramsuggest() const; 193 | FLAG get_needaffix() const; 194 | FLAG get_onlyincompound() const; 195 | FLAG get_compoundroot() const; 196 | FLAG get_lemma_present() const; 197 | int get_checknum() const; 198 | const char * get_prefix() const; 199 | const char * get_suffix() const; 200 | const char * get_derived() const; 201 | const char * get_version() const; 202 | int have_contclass() const; 203 | int get_utf8() const; 204 | int get_complexprefixes() const; 205 | char * get_suffixed(char ) const; 206 | int get_maxngramsugs() const; 207 | int get_maxcpdsugs() const; 208 | int get_maxdiff() const; 209 | int get_onlymaxdiff() const; 210 | int get_nosplitsugs() const; 211 | int get_sugswithdots(void) const; 212 | FLAG get_keepcase(void) const; 213 | FLAG get_forceucase(void) const; 214 | FLAG get_warn(void) const; 215 | int get_forbidwarn(void) const; 216 | int get_checksharps(void) const; 217 | char * encode_flag(unsigned short aflag) const; 218 | int get_fullstrip() const; 219 | 220 | private: 221 | void Init(const char * affpath, HashMgr** ptr, int * md, const char * key, bool notpath); 222 | int parse_file(const char * affpath, const char * key, bool notpath); 223 | int parse_flag(char * line, unsigned short * out, IStrMgr * af); 224 | int parse_num(char * line, int * out, IStrMgr * af); 225 | int parse_cpdsyllable(char * line, IStrMgr * af); 226 | int parse_reptable(char * line, IStrMgr * af); 227 | int parse_convtable(char * line, IStrMgr * af, RepList ** rl, const char * keyword); 228 | int parse_phonetable(char * line, IStrMgr * af); 229 | int parse_maptable(char * line, IStrMgr * af); 230 | int parse_breaktable(char * line, IStrMgr * af); 231 | int parse_checkcpdtable(char * line, IStrMgr * af); 232 | int parse_defcpdtable(char * line, IStrMgr * af); 233 | int parse_affix(char * line, const char at, IStrMgr * af, char * dupflags); 234 | 235 | void reverse_condition(char *); 236 | void debugflag(char * result, 
unsigned short flag); 237 | int condlen(char *); 238 | int encodeit(affentry &entry, char * cs); 239 | int build_pfxtree(PfxEntry* pfxptr); 240 | int build_sfxtree(SfxEntry* sfxptr); 241 | int process_pfx_order(); 242 | int process_sfx_order(); 243 | PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); 244 | SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); 245 | int process_pfx_tree_to_list(); 246 | int process_sfx_tree_to_list(); 247 | int redundant_condition(char, char * strip, int stripl, 248 | const char * cond, int); 249 | }; 250 | 251 | #endif 252 | 253 | -------------------------------------------------------------------------------- /test/performance/performance.ts: -------------------------------------------------------------------------------- 1 | import { Nodehun } from 'nodehun' 2 | 3 | const fs = require('fs') 4 | const path = require('path') 5 | // @ts-ignore 6 | const { performance } = require('perf_hooks') 7 | const ChartjsNode = require('chartjs-node'); 8 | 9 | const Nodehun3: typeof Nodehun = require('bindings')('Nodehun') 10 | const Nodehun2 = require('nodehun') 11 | 12 | ///////////////////////////////////////////////////// 13 | // CONFIGURATION 14 | ///////////////////////////////////////////////////// 15 | const samples_spell = 10 16 | const iterations_spell = 100 17 | const samples_suggest = 10 18 | const iterations_suggest = 100 19 | const correct = 'color' 20 | const incorrect = 'colour' 21 | const dictionaryType = 'en_US' 22 | const d = { 23 | affix: fs.readFileSync(path.resolve(__dirname, '../dictionaries/en_us.aff')), 24 | dictionary: fs.readFileSync(path.resolve(__dirname, '../dictionaries/en_us.dic')) 25 | } 26 | ///////////////////////////////////////////////////// 27 | 28 | interface Nodehun2 { 29 | isCorrect: Function 30 | isCorrectSync: Function 31 | spellSuggestions: Function 32 | spellSuggestionsSync: Function 33 | } 34 | 35 | const nodehun3: Nodehun = new Nodehun3(d.affix, d.dictionary) 36 | const nodehun2: Nodehun2 = new Nodehun2(d.affix, d.dictionary) 37 | 38 | const suggestSync3Test = (word: string): number => { 39 | let start = performance.now() 40 | nodehun3.suggestSync(word) 41 | let end = performance.now() 42 | return end - start 43 | } 44 | 45 | const suggestSync2Test = (word: string): number => { 46 | let start = performance.now() 47 | nodehun2.spellSuggestionsSync(word) 48 | let end = performance.now() 49 | return end - start 50 | } 51 | 52 | const suggest3Test = async (word: string) => { 53 | let start = performance.now() 54 | await nodehun3.suggest(word) 55 | let end = performance.now() 56 | return end - start 57 | }; 58 | 59 | const suggest2Test = async (word: string) => { 60 | let start = performance.now() 61 | await new Promise(resolve => nodehun2.spellSuggestions(word, resolve)) 62 | let end = performance.now() 63 | return end - start 64 | } 65 | 66 | const spellSync3Test = (word: string): number => { 67 | let start = performance.now() 68 | nodehun3.spellSync(word) 69 | let end = performance.now() 70 | return end - start 71 | } 72 | 73 | const spellSync2Test = (word: string): number => { 74 | let start = performance.now() 75 | nodehun2.isCorrectSync(word) 76 | let end = performance.now() 77 | return end - start 78 | } 79 | 80 | const spell3Test = async (word: string) => { 81 | let start = performance.now() 82 | await nodehun3.spell(word) 83 | let end = performance.now() 84 | return end - start 85 | }; 86 | 87 | const spell2Test = async (word: string) => { 88 | let start = performance.now() 89 | await new 
Promise(resolve => nodehun2.isCorrect(word, resolve)) 90 | let end = performance.now() 91 | return end - start 92 | } 93 | 94 | const suggestSyncTest = (iterations: number) => { 95 | let time2_correct = 0 96 | , time3_correct = 0 97 | , time2_incorrect = 0 98 | , time3_incorrect = 0 99 | 100 | for (let i = 0; i < iterations; i++) { 101 | time2_correct += suggestSync2Test(correct) 102 | time2_incorrect += suggestSync2Test(incorrect) 103 | time3_correct += suggestSync3Test(correct) 104 | time3_incorrect += suggestSync3Test(incorrect) 105 | } 106 | 107 | return { 108 | time2_correct, 109 | time2_incorrect, 110 | time3_correct, 111 | time3_incorrect 112 | } 113 | } 114 | 115 | const suggestTest = async (iterations: number) => { 116 | let time2_correct = 0 117 | , time3_correct = 0 118 | , time2_incorrect = 0 119 | , time3_incorrect = 0 120 | 121 | for (let i = 0; i < iterations; i++) { 122 | time2_correct += await suggest2Test(correct) 123 | time2_incorrect += await suggest2Test(incorrect) 124 | time3_correct += await suggest3Test(correct) 125 | time3_incorrect += await suggest3Test(incorrect) 126 | } 127 | 128 | return { 129 | time2_correct, 130 | time2_incorrect, 131 | time3_correct, 132 | time3_incorrect 133 | } 134 | } 135 | 136 | 137 | 138 | const spellSyncTest = (iterations: number) => { 139 | let time2_correct = 0 140 | , time3_correct = 0 141 | , time2_incorrect = 0 142 | , time3_incorrect = 0 143 | 144 | for (let i = 0; i < iterations; i++) { 145 | time2_correct += spellSync2Test(correct) 146 | time2_incorrect += spellSync2Test(incorrect) 147 | time3_correct += spellSync3Test(correct) 148 | time3_incorrect += spellSync3Test(incorrect) 149 | } 150 | 151 | return { 152 | time2_correct, 153 | time2_incorrect, 154 | time3_correct, 155 | time3_incorrect 156 | } 157 | } 158 | 159 | const spellTest = async (iterations: number) => { 160 | let time2_correct = 0 161 | , time3_correct = 0 162 | , time2_incorrect = 0 163 | , time3_incorrect = 0 164 | 165 | for (let i = 0; i < iterations; i++) { 166 | time2_correct += await spell2Test(correct) 167 | time2_incorrect += await spell2Test(incorrect) 168 | time3_correct += await spell3Test(correct) 169 | time3_incorrect += await spell3Test(incorrect) 170 | } 171 | 172 | return { 173 | time2_correct, 174 | time2_incorrect, 175 | time3_correct, 176 | time3_incorrect 177 | } 178 | } 179 | 180 | const standardDeviation = (values: number[]) => { 181 | var avg = average(values) 182 | 183 | var squareDiffs = values.map(function (value) { 184 | var diff = value - avg 185 | var sqrDiff = diff * diff 186 | return sqrDiff 187 | }) 188 | 189 | var avgSquareDiff = average(squareDiffs) 190 | 191 | var stdDev = Math.sqrt(avgSquareDiff) 192 | return stdDev 193 | } 194 | 195 | const average = (data: number[]) => { 196 | var sum = data.reduce(function (sum, value) { 197 | return sum + value 198 | }, 0) 199 | 200 | var avg = sum / data.length 201 | return avg 202 | } 203 | 204 | /** 205 | * Takes an array of objects and returns the average and standard deviation 206 | * for each numeric key 207 | * @param results 208 | */ 209 | const analyzeResults = (results: Array>): Record => { 210 | if (!results || results.length == 0) throw 'empty results' 211 | 212 | const obj = results[0] 213 | const analysis: Record = {} 214 | 215 | Object.keys(obj).forEach(key => { 216 | if (typeof obj[key] == 'number') { 217 | const data = results.map(r => r[key]) 218 | const avg = average(data) 219 | const std = standardDeviation(data) 220 | analysis[key] = { 221 | average: avg, 222 | 
standardDeviation: std 223 | } 224 | } 225 | }) 226 | 227 | return analysis 228 | } 229 | 230 | const exportGraph = async ( 231 | title: string, 232 | legendLabels: string[], 233 | yAxisLabel: string, 234 | nodehun2_xAxisLabel: string, 235 | nodehun3_xAxisLabel: string, 236 | nodehun2Times: number[], 237 | nodehun3Times: number[], 238 | writeToFile: string 239 | ) => { 240 | return new Promise(resolve => { 241 | const chartNode = new ChartjsNode(600, 400); 242 | 243 | chartNode.drawChart({ 244 | type: 'bar', 245 | options: { 246 | title: { 247 | display: true, 248 | text: title 249 | }, 250 | scales: { 251 | yAxes: [{ 252 | ticks: { 253 | beginAtZero: true 254 | }, 255 | scaleLabel: { 256 | display: true, 257 | labelString: yAxisLabel 258 | } 259 | }], 260 | xAxes: [{ 261 | afterFit: (scale) => { 262 | scale.height = 40; 263 | } 264 | }] 265 | } 266 | }, 267 | data: { 268 | labels: legendLabels, 269 | datasets: [{ 270 | backgroundColor: '#d9e4aa', 271 | label: nodehun2_xAxisLabel, 272 | data: nodehun2Times, 273 | barThickness: 50, 274 | }, { 275 | backgroundColor: '#7ac36a', 276 | label: nodehun3_xAxisLabel, 277 | data: nodehun3Times, 278 | barThickness: 50, 279 | }] 280 | } 281 | }) 282 | .then(() => chartNode.getImageBuffer('image/png')) 283 | .then(() => chartNode.getImageStream('image/png')) 284 | .then(() => chartNode.writeImageToFile('image/png', writeToFile)) 285 | .then(resolve) 286 | }) 287 | } 288 | 289 | const performanceTest = async () => { 290 | console.log(`Running performance test`) 291 | console.log(`...correct word '${correct}'`) 292 | console.log(`...incorrect word '${incorrect}'`) 293 | console.log(`...spelling\n`, 294 | `......iterations ${iterations_spell}\n`, 295 | `......samples ${samples_spell}`) 296 | console.log(`...suggesting\n`, 297 | `......iterations ${iterations_suggest}\n`, 298 | `......samples ${samples_suggest}`) 299 | 300 | const results_spellSync = [] 301 | const results_spell = [] 302 | const results_suggestSync = [] 303 | const results_suggest = [] 304 | 305 | for (let i = 0; i < samples_spell; i++) { 306 | results_spellSync.push(spellSyncTest(iterations_spell)) 307 | results_spell.push(await spellTest(iterations_spell)) 308 | } 309 | 310 | for (let i = 0; i < samples_suggest; i++) { 311 | results_suggestSync.push(suggestSyncTest(iterations_suggest)) 312 | results_suggest.push(await suggestTest(iterations_suggest)) 313 | } 314 | 315 | const analysis_spellSync = analyzeResults(results_spellSync) 316 | const analysis_spell = analyzeResults(results_spell) 317 | const analysis_suggestSync = analyzeResults(results_suggestSync) 318 | const analysis_suggest = analyzeResults(results_suggest) 319 | 320 | console.log('Nodehun3#spellSync vs Nodehun2#isCorrectSync') 321 | console.log(analysis_spellSync) 322 | console.log('Nodehun3#spell vs Nodehun2#isCorrect') 323 | console.log(analysis_spell) 324 | console.log('Nodehun3#suggestSync vs Nodehun2#spellSuggestionsSync') 325 | console.log(analysis_suggestSync) 326 | console.log('Nodehun3#suggest vs Nodehun2#spellSuggestions') 327 | console.log(analysis_suggest) 328 | 329 | await exportGraph( 330 | `Nodehun3#spell[Sync] vs Nodehun2#isCorrect[Sync] (${samples_spell} samples, ${dictionaryType})`, 331 | [ 332 | ` correct word\n"${correct}" (sync)`, 333 | ` correct word\n"${correct}" (async)`, 334 | ` incorrect word\n"${incorrect}" (sync)`, 335 | ` incorrect word\n"${incorrect}" (async)` 336 | ], 337 | `average milliseconds for ${iterations_spell} operations`, 338 | 'Nodehun2 (w/ promise wrapper for async)', 339 | 
'Nodehun3', 340 | [ 341 | analysis_spellSync.time2_correct.average, 342 | analysis_spell.time2_correct.average, 343 | analysis_spellSync.time2_incorrect.average, 344 | analysis_spell.time2_incorrect.average 345 | ], 346 | [ 347 | analysis_spellSync.time3_correct.average, 348 | analysis_spell.time3_correct.average, 349 | analysis_spellSync.time3_incorrect.average, 350 | analysis_spell.time3_incorrect.average 351 | ], 352 | './test/performance/spell.png' 353 | ) 354 | 355 | await exportGraph( 356 | `Nodehun3#suggest[Sync] vs Nodehun2#spellSuggestions[Sync] (${samples_suggest} samples, ${dictionaryType})`, 357 | [ 358 | ` correct word\n"${correct}" (sync)`, 359 | ` correct word\n"${correct}" (async)`, 360 | ` incorrect word\n"${incorrect}" (sync)`, 361 | ` incorrect word\n"${incorrect}" (async)` 362 | ], 363 | `average milliseconds for ${iterations_suggest} operations`, 364 | 'Nodehun2 (w/ promise wrapper for async)', 365 | 'Nodehun3', 366 | [ 367 | analysis_suggestSync.time2_correct.average, 368 | analysis_suggest.time2_correct.average, 369 | analysis_suggestSync.time2_incorrect.average, 370 | analysis_suggest.time2_incorrect.average 371 | ], 372 | [ 373 | analysis_suggestSync.time3_correct.average, 374 | analysis_suggest.time3_correct.average, 375 | analysis_suggestSync.time3_incorrect.average, 376 | analysis_suggest.time3_incorrect.average 377 | ], 378 | './test/performance/suggest.png' 379 | ) 380 | 381 | 382 | } 383 | 384 | performanceTest() 385 | 386 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nodehun 2 | [![npm version](https://badge.fury.io/js/nodehun.svg)](https://badge.fury.io/js/nodehun) [![Build Status](https://travis-ci.org/Wulf/nodehun.svg?branch=master)](https://travis-ci.org/Wulf/nodehun) [![Build status](https://ci.appveyor.com/api/projects/status/9ky5lws4d191qrui/branch/master?svg=true)](https://ci.appveyor.com/project/Wulf/nodehun/branch/master) 3 | 4 | ## Introduction 5 | 6 | Nodehun aims to expose as much of hunspell's functionality as possible in an easy to understand and maintainable way, while also maintaining the performance characteristics expected of a responsible node module. 7 | 8 | ## Features 9 | 10 | * Native performance. 11 | * Exposes all of hunspell's functionality: 12 | * Spell checking, 13 | * suggestions, 14 | * personal dictionaries and word management, 15 | * stems/roots of words, 16 | * morphological generation, and, 17 | * word analysis. 18 | * TypeScript declaration file. 19 | * Synchronous + promise-based async API. 20 | * Extensive unit testing. 21 | * Completely re-written using N-API (thus, stability in future v8 versions) 22 | 23 | ## Installation 24 | 25 | npm install nodehun 26 | 27 | If you run into any build errors, make sure you satisfy the requirements for [`node-gyp`](https://github.com/nodejs/node-gyp#installation). 
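A quick way to confirm the native addon compiled correctly is to load the module and spell-check a word. This is only a minimal smoke-test sketch: the dictionary paths are placeholders, and it assumes the named `Nodehun` export shown in the Quick Start below plus the synchronous `spellSync` variant mentioned in the notes further down.

```js
const { Nodehun } = require('nodehun')
const fs = require('fs')

// placeholder paths: point these at any UTF-8 .aff/.dic pair you have on disk
const affix = fs.readFileSync('path/to/en_US.aff')
const dictionary = fs.readFileSync('path/to/en_US.dic')

const nodehun = new Nodehun(affix, dictionary)
console.log(nodehun.spellSync('hello')) // => true when the build and dictionary are working
```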
28 | 29 | ## Quick Start 30 | 31 | ```js 32 | import { Nodehun } from 'nodehun' 33 | 34 | const fs = require('fs') 35 | const affix = fs.readFileSync('path/to/*.aff') 36 | const dictionary = fs.readFileSync('path/to/*.dic') 37 | 38 | const nodehun = new Nodehun(affix, dictionary) 39 | 40 | // Promise example 41 | nodehun.suggest('colour') 42 | .then(suggestions => { }) 43 | 44 | // async/await example 45 | async function example() { 46 | const suggestions = await nodehun.suggest('colour') 47 | } 48 | 49 | // sync example 50 | const suggestions = nodehun.suggestSync('colour') 51 | ``` 52 | 53 | Note: It's probably not a good idea to use `readFileSync` in production. 54 | 55 | 56 | ## Table of Contents 57 | 58 | 1. Important migration notes from v2 -> v3 59 | 2. Examples 60 | * Spell checking 61 | * Spelling suggestions 62 | * Adding a dictionary 63 | * Add a word 64 | * Add a word (with affix) 65 | * Remove a word 66 | * Word stem 67 | * Word analysis 68 | * Word generation 69 | 3. Notes 70 | * Improving performance 71 | * A Warning on Synchronous Methods 72 | * A Note About Open Office Dictionaries 73 | * A Note About Creating Dictionaries 74 | * Where To Get Dictionaries 75 | 4. Development and Contribution 76 | * Scripts 77 | * Notes 78 | * Mentions 79 | 80 | ## Important migration notes from v2 -> v3 81 | 82 | 1. The API now reflects hunspell's API almost exactly. Please see `src/Nodehun.d.ts` for the API exposed by v3. 83 | 84 | 2. Unlike Nodehun2, `suggestSync` for a word spelled correctly returns `null` instead of an empty array. 85 | For example: 86 | 87 | ```js 88 | nodehun2.spellSuggestionsSync('color') // => [] 89 | nodehun3.suggestSync('color') // => null 90 | ``` 91 | 92 | 3. There are performance gains to be seen for those who wrapped the library in promises. 93 | 94 | ![Spelling performance comparison graph](./test/performance/spell.png "Spelling performance comparison graph") 95 | ![Suggestions performance comparison graph](./test/performance/suggest.png "Suggestions performance comparison graph") 96 | 97 | To run the tests on your machine, execute `npm run performance-test` and find the graphs in the `test/performance` folder. 98 | 99 | 4. To continue using the old version, use: 100 | 101 | `npm install --save nodehun@2.0.12` 102 | 103 | Works with Node v11 or lower, but some have reported compilation issues in v10 and v11. 104 | If you plan to use this version, please refer to the [old](https://github.com/Wulf/nodehun/blob/77e4be9e2cde8805061387d4783357c45c582a04/readme.md) readme file. 105 | 106 | 107 | ## Examples 108 | 109 | The following section includes short examples of various exposed operations. 110 | For complete examples, see the `/examples` directory. 111 | 112 | ### Checking for Correctness 113 | Nodehun offers a method that returns true or false if the passed word exists in the dictionary, i.e. is "correct". 114 | 115 | ```js 116 | await nodehun.spell('color') // => true 117 | await nodehun.spell('colour') // => false, assuming en_US dictionary 118 | ``` 119 | 120 | ### Spelling Suggestions 121 | Nodehun also offers a method that returns an array of words that could possibly match a misspelled word, ordered by most likely to be correct. 
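A synchronous variant, `suggestSync`, is also exposed (see the warning on synchronous methods below). As noted in the migration notes above, it returns `null` for correctly spelled words; the sketch below assumes the same `en_US` dictionary as the other examples, and the exact suggestion list will depend on your dictionary files.

```js
const suggestions = nodehun.suggestSync('calor')
// => an array of suggestions such as ['carol', 'valor', 'color', ...],
//    or null when the word is already spelled correctly
```

The promise-based form: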
122 |
123 | ```js
124 | await nodehun.suggest('color')
125 | // => null (since it's correctly spelled)
126 |
127 | await nodehun.suggest('calor')
128 | // => ['carol','valor','color','cal or','cal-or','caloric','calorie']
129 | ```
130 |
131 | ### Add Dictionary
132 | Nodehun can also add another dictionary on top of an existing dictionary object at runtime (this is not permanent) in order to merge two dictionaries. Once again, please do not actually use `readFileSync` in production.
133 |
134 | ```js
135 | const en_CA = fs.readFileSync('./path/to/en_CA.dic');
136 |
137 | await nodehun.suggest('colour') // => [ ...suggestions... ]
138 | // because "colour" is not a defined word in the US English dictionary
139 | await nodehun.addDictionary(en_CA)
140 | await nodehun.suggest('colour') // => null
141 | // (since the word is considered correctly spelled now)
142 | ```
143 |
144 | ### Add Word
145 | Nodehun can also add a single word to a dictionary at runtime (this is not permanent) in order to have a custom runtime dictionary. If you know anything about Hunspell, you can also add flags to the word.
146 |
147 | ```js
148 | await nodehun.suggest('colour') // => [ ...suggestions... ]
149 | // because "colour" is not a defined word in the US English dictionary
150 | await nodehun.add('colour')
151 | await nodehun.suggest('colour') // => null
152 | // (since 'colour' is correct now)
153 | ```
154 |
155 | Note: _colouring_ will still be considered incorrect. See the `addWithAffix` example below.
156 |
157 | ### Add Word (with affix)
158 | Like the method above, except it also applies the example word's affix definition to the new word.
159 |
160 | ```js
161 | await nodehun.suggest('colouring') // => [ ...suggestions... ]
162 | // because "colouring" is not a defined word in the US English dictionary
163 | await nodehun.addWithAffix('colour', 'color')
164 | await nodehun.suggest('colouring') // => null
165 | // (since 'colouring' is correct now)
166 | ```
167 |
168 | ### Remove Word
169 | Nodehun can also remove a single word from a dictionary at runtime (this is not permanent) in order to have a custom runtime dictionary. If you know anything about Hunspell, note that this method ignores flags and simply strips any words that match.
170 |
171 | ```js
172 | await nodehun.suggest('color') // => null (since the word is correctly spelled)
173 | await nodehun.remove('color')
174 | await nodehun.suggest('color') // => ['colon', 'dolor', ...etc ]
175 | ```
176 |
177 | ### Word Stems
178 | Nodehun exposes the Hunspell `stem` function, which analyzes the roots of words. Consult the Hunspell documentation for further understanding.
179 |
180 | ```js
181 | await nodehun.stem('telling') // => [telling, tell]
182 | ```
183 |
184 | ### Word Analysis
185 | Nodehun exposes the Hunspell `analyze` function, which analyzes a word and returns a morphological analysis. Consult the Hunspell documentation for further understanding.
186 |
187 | ```js
188 | await nodehun.analyze('telling')
189 | // with the appropriate dictionary files, it will return:
190 | // => [' st:telling ts:0', ' st:tell ts:0 al:told is:Vg']
191 | ```
192 |
193 | ### Word Generation
194 | Nodehun exposes the Hunspell `generate` function, which generates a variation of a word by matching the morphological structure of another word. Consult the Hunspell documentation for further understanding.
195 |
196 | ```js
197 | await nodehun.generate('telling', 'ran') // => [ 'told' ]
198 | await nodehun.generate('told', 'run') // => [ 'tell' ]
199 | ```
200 |
201 | ## Notes
202 |
203 | ### Improving Performance
204 |
205 | If the native performance isn't fast enough for your workload, you can try using an LRU cache for your operations. The idea is to cache the result of each operation and only call into the native addon on cache misses.
206 |
207 | ```js
208 | const LRUCache = require('lru-native2')
209 |
210 | var cache = new LRUCache({ maxElements: 1000 })
211 |
212 | async function suggestCached(word) {
213 |   let cachedResult = cache.get(word)
214 |   if (cachedResult) {
215 |     // cache hit
216 |     return cachedResult
217 |   } else {
218 |     // cache miss
219 |     let result = await nodehun.suggest(word)
220 |     cache.set(word, result)
221 |     return result
222 |   }
223 | }
224 |
225 | // ... example usage:
226 |
227 | const suggestions = await suggestCached('Wintre')
228 | // now 'Wintre' results are cached
229 |
230 | // ... some time later...
231 |
232 | const moreSuggestions = await suggestCached('Wintre')
233 | // => this result is fetched from the cache
234 | ```
235 |
236 | Here are two LRU implementations you can consider:
237 | * [lru-native2](https://github.com/adzerk/node-lru-native)
238 | * [lru-cache](https://github.com/isaacs/node-lru-cache)
239 |
240 | ### A Warning on Synchronous Methods
241 | There are synchronous versions of all the methods listed above, but they are not documented because they are only intended for people who really know what they are doing. I highly recommend looking at the C++ source code before using these methods in a production environment, as the locks involved with them can create some counterintuitive situations. For example, if you were to remove a word synchronously while many suggestion threads were working in the background, the remove call could take seconds to complete while it waits to acquire the read-write lock. This is obviously disastrous in a situation where you are servicing many requests.
242 |
243 | ### A Note About Open Office Dictionaries
244 | All files must be UTF-8 to work! When you download [open office dictionaries](http://cgit.freedesktop.org/libreoffice/dictionaries/tree/), don't assume that a file is UTF-8 just because it is being served as a UTF-8 file. You may have to convert the file to UTF-8 with the `iconv` unix utility (easy enough to do) in order for it to work.
245 |
246 | ### A Note About Creating Dictionaries
247 |
248 | If you want to create a new Hunspell dictionary, you will need a base affix file. I recommend simply using one of the base affix files from the open office dictionaries for the language you are creating a dictionary for. Once you get around to creating a dictionary, read the Hunspell documentation to learn how to properly flag the words. However, my guess is that the vast majority of people creating dictionaries will be creating a dictionary of proper nouns. Proper nouns simply require the "M" flag. This is what a dictionary of proper nouns might look like:
249 |
250 |     Aachen/M
251 |     aardvark/SM
252 |     Aaren/M
253 |     Aarhus/M
254 |     Aarika/M
255 |     Aaron/M
256 |
257 | Notice that the "S" flag denotes a proper noun that isn't capitalized; otherwise, look in the docs.
258 |
259 | ### Where To Get Dictionaries
260 |
261 | The included dictionaries were extracted from Libre Office.
259 | ### Where To Get Dictionaries 260 | 261 | The included dictionaries were extracted from LibreOffice. The LibreOffice versions have a modified aff file that makes generate() and analyze() much more useful. However, any MySpell-style dictionary will work. Here are a few sources: 262 | 263 | * [LibreOffice dictionaries](http://cgit.freedesktop.org/libreoffice/dictionaries/tree/) 264 | * [Official Aspell dictionaries](http://wordlist.aspell.net/dicts/) 265 | * [Open Office extensions](http://extensions.services.openoffice.org/dictionary) 266 | * [Mozilla Extensions](https://addons.mozilla.org/en-us/firefox/language-tools/) 267 | 268 | Also, check out [@wooorm](https://github.com/wooorm)'s UTF-8 dictionary collection [here](https://github.com/wooorm/dictionaries). 269 | 270 | Let the community know if you've found other dictionary repositories! 271 | 272 | # Development and Contribution 273 | 274 | ## Scripts 275 | 276 | The following is a list of commands and their descriptions which may 277 | help in development. 278 | 279 | `npm start`: to jumpstart the development server. This will automatically recompile the 280 | C++ source when changes are made and re-run the tests. 281 | 282 | `npm run start-test`: if you don't want to continuously compile the C++ source, but do want 283 | the tests to re-run when changes are made to the test files. 284 | 285 | `npm run build`: to compile the addon once. 286 | 287 | `npm test`: to run the tests once. 288 | 289 | `npm run performance-test`: to run the performance tests and output updated graphs. (see `test/performance`) 290 | 291 | ## Notes 292 | 293 | Make `node-gyp` build faster by increasing the number of cores it uses: 294 | 295 | ```bash 296 | export JOBS=max 297 | npm run build # super fast now! 298 | ``` 299 | 300 | ## Mentions 301 | 302 | Special thanks to [@nathanjsweet](https://github.com/nathanjsweet) for his grassroots efforts with this project, including the `hunspell-distributed` package upon which this library relies to provide buffer-based Hunspell initialization. 303 | -------------------------------------------------------------------------------- /hunspell/src/hunspell/config.h: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP 4 | systems. This function is required for `alloca.c' support on those systems. 5 | */ 6 | #undef CRAY_STACKSEG_END 7 | 8 | /* Define to 1 if using `alloca.c'. */ 9 | #undef C_ALLOCA 10 | 11 | /* Define to 1 if translation of program messages to the user's native 12 | language is requested. */ 13 | #undef ENABLE_NLS 14 | 15 | /* Define to 1 if you have `alloca', as a function or macro. */ 16 | #undef HAVE_ALLOCA 17 | 18 | /* Define to 1 if you have and it should be used (not on Ultrix). 19 | */ 20 | #undef HAVE_ALLOCA_H 21 | 22 | /* Define to 1 if you have the `argz_count' function. */ 23 | #undef HAVE_ARGZ_COUNT 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #undef HAVE_ARGZ_H 27 | 28 | /* Define to 1 if you have the `argz_next' function. */ 29 | #undef HAVE_ARGZ_NEXT 30 | 31 | /* Define to 1 if you have the `argz_stringify' function. */ 32 | #undef HAVE_ARGZ_STRINGIFY 33 | 34 | /* Define to 1 if you have the `asprintf' function. */ 35 | #undef HAVE_ASPRINTF 36 | 37 | /* Define to 1 if the compiler understands __builtin_expect. */ 38 | #undef HAVE_BUILTIN_EXPECT 39 | 40 | /* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the 41 | CoreFoundation framework.
*/ 42 | #undef HAVE_CFLOCALECOPYCURRENT 43 | 44 | /* Define to 1 if you have the MacOS X function CFPreferencesCopyAppValue in 45 | the CoreFoundation framework. */ 46 | #undef HAVE_CFPREFERENCESCOPYAPPVALUE 47 | 48 | /* "Define if you have the header" */ 49 | #undef HAVE_CURSES_H 50 | 51 | /* Define if the GNU dcgettext() function is already present or preinstalled. 52 | */ 53 | #undef HAVE_DCGETTEXT 54 | 55 | /* Define to 1 if you have the declaration of `feof_unlocked', and to 0 if you 56 | don't. */ 57 | #undef HAVE_DECL_FEOF_UNLOCKED 58 | 59 | /* Define to 1 if you have the declaration of `fgets_unlocked', and to 0 if 60 | you don't. */ 61 | #undef HAVE_DECL_FGETS_UNLOCKED 62 | 63 | /* Define to 1 if you have the declaration of `getc_unlocked', and to 0 if you 64 | don't. */ 65 | #undef HAVE_DECL_GETC_UNLOCKED 66 | 67 | /* Define to 1 if you have the declaration of `_snprintf', and to 0 if you 68 | don't. */ 69 | #undef HAVE_DECL__SNPRINTF 70 | 71 | /* Define to 1 if you have the declaration of `_snwprintf', and to 0 if you 72 | don't. */ 73 | #undef HAVE_DECL__SNWPRINTF 74 | 75 | /* Define to 1 if you have the header file. */ 76 | #undef HAVE_DLFCN_H 77 | 78 | /* Define to 1 if you have the header file. */ 79 | #undef HAVE_ERROR_H 80 | 81 | /* Define to 1 if you have the header file. */ 82 | #undef HAVE_FCNTL_H 83 | 84 | /* Define to 1 if you have the `fwprintf' function. */ 85 | #undef HAVE_FWPRINTF 86 | 87 | /* Define to 1 if you have the `getcwd' function. */ 88 | #undef HAVE_GETCWD 89 | 90 | /* Define to 1 if you have the `getegid' function. */ 91 | #undef HAVE_GETEGID 92 | 93 | /* Define to 1 if you have the `geteuid' function. */ 94 | #undef HAVE_GETEUID 95 | 96 | /* Define to 1 if you have the `getgid' function. */ 97 | #undef HAVE_GETGID 98 | 99 | /* Define to 1 if you have the `getpagesize' function. */ 100 | #undef HAVE_GETPAGESIZE 101 | 102 | /* Define if the GNU gettext() function is already present or preinstalled. */ 103 | #undef HAVE_GETTEXT 104 | 105 | /* Define to 1 if you have the `getuid' function. */ 106 | #undef HAVE_GETUID 107 | 108 | /* Define if you have the iconv() function and it works. */ 109 | #undef HAVE_ICONV 110 | 111 | /* Define if you have the 'intmax_t' type in or . */ 112 | #undef HAVE_INTMAX_T 113 | 114 | /* Define to 1 if you have the header file. */ 115 | #undef HAVE_INTTYPES_H 116 | 117 | /* Define if exists, doesn't clash with , and 118 | declares uintmax_t. */ 119 | #undef HAVE_INTTYPES_H_WITH_UINTMAX 120 | 121 | /* Define if you have and nl_langinfo(CODESET). */ 122 | #undef HAVE_LANGINFO_CODESET 123 | 124 | /* Define if your file defines LC_MESSAGES. */ 125 | #undef HAVE_LC_MESSAGES 126 | 127 | /* Define to 1 if you have the header file. */ 128 | #undef HAVE_LIBINTL_H 129 | 130 | /* Define to 1 if you have the header file. */ 131 | #undef HAVE_LIMITS_H 132 | 133 | /* Define to 1 if you have the header file. */ 134 | #undef HAVE_LOCALE_H 135 | 136 | /* Define to 1 if the system has the type `long long int'. */ 137 | #undef HAVE_LONG_LONG_INT 138 | 139 | /* Define to 1 if you have the `memchr' function. */ 140 | #undef HAVE_MEMCHR 141 | 142 | /* Define to 1 if you have the header file. */ 143 | #undef HAVE_MEMORY_H 144 | 145 | /* Define to 1 if you have the `mempcpy' function. */ 146 | #undef HAVE_MEMPCPY 147 | 148 | /* Define to 1 if you have a working `mmap' system call. */ 149 | #undef HAVE_MMAP 150 | 151 | /* Define to 1 if you have the `munmap' function. 
*/ 152 | #undef HAVE_MUNMAP 153 | 154 | /* "Define if you have the header" */ 155 | #undef HAVE_NCURSESW_H 156 | 157 | /* Define if you have and it defines the NL_LOCALE_NAME macro if 158 | _GNU_SOURCE is defined. */ 159 | #undef HAVE_NL_LOCALE_NAME 160 | 161 | /* Define if your printf() function supports format strings with positions. */ 162 | #undef HAVE_POSIX_PRINTF 163 | 164 | /* Define if the defines PTHREAD_MUTEX_RECURSIVE. */ 165 | #undef HAVE_PTHREAD_MUTEX_RECURSIVE 166 | 167 | /* Define if the POSIX multithreading library has read/write locks. */ 168 | #undef HAVE_PTHREAD_RWLOCK 169 | 170 | /* Define to 1 if you have the `putenv' function. */ 171 | #undef HAVE_PUTENV 172 | 173 | /* "Define if you have fancy command input editing with Readline" */ 174 | #undef HAVE_READLINE 175 | 176 | /* Define to 1 if you have the `setenv' function. */ 177 | #undef HAVE_SETENV 178 | 179 | /* Define to 1 if you have the `setlocale' function. */ 180 | #undef HAVE_SETLOCALE 181 | 182 | /* Define to 1 if you have the `snprintf' function. */ 183 | #undef HAVE_SNPRINTF 184 | 185 | /* Define to 1 if you have the header file. */ 186 | #undef HAVE_STDDEF_H 187 | 188 | /* Define to 1 if you have the header file. */ 189 | #undef HAVE_STDINT_H 190 | 191 | /* Define if exists, doesn't clash with , and declares 192 | uintmax_t. */ 193 | #undef HAVE_STDINT_H_WITH_UINTMAX 194 | 195 | /* Define to 1 if you have the header file. */ 196 | #undef HAVE_STDLIB_H 197 | 198 | /* Define to 1 if you have the `stpcpy' function. */ 199 | #undef HAVE_STPCPY 200 | 201 | /* Define to 1 if you have the `strcasecmp' function. */ 202 | #undef HAVE_STRCASECMP 203 | 204 | /* Define to 1 if you have the `strchr' function. */ 205 | #undef HAVE_STRCHR 206 | 207 | /* Define to 1 if you have the `strdup' function. */ 208 | #undef HAVE_STRDUP 209 | 210 | /* Define to 1 if you have the header file. */ 211 | #undef HAVE_STRINGS_H 212 | 213 | /* Define to 1 if you have the header file. */ 214 | #undef HAVE_STRING_H 215 | 216 | /* Define to 1 if you have the `strstr' function. */ 217 | #undef HAVE_STRSTR 218 | 219 | /* Define to 1 if you have the `strtoul' function. */ 220 | #undef HAVE_STRTOUL 221 | 222 | /* Define to 1 if you have the header file. */ 223 | #undef HAVE_SYS_PARAM_H 224 | 225 | /* Define to 1 if you have the header file. */ 226 | #undef HAVE_SYS_STAT_H 227 | 228 | /* Define to 1 if you have the header file. */ 229 | #undef HAVE_SYS_TYPES_H 230 | 231 | /* Define to 1 if you have the `tsearch' function. */ 232 | #undef HAVE_TSEARCH 233 | 234 | /* Define if you have the 'uintmax_t' type in or . */ 235 | #undef HAVE_UINTMAX_T 236 | 237 | /* Define to 1 if you have the header file. */ 238 | #undef HAVE_UNISTD_H 239 | 240 | /* Define to 1 if the system has the type `unsigned long long int'. */ 241 | #undef HAVE_UNSIGNED_LONG_LONG_INT 242 | 243 | /* Define to 1 or 0, depending whether the compiler supports simple visibility 244 | declarations. */ 245 | #undef HAVE_VISIBILITY 246 | 247 | /* Define if you have the 'wchar_t' type. */ 248 | #undef HAVE_WCHAR_T 249 | 250 | /* Define to 1 if you have the `wcslen' function. */ 251 | #undef HAVE_WCSLEN 252 | 253 | /* Define if you have the 'wint_t' type. */ 254 | #undef HAVE_WINT_T 255 | 256 | /* Define to 1 if you have the `__fsetlocking' function. 
*/ 257 | #undef HAVE___FSETLOCKING 258 | 259 | /* "Define if you use exterimental functions" */ 260 | #undef HUNSPELL_EXPERIMENTAL 261 | 262 | /* "Define if you need warning messages" */ 263 | #undef HUNSPELL_WARNING_ON 264 | 265 | /* Define as const if the declaration of iconv() needs const. */ 266 | #undef ICONV_CONST 267 | 268 | /* Define if integer division by zero raises signal SIGFPE. */ 269 | #undef INTDIV0_RAISES_SIGFPE 270 | 271 | /* Define to the sub-directory in which libtool stores uninstalled libraries. 272 | */ 273 | #undef LT_OBJDIR 274 | 275 | /* Name of package */ 276 | #undef PACKAGE 277 | 278 | /* Define to the address where bug reports for this package should be sent. */ 279 | #undef PACKAGE_BUGREPORT 280 | 281 | /* Define to the full name of this package. */ 282 | #undef PACKAGE_NAME 283 | 284 | /* Define to the full name and version of this package. */ 285 | #undef PACKAGE_STRING 286 | 287 | /* Define to the one symbol short name of this package. */ 288 | #undef PACKAGE_TARNAME 289 | 290 | /* Define to the home page for this package. */ 291 | #undef PACKAGE_URL 292 | 293 | /* Define to the version of this package. */ 294 | #undef PACKAGE_VERSION 295 | 296 | /* Define if exists and defines unusable PRI* macros. */ 297 | #undef PRI_MACROS_BROKEN 298 | 299 | /* Define if the pthread_in_use() detection is hard. */ 300 | #undef PTHREAD_IN_USE_DETECTION_HARD 301 | 302 | /* Define as the maximum value of type 'size_t', if the system doesn't define 303 | it. */ 304 | #undef SIZE_MAX 305 | 306 | /* If using the C implementation of alloca, define if you know the 307 | direction of stack growth for your system; otherwise it will be 308 | automatically deduced at runtime. 309 | STACK_DIRECTION > 0 => grows toward higher addresses 310 | STACK_DIRECTION < 0 => grows toward lower addresses 311 | STACK_DIRECTION = 0 => direction of growth unknown */ 312 | #undef STACK_DIRECTION 313 | 314 | /* Define to 1 if you have the ANSI C header files. */ 315 | #undef STDC_HEADERS 316 | 317 | /* Define if the POSIX multithreading library can be used. */ 318 | #undef USE_POSIX_THREADS 319 | 320 | /* Define if references to the POSIX multithreading library should be made 321 | weak. */ 322 | #undef USE_POSIX_THREADS_WEAK 323 | 324 | /* Define if the GNU Pth multithreading library can be used. */ 325 | #undef USE_PTH_THREADS 326 | 327 | /* Define if references to the GNU Pth multithreading library should be made 328 | weak. */ 329 | #undef USE_PTH_THREADS_WEAK 330 | 331 | /* Define if the old Solaris multithreading library can be used. */ 332 | #undef USE_SOLARIS_THREADS 333 | 334 | /* Define if references to the old Solaris multithreading library should be 335 | made weak. */ 336 | #undef USE_SOLARIS_THREADS_WEAK 337 | 338 | /* Enable extensions on AIX 3, Interix. */ 339 | #ifndef _ALL_SOURCE 340 | # undef _ALL_SOURCE 341 | #endif 342 | /* Enable GNU extensions on systems that have them. */ 343 | #ifndef _GNU_SOURCE 344 | # undef _GNU_SOURCE 345 | #endif 346 | /* Enable threading extensions on Solaris. */ 347 | #ifndef _POSIX_PTHREAD_SEMANTICS 348 | # undef _POSIX_PTHREAD_SEMANTICS 349 | #endif 350 | /* Enable extensions on HP NonStop. */ 351 | #ifndef _TANDEM_SOURCE 352 | # undef _TANDEM_SOURCE 353 | #endif 354 | /* Enable general extensions on Solaris. */ 355 | #ifndef __EXTENSIONS__ 356 | # undef __EXTENSIONS__ 357 | #endif 358 | 359 | 360 | /* Define if the Win32 multithreading API can be used. 
*/ 361 | #undef USE_WIN32_THREADS 362 | 363 | /* Version number of package */ 364 | #undef VERSION 365 | 366 | /* Define to 1 if on MINIX. */ 367 | #undef _MINIX 368 | 369 | /* Define to 2 if the system does not provide POSIX.1 features except with 370 | this defined. */ 371 | #undef _POSIX_1_SOURCE 372 | 373 | /* Define to 1 if you need to in order for `stat' and other things to work. */ 374 | #undef _POSIX_SOURCE 375 | 376 | /* Define to empty if `const' does not conform to ANSI C. */ 377 | #undef const 378 | 379 | /* Define to `__inline__' or `__inline' if that's what the C compiler 380 | calls it, or to nothing if 'inline' is not supported under any name. */ 381 | #ifndef __cplusplus 382 | #undef inline 383 | #endif 384 | 385 | /* Define as the type of the result of subtracting two pointers, if the system 386 | doesn't define it. */ 387 | #undef ptrdiff_t 388 | 389 | /* Define to `unsigned int' if does not define. */ 390 | #undef size_t 391 | 392 | /* Define to unsigned long or unsigned long long if and 393 | don't define. */ 394 | #undef uintmax_t 395 | 396 | 397 | #define __libc_lock_t gl_lock_t 398 | #define __libc_lock_define gl_lock_define 399 | #define __libc_lock_define_initialized gl_lock_define_initialized 400 | #define __libc_lock_init gl_lock_init 401 | #define __libc_lock_lock gl_lock_lock 402 | #define __libc_lock_unlock gl_lock_unlock 403 | #define __libc_lock_recursive_t gl_recursive_lock_t 404 | #define __libc_lock_define_recursive gl_recursive_lock_define 405 | #define __libc_lock_define_initialized_recursive gl_recursive_lock_define_initialized 406 | #define __libc_lock_init_recursive gl_recursive_lock_init 407 | #define __libc_lock_lock_recursive gl_recursive_lock_lock 408 | #define __libc_lock_unlock_recursive gl_recursive_lock_unlock 409 | #define glthread_in_use libintl_thread_in_use 410 | #define glthread_lock_init libintl_lock_init 411 | #define glthread_lock_lock libintl_lock_lock 412 | #define glthread_lock_unlock libintl_lock_unlock 413 | #define glthread_lock_destroy libintl_lock_destroy 414 | #define glthread_rwlock_init libintl_rwlock_init 415 | #define glthread_rwlock_rdlock libintl_rwlock_rdlock 416 | #define glthread_rwlock_wrlock libintl_rwlock_wrlock 417 | #define glthread_rwlock_unlock libintl_rwlock_unlock 418 | #define glthread_rwlock_destroy libintl_rwlock_destroy 419 | #define glthread_recursive_lock_init libintl_recursive_lock_init 420 | #define glthread_recursive_lock_lock libintl_recursive_lock_lock 421 | #define glthread_recursive_lock_unlock libintl_recursive_lock_unlock 422 | #define glthread_recursive_lock_destroy libintl_recursive_lock_destroy 423 | #define glthread_once libintl_once 424 | #define glthread_once_call libintl_once_call 425 | #define glthread_once_singlethreaded libintl_once_singlethreaded 426 | 427 | -------------------------------------------------------------------------------- /src/Nodehun.cc: -------------------------------------------------------------------------------- 1 | #include "Nodehun.h" 2 | #include 3 | #include 4 | #include "Async/AddDictionaryWorker.cc" 5 | #include "Async/SpellWorker.cc" 6 | #include "Async/SuggestWorker.cc" 7 | #include "Async/AnalyzeWorker.cc" 8 | #include "Async/StemWorker.cc" 9 | #include "Async/GenerateWorker.cc" 10 | #include "Async/AddWorker.cc" 11 | #include "Async/AddWithAffixWorker.cc" 12 | #include "Async/RemoveWorker.cc" 13 | 14 | const std::string INVALID_NUMBER_OF_ARGUMENTS = "Invalid number of arguments."; 15 | const std::string INVALID_FIRST_ARGUMENT = "First argument is 
invalid (incorrect type)."; 16 | const std::string INVALID_SECOND_ARGUMENT = "Second argument is invalid (incorrect type)."; 17 | const std::string INVALID_CONSTRUCTOR_CALL = "Use the new operator to create an instance of this object."; 18 | 19 | // LOGGING 20 | // #include 21 | // #include 22 | // std::ofstream logFile("log.txt"); 23 | 24 | Napi::FunctionReference Nodehun::constructor; 25 | 26 | Napi::Object Nodehun::Init(Napi::Env env, Napi::Object exports) { 27 | Napi::HandleScope scope(env); 28 | 29 | Napi::Function func = DefineClass(env, "Nodehun", { 30 | InstanceMethod("addDictionary", &Nodehun::addDictionary), 31 | InstanceMethod("addDictionarySync", &Nodehun::addDictionarySync), 32 | InstanceMethod("spell", &Nodehun::spell), 33 | InstanceMethod("spellSync", &Nodehun::spellSync), 34 | InstanceMethod("suggest", &Nodehun::suggest), 35 | InstanceMethod("suggestSync", &Nodehun::suggestSync), 36 | InstanceMethod("analyze", &Nodehun::analyze), 37 | InstanceMethod("analyzeSync", &Nodehun::analyzeSync), 38 | InstanceMethod("stem", &Nodehun::stem), 39 | InstanceMethod("stemSync", &Nodehun::stemSync), 40 | InstanceMethod("generate", &Nodehun::generate), 41 | InstanceMethod("generateSync", &Nodehun::generateSync), 42 | InstanceMethod("add", &Nodehun::add), 43 | InstanceMethod("addSync", &Nodehun::addSync), 44 | InstanceMethod("addWithAffix", &Nodehun::addWithAffix), 45 | InstanceMethod("addWithAffixSync", &Nodehun::addWithAffixSync), 46 | InstanceMethod("remove", &Nodehun::remove), 47 | InstanceMethod("removeSync", &Nodehun::removeSync), 48 | InstanceMethod("getDictionaryEncoding", &Nodehun::getDictionaryEncoding), 49 | InstanceMethod("getWordCharacters", &Nodehun::getWordCharacters), 50 | InstanceMethod("getWordCharactersUTF16", &Nodehun::getWordCharactersUTF16), 51 | InstanceMethod("getVersion", &Nodehun::getVersion) 52 | }); 53 | 54 | constructor = Napi::Persistent(func); 55 | constructor.SuppressDestruct(); 56 | 57 | exports.Set("Nodehun", func); 58 | return exports; 59 | } 60 | 61 | Nodehun::Nodehun(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) { 62 | Napi::Env env = info.Env(); 63 | Napi::HandleScope scope(env); 64 | 65 | Napi::Buffer affixBuffer = info[0].As>(); 66 | Napi::Buffer dictionaryBuffer = info[1].As>(); 67 | 68 | context = new HunspellContext(new Hunspell(affixBuffer.Data(), dictionaryBuffer.Data(), NULL, true)); 69 | }; 70 | 71 | Nodehun::~Nodehun() { 72 | if (context) { 73 | delete context; 74 | context = NULL; 75 | } 76 | } 77 | 78 | Napi::Object Nodehun::NewInstance(const Napi::CallbackInfo& info) { 79 | Napi::Env env = info.Env(); 80 | Napi::EscapableHandleScope scope(env); 81 | 82 | if (info.Length() != 2) { 83 | Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS).ThrowAsJavaScriptException(); 84 | } else if (!info[0].IsBuffer()) { 85 | Napi::Error::New(env, INVALID_FIRST_ARGUMENT).ThrowAsJavaScriptException(); 86 | } else if (!info[1].IsBuffer()) { 87 | Napi::Error::New(env, INVALID_SECOND_ARGUMENT).ThrowAsJavaScriptException(); 88 | } else if (!info.IsConstructCall()) { 89 | Napi::Error::New(env, INVALID_CONSTRUCTOR_CALL).ThrowAsJavaScriptException(); 90 | } 91 | 92 | if (env.IsExceptionPending()) { 93 | return Napi::Object::New(env); 94 | } else { 95 | Napi::Object obj = constructor.New({info[0], info[1]}); 96 | 97 | return scope.Escape(napi_value(obj)).ToObject(); 98 | } 99 | } 100 | 101 | Napi::Value Nodehun::addDictionarySync(const Napi::CallbackInfo& info) { 102 | Napi::Env env = info.Env(); 103 | 104 | if (info.Length() != 1) { 105 | Napi::Error error 
= Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 106 | error.ThrowAsJavaScriptException(); 107 | return error.Value(); 108 | } else if (!info[0].IsBuffer()) { 109 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 110 | error.ThrowAsJavaScriptException(); 111 | return error.Value(); 112 | } else { 113 | Napi::Buffer dictionaryBuffer = info[0].As>(); 114 | 115 | std::string dictionary(dictionaryBuffer.Data(), dictionaryBuffer.Length()); 116 | context->instance->add_dic(dictionary.c_str()); 117 | 118 | return env.Undefined(); 119 | } 120 | } 121 | 122 | Napi::Value Nodehun::addDictionary(const Napi::CallbackInfo& info) { 123 | Napi::Env env = info.Env(); 124 | 125 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); 126 | 127 | if (info.Length() != 1) { 128 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 129 | deferred.Reject(error.Value()); 130 | } else if (!info[0].IsBuffer()) { 131 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 132 | deferred.Reject(error.Value()); 133 | } else { 134 | Napi::Buffer dictionaryBuffer = info[0].As>(); 135 | 136 | AddDictionaryWorker* worker = new AddDictionaryWorker( 137 | context, 138 | deferred, 139 | std::string(dictionaryBuffer.Data(), dictionaryBuffer.Length()) 140 | ); 141 | 142 | worker->Queue(); 143 | } 144 | 145 | return deferred.Promise(); 146 | } 147 | 148 | Napi::Value Nodehun::spell(const Napi::CallbackInfo& info) { 149 | Napi::Env env = info.Env(); 150 | 151 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env); 152 | 153 | if (info.Length() != 1) { 154 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 155 | deferred.Reject(error.Value()); 156 | } else if (!info[0].IsString()) { 157 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 158 | deferred.Reject(error.Value()); 159 | } else { 160 | std::string word = info[0].ToString().Utf8Value(); 161 | 162 | SpellWorker* worker = new SpellWorker( 163 | context, 164 | deferred, 165 | word 166 | ); 167 | 168 | worker->Queue(); 169 | } 170 | 171 | return deferred.Promise(); 172 | } 173 | 174 | Napi::Value Nodehun::spellSync(const Napi::CallbackInfo& info) { 175 | Napi::Env env = info.Env(); 176 | Napi::HandleScope scope(env); 177 | 178 | if (info.Length() != 1) { 179 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 180 | error.ThrowAsJavaScriptException(); 181 | return error.Value(); 182 | } else if (!info[0].IsString()) { 183 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 184 | error.ThrowAsJavaScriptException(); 185 | return error.Value(); 186 | } else { 187 | std::string word = info[0].ToString().Utf8Value(); 188 | 189 | context->lockRead(); 190 | bool correct = context->instance->spell(word.c_str()); 191 | context->unlockRead(); 192 | 193 | return Napi::Boolean::New(env, correct); 194 | } 195 | } 196 | 197 | Napi::Value Nodehun::suggest(const Napi::CallbackInfo& info) { 198 | Napi::Env env = info.Env(); 199 | 200 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env); 201 | 202 | if (info.Length() != 1) { 203 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 204 | deferred.Reject(error.Value()); 205 | } else if (!info[0].IsString()) { 206 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 207 | deferred.Reject(error.Value()); 208 | } else { 209 | std::string word = info[0].ToString().Utf8Value(); 210 | 211 | SuggestWorker* worker = new 
SuggestWorker( 212 | context, 213 | deferred, 214 | word 215 | ); 216 | 217 | worker->Queue(); 218 | } 219 | 220 | return deferred.Promise(); 221 | } 222 | 223 | Napi::Value Nodehun::suggestSync(const Napi::CallbackInfo& info) { 224 | Napi::Env env = info.Env(); 225 | 226 | if (info.Length() != 1) { 227 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 228 | error.ThrowAsJavaScriptException(); 229 | return error.Value(); 230 | } else if (!info[0].IsString()) { 231 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 232 | error.ThrowAsJavaScriptException(); 233 | return error.Value(); 234 | } else { 235 | std::string word = info[0].ToString().Utf8Value(); 236 | 237 | context->lockRead(); 238 | bool isCorrect = this->context->instance->spell(word.c_str()); 239 | 240 | if (isCorrect) { 241 | context->unlockRead(); 242 | return env.Null(); 243 | } 244 | 245 | char** suggestions = NULL; 246 | int length = this->context->instance->suggest(&suggestions, word.c_str()); 247 | context->unlockRead(); 248 | 249 | Napi::Array array = Napi::Array::New(env, length); 250 | for (int i = 0; i < length; i++) { 251 | array.Set(i, Napi::String::New(env, suggestions[i])); 252 | } 253 | 254 | this->context->instance->free_list(&suggestions, length); 255 | 256 | return array; 257 | } 258 | } 259 | 260 | Napi::Value Nodehun::analyze(const Napi::CallbackInfo& info) { 261 | Napi::Env env = info.Env(); 262 | 263 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env); 264 | 265 | if (info.Length() != 1) { 266 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 267 | deferred.Reject(error.Value()); 268 | } else if (!info[0].IsString()) { 269 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 270 | deferred.Reject(error.Value()); 271 | } else { 272 | std::string word = info[0].ToString().Utf8Value(); 273 | 274 | AnalyzeWorker* worker = new AnalyzeWorker( 275 | context, 276 | deferred, 277 | word 278 | ); 279 | 280 | worker->Queue(); 281 | } 282 | 283 | return deferred.Promise(); 284 | } 285 | 286 | Napi::Value Nodehun::analyzeSync(const Napi::CallbackInfo& info) { 287 | Napi::Env env = info.Env(); 288 | 289 | if (info.Length() != 1) { 290 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 291 | error.ThrowAsJavaScriptException(); 292 | return error.Value(); 293 | } else if (!info[0].IsString()) { 294 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 295 | error.ThrowAsJavaScriptException(); 296 | return error.Value(); 297 | } else { 298 | std::string word = info[0].ToString().Utf8Value(); 299 | 300 | char** analysis = NULL; 301 | this->context->lockRead(); 302 | int length = this->context->instance->analyze(&analysis, word.c_str()); 303 | this->context->unlockRead(); 304 | 305 | Napi::Array array = Napi::Array::New(env, length); 306 | for (int i = 0; i < length; i++) { 307 | array.Set(i, Napi::String::New(env, analysis[i])); 308 | } 309 | 310 | context->instance->free_list(&analysis, length); 311 | 312 | return array; 313 | } 314 | } 315 | 316 | Napi::Value Nodehun::stem(const Napi::CallbackInfo& info) { 317 | Napi::Env env = info.Env(); 318 | 319 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env); 320 | 321 | if (info.Length() != 1) { 322 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 323 | deferred.Reject(error.Value()); 324 | } else if (!info[0].IsString()) { 325 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 326 
| deferred.Reject(error.Value()); 327 | } else { 328 | std::string word = info[0].ToString().Utf8Value(); 329 | 330 | StemWorker* worker = new StemWorker( 331 | context, 332 | deferred, 333 | word 334 | ); 335 | 336 | worker->Queue(); 337 | } 338 | 339 | return deferred.Promise(); 340 | } 341 | 342 | Napi::Value Nodehun::stemSync(const Napi::CallbackInfo& info) { 343 | Napi::Env env = info.Env(); 344 | 345 | if (info.Length() != 1) { 346 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 347 | error.ThrowAsJavaScriptException(); 348 | return error.Value(); 349 | } else if (!info[0].IsString()) { 350 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 351 | error.ThrowAsJavaScriptException(); 352 | return error.Value(); 353 | } else { 354 | std::string word = info[0].ToString().Utf8Value(); 355 | 356 | char** stems = NULL; 357 | context->lockRead(); 358 | int length = this->context->instance->stem(&stems, word.c_str()); 359 | context->unlockRead(); 360 | 361 | Napi::Array array = Napi::Array::New(env, length); 362 | for (int i = 0; i < length; i++) { 363 | array.Set(i, Napi::String::New(env, stems[i])); 364 | } 365 | 366 | context->instance->free_list(&stems, length); 367 | 368 | return array; 369 | } 370 | } 371 | 372 | Napi::Value Nodehun::generate(const Napi::CallbackInfo& info) { 373 | Napi::Env env = info.Env(); 374 | 375 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env); 376 | 377 | if (info.Length() != 2) { 378 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 379 | deferred.Reject(error.Value()); 380 | } else if (!info[0].IsString()) { 381 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 382 | deferred.Reject(error.Value()); 383 | } else if (!info[1].IsString()) { 384 | Napi::Error error = Napi::Error::New(env, INVALID_SECOND_ARGUMENT); 385 | deferred.Reject(error.Value()); 386 | } else { 387 | std::string word = info[0].ToString().Utf8Value(); 388 | std::string example = info[1].ToString().Utf8Value(); 389 | 390 | GenerateWorker* worker = new GenerateWorker( 391 | context, 392 | deferred, 393 | word, 394 | example 395 | ); 396 | 397 | worker->Queue(); 398 | } 399 | 400 | return deferred.Promise(); 401 | } 402 | 403 | Napi::Value Nodehun::generateSync(const Napi::CallbackInfo& info) { 404 | Napi::Env env = info.Env(); 405 | 406 | if (info.Length() != 2) { 407 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 408 | error.ThrowAsJavaScriptException(); 409 | return error.Value(); 410 | } else if (!info[0].IsString()) { 411 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 412 | error.ThrowAsJavaScriptException(); 413 | return error.Value(); 414 | } else if (!info[1].IsString()) { 415 | Napi::Error error = Napi::Error::New(env, INVALID_SECOND_ARGUMENT); 416 | error.ThrowAsJavaScriptException(); 417 | return error.Value(); 418 | } else { 419 | std::string word = info[0].ToString().Utf8Value(); 420 | std::string example = info[1].ToString().Utf8Value(); 421 | 422 | char** generates = NULL; 423 | context->lockRead(); 424 | int length = this->context->instance->generate( 425 | &generates, 426 | word.c_str(), 427 | example.c_str() 428 | ); 429 | context->unlockRead(); 430 | 431 | Napi::Array array = Napi::Array::New(env, length); 432 | for (int i = 0; i < length; i++) { 433 | array.Set(i, Napi::String::New(env, generates[i])); 434 | } 435 | 436 | context->instance->free_list(&generates, length); 437 | 438 | return array; 439 | } 440 | } 441 | 442 
| Napi::Value Nodehun::addSync(const Napi::CallbackInfo& info) { 443 | Napi::Env env = info.Env(); 444 | 445 | if (info.Length() != 1) { 446 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 447 | error.ThrowAsJavaScriptException(); 448 | 449 | return error.Value(); 450 | } else if (!info[0].IsString()) { 451 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 452 | error.ThrowAsJavaScriptException(); 453 | 454 | return error.Value(); 455 | } else { 456 | std::string word = info[0].ToString().Utf8Value(); 457 | 458 | context->lockWrite(); 459 | context->instance->add(word.c_str()); 460 | context->unlockWrite(); 461 | 462 | return env.Undefined(); 463 | } 464 | 465 | } 466 | 467 | Napi::Value Nodehun::add(const Napi::CallbackInfo& info) { 468 | Napi::Env env = info.Env(); 469 | 470 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); 471 | 472 | if (info.Length() != 1) { 473 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 474 | deferred.Reject(error.Value()); 475 | } else if (!info[0].IsString()) { 476 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 477 | deferred.Reject(error.Value()); 478 | } else { 479 | std::string word = info[0].ToString().Utf8Value(); 480 | 481 | AddWorker* worker = new AddWorker( 482 | context, 483 | deferred, 484 | word 485 | ); 486 | 487 | worker->Queue(); 488 | } 489 | 490 | return deferred.Promise(); 491 | } 492 | 493 | Napi::Value Nodehun::addWithAffixSync(const Napi::CallbackInfo& info) { 494 | Napi::Env env = info.Env(); 495 | 496 | if (info.Length() != 2) { 497 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 498 | error.ThrowAsJavaScriptException(); 499 | 500 | return error.Value(); 501 | } else if (!info[0].IsString()) { 502 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 503 | error.ThrowAsJavaScriptException(); 504 | 505 | return error.Value(); 506 | } else if (!info[1].IsString()) { 507 | Napi::Error error = Napi::Error::New(env, INVALID_SECOND_ARGUMENT); 508 | error.ThrowAsJavaScriptException(); 509 | 510 | return error.Value(); 511 | } else { 512 | std::string word = info[0].ToString().Utf8Value(); 513 | std::string example = info[1].ToString().Utf8Value(); 514 | 515 | context->lockWrite(); 516 | context->instance->add_with_affix(word.c_str(), example.c_str()); 517 | context->unlockWrite(); 518 | 519 | return env.Undefined(); 520 | } 521 | 522 | } 523 | 524 | Napi::Value Nodehun::addWithAffix(const Napi::CallbackInfo& info) { 525 | Napi::Env env = info.Env(); 526 | 527 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); 528 | 529 | if (info.Length() != 2) { 530 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 531 | deferred.Reject(error.Value()); 532 | } else if (!info[0].IsString()) { 533 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 534 | deferred.Reject(error.Value()); 535 | } else if (!info[1].IsString()) { 536 | Napi::Error error = Napi::Error::New(env, INVALID_SECOND_ARGUMENT); 537 | deferred.Reject(error.Value()); 538 | } else { 539 | std::string word = info[0].ToString().Utf8Value(); 540 | std::string example = info[1].ToString().Utf8Value(); 541 | 542 | AddWithAffixWorker* worker = new AddWithAffixWorker( 543 | context, 544 | deferred, 545 | word, 546 | example 547 | ); 548 | 549 | worker->Queue(); 550 | } 551 | 552 | return deferred.Promise(); 553 | } 554 | 555 | Napi::Value Nodehun::removeSync(const 
Napi::CallbackInfo& info) { 556 | Napi::Env env = info.Env(); 557 | 558 | if (info.Length() != 1) { 559 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 560 | error.ThrowAsJavaScriptException(); 561 | 562 | return error.Value(); 563 | } else if (!info[0].IsString()) { 564 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 565 | error.ThrowAsJavaScriptException(); 566 | 567 | return error.Value(); 568 | } else { 569 | std::string word = info[0].ToString().Utf8Value(); 570 | 571 | context->lockWrite(); 572 | context->instance->remove(word.c_str()); 573 | context->unlockWrite(); 574 | 575 | return env.Undefined(); 576 | } 577 | 578 | } 579 | 580 | Napi::Value Nodehun::remove(const Napi::CallbackInfo& info) { 581 | Napi::Env env = info.Env(); 582 | 583 | Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env()); 584 | 585 | if (info.Length() != 1) { 586 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 587 | deferred.Reject(error.Value()); 588 | } else if (!info[0].IsString()) { 589 | Napi::Error error = Napi::Error::New(env, INVALID_FIRST_ARGUMENT); 590 | deferred.Reject(error.Value()); 591 | } else { 592 | std::string word = info[0].ToString().Utf8Value(); 593 | 594 | RemoveWorker* worker = new RemoveWorker( 595 | context, 596 | deferred, 597 | word 598 | ); 599 | 600 | worker->Queue(); 601 | } 602 | 603 | return deferred.Promise(); 604 | } 605 | 606 | Napi::Value Nodehun::getDictionaryEncoding(const Napi::CallbackInfo& info) { 607 | Napi::Env env = info.Env(); 608 | 609 | if (info.Length() > 0) { 610 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 611 | error.ThrowAsJavaScriptException(); 612 | return error.Value(); 613 | } 614 | 615 | char* encoding = this->context->instance->get_dic_encoding(); 616 | 617 | if (encoding == NULL) { 618 | return env.Undefined(); 619 | } else { 620 | return Napi::String::New(env, encoding); 621 | } 622 | 623 | } 624 | 625 | Napi::Value Nodehun::getWordCharacters(const Napi::CallbackInfo& info) { 626 | Napi::Env env = info.Env(); 627 | 628 | if (info.Length() > 0) { 629 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 630 | error.ThrowAsJavaScriptException(); 631 | return error.Value(); 632 | } 633 | 634 | const char* wordCharacters = this->context->instance->get_wordchars(); 635 | 636 | if (wordCharacters == NULL) { 637 | return env.Undefined(); 638 | } else { 639 | return Napi::String::New(env, wordCharacters); 640 | } 641 | } 642 | 643 | Napi::Value Nodehun::getWordCharactersUTF16(const Napi::CallbackInfo& info) { 644 | Napi::Env env = info.Env(); 645 | 646 | if (info.Length() > 0) { 647 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 648 | error.ThrowAsJavaScriptException(); 649 | return error.Value(); 650 | } 651 | 652 | int length = 0; 653 | unsigned short* chars = this->context->instance->get_wordchars_utf16(&length); 654 | 655 | if (chars == NULL) { 656 | return env.Undefined(); 657 | } else { 658 | return Napi::String::New(env, ((char16_t*) chars), (size_t)length); 659 | } 660 | } 661 | 662 | Napi::Value Nodehun::getVersion(const Napi::CallbackInfo& info) { 663 | Napi::Env env = info.Env(); 664 | 665 | if (info.Length() > 0) { 666 | Napi::Error error = Napi::Error::New(env, INVALID_NUMBER_OF_ARGUMENTS); 667 | error.ThrowAsJavaScriptException(); 668 | return error.Value(); 669 | } 670 | 671 | const char* v = this->context->instance->get_version(); 672 | 673 | if (v == NULL) { 674 | return 
env.Undefined(); 675 | } else { 676 | return Napi::String::New(env, v); 677 | } 678 | } 679 | --------------------------------------------------------------------------------