├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .npmignore ├── LICENSE.md ├── README.md ├── binding.gyp ├── lib └── spellchecker.js ├── package-lock.json ├── package.json ├── spec ├── dictionaries │ ├── de_DE.aff │ ├── de_DE.dic │ ├── de_DE_frami.aff │ ├── de_DE_frami.dic │ ├── en_US.aff │ ├── en_US.dic │ ├── fr.aff │ ├── fr.dic │ ├── hyph_de_DE.dic │ └── hyph_fr.dic └── spellchecker-spec.coffee ├── src ├── buffers.h ├── main.cc ├── spellchecker.h ├── spellchecker_hunspell.cc ├── spellchecker_hunspell.h ├── spellchecker_linux.cc ├── spellchecker_mac.h ├── spellchecker_mac.mm ├── spellchecker_win.cc ├── spellchecker_win.h ├── transcoder.h ├── transcoder_posix.cc ├── transcoder_win.cc ├── worker.cc └── worker.h └── vendor ├── hunspell ├── AUTHORS ├── BUGS ├── COPYING ├── COPYING.LGPL ├── COPYING.MPL ├── ChangeLog ├── README ├── THANKS └── src │ ├── hunspell │ ├── README │ ├── affentry.cxx │ ├── affentry.hxx │ ├── affixmgr.cxx │ ├── affixmgr.hxx │ ├── atypes.hxx │ ├── baseaffix.hxx │ ├── csutil.cxx │ ├── csutil.hxx │ ├── dictmgr.cxx │ ├── dictmgr.hxx │ ├── filemgr.cxx │ ├── filemgr.hxx │ ├── hashmgr.cxx │ ├── hashmgr.hxx │ ├── htypes.hxx │ ├── hunspell.cxx │ ├── hunspell.dsp │ ├── hunspell.h │ ├── hunspell.hxx │ ├── hunvisapi.h │ ├── hunvisapi.h.in │ ├── hunzip.cxx │ ├── hunzip.hxx │ ├── langnum.hxx │ ├── license.hunspell │ ├── license.myspell │ ├── phonet.cxx │ ├── phonet.hxx │ ├── replist.cxx │ ├── replist.hxx │ ├── suggestmgr.cxx │ ├── suggestmgr.hxx │ ├── utf_info.cxx │ └── w_char.hxx │ └── parsers │ ├── firstparser.cxx │ ├── firstparser.hxx │ ├── htmlparser.cxx │ ├── htmlparser.hxx │ ├── latexparser.cxx │ ├── latexparser.hxx │ ├── manparser.cxx │ ├── manparser.hxx │ ├── testparser.cxx │ ├── textparser.cxx │ └── textparser.hxx └── hunspell_dictionaries ├── README.txt ├── en_US.aff └── en_US.dic /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | env: 6 | CI: 
true 7 | 8 | jobs: 9 | Test: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - uses: actions/setup-node@v2 14 | with: 15 | node-version: '14' 16 | - name: Install dependencies 17 | run: npm i 18 | - name: Run tests 19 | run: npm test 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | build/ 3 | node_modules/ 4 | npm-debug.log 5 | 6 | package-lock.json 7 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | build/ 2 | spec/ 3 | .npmignore 4 | .gitignore 5 | .DS_Store 6 | npm-debug.log 7 | appveyor.yml 8 | .travis.yml 9 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 GitHub Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ##### Atom and all repositories under Atom will be archived on December 15, 2022. Learn more in our [official announcement](https://github.blog/2022-06-08-sunsetting-atom/) 2 | # SpellChecker Node Module [![CI](https://github.com/atom/node-spellchecker/actions/workflows/ci.yml/badge.svg)](https://github.com/atom/node-spellchecker/actions/workflows/ci.yml) 3 | 4 | Native bindings to [NSSpellChecker](https://developer.apple.com/library/mac/#documentation/cocoa/reference/ApplicationKit/Classes/NSSpellChecker_Class/Reference/Reference.html), [Hunspell](http://hunspell.sourceforge.net/), or the [Windows 8 Spell Check API](https://msdn.microsoft.com/en-us/library/windows/desktop/hh869853(v=vs.85).aspx), depending on your platform. Windows 7 and below as well as Linux will rely on Hunspell. 5 | 6 | ## Installing 7 | 8 | ```bash 9 | npm install spellchecker 10 | ``` 11 | 12 | ## Using 13 | 14 | ```coffeescript 15 | SpellChecker = require 'spellchecker' 16 | ``` 17 | 18 | ### SpellChecker.isMisspelled(word) 19 | 20 | Check if a word is misspelled. 21 | 22 | `word` - String word to check. 23 | 24 | Returns `true` if the word is misspelled, `false` otherwise. 25 | 26 | ### SpellChecker.getCorrectionsForMisspelling(word) 27 | 28 | Get the corrections for a misspelled word. 29 | 30 | `word` - String word to get corrections for. 31 | 32 | Returns a non-null but possibly empty array of string corrections. 33 | 34 | ### SpellChecker.checkSpelling(corpus) 35 | 36 | Identify misspelled words in a corpus of text. 
37 | 38 | `corpus` - String corpus of text to spellcheck. 39 | 40 | Returns an Array containing `{start, end}` objects that describe an index range within the original String that contains a misspelled word. 41 | 42 | ### SpellChecker.checkSpellingAsync(corpus) 43 | 44 | Asynchronously identify misspelled words. 45 | 46 | `corpus` - String corpus of text to spellcheck. 47 | 48 | Returns a Promise that resolves with the Array described by `checkSpelling()`. 49 | 50 | ### SpellChecker.add(word) 51 | 52 | Adds a word to the dictionary. 53 | When using Hunspell, this will not modify the .dic file; new words must be added each time the spellchecker is created. Use a custom dictionary file. 54 | 55 | `word` - String word to add. 56 | 57 | Returns nothing. 58 | 59 | ### new Spellchecker() 60 | 61 | In addition to the above functions that are used on a default instance, a new instance of SpellChecker can be instantiated with the use of the `new` operator. The same methods are available with the instance but the dictionary and underlying API can be changed independently from the default instance. 62 | 63 | ```javascript 64 | const checker = new SpellChecker.Spellchecker() 65 | ``` 66 | 67 | #### SpellChecker.Spellchecker.setSpellcheckerType(type) 68 | 69 | Overrides the library selection for checking. Without this, the checker will use [Hunspell](http://hunspell.github.io/) on Linux, the [Spell Checking API](https://docs.microsoft.com/en-us/windows/desktop/intl/spell-checker-api) for Windows, and [NSSpellChecker](https://developer.apple.com/documentation/appkit/nsspellchecker) on Macs. 70 | 71 | If the environment variable `SPELLCHECKER_PREFER_HUNSPELL` is set to any value, the library will fallback to always using the Hunspell implementation. 
72 | 73 | This is the same behavior as calling `setSpellcheckerType` with the `USE_SYSTEM_DEFAULTS` constant: 74 | 75 | ```coffeescript 76 | checker = new SpellChecker.Spellchecker 77 | checker.setSpellcheckerType SpellChecker.USE_SYSTEM_DEFAULTS 78 | ``` 79 | 80 | To always use the system API and not fall back to Hunspell regardless of the environment variable, use the `ALWAYS_USE_SYSTEM` constant: 81 | 82 | ```coffeescript 83 | checker = new SpellChecker.Spellchecker 84 | checker.setSpellcheckerType SpellChecker.ALWAYS_USE_SYSTEM 85 | ``` 86 | 87 | Likewise, Hunspell can be forced with the `ALWAYS_USE_HUNSPELL` constant. 88 | 89 | ```javascript 90 | const checker = new SpellChecker.Spellchecker(); 91 | checker.setSpellcheckerType(SpellChecker.ALWAYS_USE_HUNSPELL); 92 | ``` 93 | 94 | On Linux, Hunspell is always used regardless of the setting. This method must also be called before any spelling is done otherwise it will throw an error. 95 | 96 | This returns nothing. 97 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | 'variables': { 3 | 'conditions': [ 4 | ['OS=="mac"', { 5 | 'spellchecker_use_hunspell%': 'true', 6 | }], 7 | ['OS=="linux"', { 8 | 'spellchecker_use_hunspell': 'true', 9 | }], 10 | ['OS=="win"', { 11 | 'spellchecker_use_hunspell': 'true', 12 | }], 13 | ], 14 | }, 15 | 'target_defaults': { 16 | 'cflags_cc': ['-std=c++11'], 17 | 'conditions': [ 18 | ['OS=="win"', { 19 | 'msvs_disabled_warnings': [ 20 | 4267, # conversion from 'size_t' to 'int', possible loss of data 21 | 4530, # C++ exception handler used, but unwind semantics are not enabled 22 | 4506, # no definition for inline function 23 | ], 24 | }], 25 | ['OS=="mac"', { 26 | 'xcode_settings': { 27 | 'CLANG_CXX_LIBRARY': 'libc++', 28 | 'CLANG_CXX_LANGUAGE_STANDARD': 'c++11' 29 | } 30 | }] 31 | ], 32 | }, 33 | 'targets': [ 34 | { 35 | 'target_name': 'spellchecker', 
36 | 'include_dirs': [ '=1.0.1", 216 | "coffeestack": ">=1 <2", 217 | "gaze": "~0.3.2", 218 | "jasmine-reporters": ">=0.2.0", 219 | "mkdirp": "~0.3.5", 220 | "requirejs": ">=0.27.1", 221 | "underscore": ">= 1.3.1", 222 | "walkdir": ">= 0.0.1" 223 | } 224 | }, 225 | "jasmine-reporters": { 226 | "version": "2.3.2", 227 | "resolved": "https://registry.npmjs.org/jasmine-reporters/-/jasmine-reporters-2.3.2.tgz", 228 | "integrity": "sha512-u/7AT9SkuZsUfFBLLzbErohTGNsEUCKaQbsVYnLFW1gEuL2DzmBL4n8v90uZsqIqlWvWUgian8J6yOt5Fyk/+A==", 229 | "dev": true, 230 | "requires": { 231 | "mkdirp": "^0.5.1", 232 | "xmldom": "^0.1.22" 233 | }, 234 | "dependencies": { 235 | "mkdirp": { 236 | "version": "0.5.1", 237 | "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", 238 | "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", 239 | "dev": true, 240 | "requires": { 241 | "minimist": "0.0.8" 242 | } 243 | } 244 | } 245 | }, 246 | "lru-cache": { 247 | "version": "2.7.3", 248 | "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-2.7.3.tgz", 249 | "integrity": "sha1-bUUk6LlV+V1PW1iFHOId1y+06VI=", 250 | "dev": true 251 | }, 252 | "minimatch": { 253 | "version": "3.0.4", 254 | "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 255 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", 256 | "dev": true, 257 | "requires": { 258 | "brace-expansion": "^1.1.7" 259 | } 260 | }, 261 | "minimist": { 262 | "version": "0.0.8", 263 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", 264 | "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", 265 | "dev": true 266 | }, 267 | "mkdirp": { 268 | "version": "0.3.5", 269 | "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.3.5.tgz", 270 | "integrity": "sha1-3j5fiWHIjHh+4TaN+EmsRBPsqNc=", 271 | "dev": true 272 | }, 273 | "nan": { 274 | "version": "2.14.0", 275 | "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz", 276 | 
"integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg==" 277 | }, 278 | "once": { 279 | "version": "1.4.0", 280 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 281 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", 282 | "dev": true, 283 | "requires": { 284 | "wrappy": "1" 285 | } 286 | }, 287 | "path-is-absolute": { 288 | "version": "1.0.1", 289 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", 290 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", 291 | "dev": true 292 | }, 293 | "requirejs": { 294 | "version": "2.3.6", 295 | "resolved": "https://registry.npmjs.org/requirejs/-/requirejs-2.3.6.tgz", 296 | "integrity": "sha512-ipEzlWQe6RK3jkzikgCupiTbTvm4S0/CAU5GlgptkN5SO6F3u0UD0K18wy6ErDqiCyP4J4YYe1HuAShvsxePLg==", 297 | "dev": true 298 | }, 299 | "rimraf": { 300 | "version": "2.6.3", 301 | "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz", 302 | "integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==", 303 | "dev": true, 304 | "requires": { 305 | "glob": "^7.1.3" 306 | } 307 | }, 308 | "sigmund": { 309 | "version": "1.0.1", 310 | "resolved": "https://registry.npmjs.org/sigmund/-/sigmund-1.0.1.tgz", 311 | "integrity": "sha1-P/IfGYytIXX587eBhT/ZTQ0ZtZA=", 312 | "dev": true 313 | }, 314 | "source-map": { 315 | "version": "0.1.43", 316 | "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.1.43.tgz", 317 | "integrity": "sha1-wkvBRspRfBRx9drL4lcbK3+eM0Y=", 318 | "dev": true, 319 | "requires": { 320 | "amdefine": ">=0.0.4" 321 | } 322 | }, 323 | "underscore": { 324 | "version": "1.9.1", 325 | "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.1.tgz", 326 | "integrity": "sha512-5/4etnCkd9c8gwgowi5/om/mYO5ajCaOgdzj/oW+0eQV9WxKBDZw5+ycmKmeaTXjInS/W0BzpGLo2xR2aBwZdg==", 327 | "dev": true 328 | }, 329 | "underscore-plus": { 330 | "version": "1.6.8", 331 | 
"resolved": "https://registry.npmjs.org/underscore-plus/-/underscore-plus-1.6.8.tgz", 332 | "integrity": "sha512-88PrCeMKeAAC1L4xjSiiZ3Fg6kZOYrLpLGVPPeqKq/662DfQe/KTSKdSR/Q/tucKNnfW2MNAUGSCkDf8HmXC5Q==", 333 | "dev": true, 334 | "requires": { 335 | "underscore": "~1.8.3" 336 | }, 337 | "dependencies": { 338 | "underscore": { 339 | "version": "1.8.3", 340 | "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.8.3.tgz", 341 | "integrity": "sha1-Tz+1OxBuYJf8+ctBCfKl6b36UCI=", 342 | "dev": true 343 | } 344 | } 345 | }, 346 | "walkdir": { 347 | "version": "0.0.7", 348 | "resolved": "https://registry.npmjs.org/walkdir/-/walkdir-0.0.7.tgz", 349 | "integrity": "sha1-BNoCcKh6d4VAFzzb8KLbSZqNnik=", 350 | "dev": true 351 | }, 352 | "wrappy": { 353 | "version": "1.0.2", 354 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 355 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", 356 | "dev": true 357 | }, 358 | "xmldom": { 359 | "version": "0.1.27", 360 | "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", 361 | "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=", 362 | "dev": true 363 | } 364 | } 365 | } 366 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "main": "./lib/spellchecker.js", 3 | "name": "spellchecker", 4 | "description": "Bindings to native spellchecker", 5 | "version": "3.7.1", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/atom/node-spellchecker.git" 10 | }, 11 | "bugs": { 12 | "url": "https://github.com/atom/node-spellchecker/issues" 13 | }, 14 | "homepage": "http://atom.github.io/node-spellchecker", 15 | "scripts": { 16 | "test": "jasmine-focused --captureExceptions --coffee spec/" 17 | }, 18 | "devDependencies": { 19 | "jasmine-focused": "^1.0.7" 20 | }, 21 | "dependencies": { 22 | "any-promise": "^1.3.0", 23 | "nan": "^2.14.0" 24 
| } 25 | } 26 | -------------------------------------------------------------------------------- /spec/dictionaries/de_DE_frami.aff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/spec/dictionaries/de_DE_frami.aff -------------------------------------------------------------------------------- /spec/dictionaries/de_DE_frami.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/spec/dictionaries/de_DE_frami.dic -------------------------------------------------------------------------------- /spec/dictionaries/hyph_de_DE.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/spec/dictionaries/hyph_de_DE.dic -------------------------------------------------------------------------------- /src/buffers.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_BUFFERS_H_ 2 | #define SRC_BUFFERS_H_ 3 | 4 | // Translated into UTF-8, this means that a worst-case UTF-16 5 | // buffer would be double (4 bytes). 6 | #define MAX_UTF8_BUFFER 256 7 | 8 | // When converting UTF-16 to UTF-8, we still have 1-byte characters 9 | // but it can double the length when going from a wide to two bytes. 10 | #define MAX_UTF16_TO_UTF8_BUFFER MAX_UTF8_BUFFER 11 | 12 | // Converting between buffers needs a bit of space also. We need this because 13 | // Windows need to convert a UTF8 buffer to UTF16 and then back to UTF8. 
14 | #define MAX_TRANSCODE_BUFFER (MAX_UTF8_BUFFER * 2 + 1) 15 | 16 | #endif // SRC_BUFFERS_H_ 17 | -------------------------------------------------------------------------------- /src/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "nan.h" 4 | #include "spellchecker.h" 5 | #include "worker.h" 6 | 7 | using Nan::ObjectWrap; 8 | using namespace spellchecker; 9 | using namespace v8; 10 | 11 | namespace { 12 | 13 | class Spellchecker : public Nan::ObjectWrap { 14 | SpellcheckerImplementation* impl; 15 | 16 | static NAN_METHOD(New) { 17 | Nan::HandleScope scope; 18 | Spellchecker* that = new Spellchecker(); 19 | that->Wrap(info.This()); 20 | 21 | info.GetReturnValue().Set(info.This()); 22 | } 23 | 24 | static NAN_METHOD(SetSpellcheckerType) { 25 | // Pull out the handle to the spellchecker instance. 26 | Nan::HandleScope scope; 27 | 28 | if (info.Length() < 1) { 29 | return Nan::ThrowError("Bad argument: missing mode"); 30 | } 31 | 32 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 33 | 34 | // If we already have an implementation, then we want to complain because 35 | // we can't handle reinitializing the dictionary paths. 36 | if (that->impl) { 37 | return Nan::ThrowError("Cannot call SetSpellcheckerType after the dictionary has been configured or used"); 38 | } 39 | 40 | // Make sure we have a sane value for our enumeration. 41 | int modeNumber = info[0]->Int32Value(Nan::GetCurrentContext()).ToChecked(); 42 | int spellcheckerType = USE_SYSTEM_DEFAULTS; 43 | 44 | switch (modeNumber) 45 | { 46 | case 0: 47 | break; 48 | case 1: 49 | spellcheckerType = ALWAYS_USE_SYSTEM; 50 | break; 51 | case 2: 52 | spellcheckerType = ALWAYS_USE_HUNSPELL; 53 | break; 54 | default: 55 | return Nan::ThrowError("Bad argument: SetSpellcheckerType must be given 0, 1, or 2 as a parameter"); 56 | } 57 | 58 | // Create a new one with the appropriate checker type. 
59 | that->impl = SpellcheckerFactory::CreateSpellchecker(spellcheckerType); 60 | } 61 | 62 | static NAN_METHOD(SetDictionary) { 63 | Nan::HandleScope scope; 64 | 65 | if (info.Length() < 1) { // only the language is mandatory; the directory defaults to "." below 66 | return Nan::ThrowError("Bad arguments"); 67 | } 68 | 69 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 70 | 71 | std::string language = *Nan::Utf8String(info[0]); 72 | std::string directory = "."; 73 | if (info.Length() > 1) { 74 | directory = *Nan::Utf8String(info[1]); 75 | } 76 | 77 | // Make sure we have the implementation loaded. 78 | Spellchecker::EnsureLoadedImplementation(that); 79 | 80 | bool result = that->impl->SetDictionary(language, directory); 81 | info.GetReturnValue().Set(Nan::New(result)); 82 | } 83 | 84 | static NAN_METHOD(IsMisspelled) { 85 | Nan::HandleScope scope; 86 | if (info.Length() < 1) { 87 | return Nan::ThrowError("Bad argument"); 88 | } 89 | 90 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 91 | std::string word = *Nan::Utf8String(info[0]); 92 | 93 | // Make sure we have the implementation loaded. 94 | Spellchecker::EnsureLoadedImplementation(that); 95 | 96 | info.GetReturnValue().Set(Nan::New(that->impl->IsMisspelled(word))); 97 | } 98 | 99 | static NAN_METHOD(CheckSpelling) { 100 | Nan::HandleScope scope; 101 | if (info.Length() < 1) { 102 | return Nan::ThrowError("Bad argument"); 103 | } 104 | 105 | Local string = Local::Cast(info[0]); 106 | if (!string->IsString()) { 107 | return Nan::ThrowError("Bad argument"); 108 | } 109 | 110 | Local result = Nan::New(); 111 | info.GetReturnValue().Set(result); 112 | 113 | if (string->Length() == 0) { 114 | return; 115 | } 116 | 117 | std::vector text(string->Length() + 1); 118 | string->Write( 119 | #if V8_MAJOR_VERSION > 6 120 | info.GetIsolate(), 121 | #endif 122 | reinterpret_cast(text.data())); 123 | 124 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 125 | 126 | // Make sure we have the implementation loaded. 
127 | Spellchecker::EnsureLoadedImplementation(that); 128 | 129 | std::vector misspelled_ranges = that->impl->CheckSpelling(text.data(), text.size()); 130 | 131 | std::vector::const_iterator iter = misspelled_ranges.begin(); 132 | v8::Local context = Nan::GetCurrentContext(); 133 | for (; iter != misspelled_ranges.end(); ++iter) { 134 | size_t index = iter - misspelled_ranges.begin(); 135 | uint32_t start = iter->start, end = iter->end; 136 | 137 | Local misspelled_range = Nan::New(); 138 | misspelled_range->Set(context, Nan::New("start").ToLocalChecked(), Nan::New(start)); 139 | misspelled_range->Set(context, Nan::New("end").ToLocalChecked(), Nan::New(end)); 140 | result->Set(context, index, misspelled_range); 141 | } 142 | } 143 | 144 | static NAN_METHOD(CheckSpellingAsync) { 145 | Nan::HandleScope scope; 146 | if (info.Length() < 2) { 147 | return Nan::ThrowError("Bad argument"); 148 | } 149 | 150 | Local string = Local::Cast(info[0]); 151 | if (!string->IsString() || !info[1]->IsFunction()) { // reject a non-callable callback before it is wrapped in Nan::Callback 152 | return Nan::ThrowError("Bad argument"); 153 | } 154 | 155 | Nan::Callback *callback = new Nan::Callback(info[1].As()); 156 | 157 | std::vector corpus(string->Length() + 1); 158 | string->Write( 159 | #if V8_MAJOR_VERSION > 6 160 | info.GetIsolate(), 161 | #endif 162 | reinterpret_cast(corpus.data())); 163 | 164 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 165 | 166 | // Make sure we have the implementation loaded. 167 | Spellchecker::EnsureLoadedImplementation(that); 168 | 169 | CheckSpellingWorker* worker = new CheckSpellingWorker(std::move(corpus), that->impl, callback); 170 | Nan::AsyncQueueWorker(worker); 171 | } 172 | 173 | static NAN_METHOD(Add) { 174 | Nan::HandleScope scope; 175 | if (info.Length() < 1) { 176 | return Nan::ThrowError("Bad argument"); 177 | } 178 | 179 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 180 | 181 | // Make sure we have the implementation loaded. 
182 | Spellchecker::EnsureLoadedImplementation(that); 183 | 184 | std::string word = *Nan::Utf8String(info[0]); 185 | that->impl->Add(word); 186 | return; 187 | } 188 | 189 | static NAN_METHOD(Remove) { 190 | Nan::HandleScope scope; 191 | if (info.Length() < 1) { 192 | return Nan::ThrowError("Bad argument"); 193 | } 194 | 195 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 196 | 197 | // Make sure we have the implementation loaded. 198 | Spellchecker::EnsureLoadedImplementation(that); 199 | 200 | std::string word = *Nan::Utf8String(info[0]); 201 | that->impl->Remove(word); 202 | return; 203 | } 204 | 205 | static NAN_METHOD(GetAvailableDictionaries) { 206 | Nan::HandleScope scope; 207 | 208 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 209 | 210 | // Make sure we have the implementation loaded. 211 | Spellchecker::EnsureLoadedImplementation(that); 212 | 213 | std::string path = "."; 214 | if (info.Length() > 0) { 215 | path = *Nan::Utf8String(info[0]); // assign the outer variable; redeclaring it here discarded the caller's path 216 | } 217 | 218 | std::vector dictionaries = 219 | that->impl->GetAvailableDictionaries(path); 220 | 221 | v8::Local context = Nan::GetCurrentContext(); 222 | Local result = Nan::New(dictionaries.size()); 223 | for (size_t i = 0; i < dictionaries.size(); ++i) { 224 | const std::string& dict = dictionaries[i]; 225 | result->Set(context, i, Nan::New(dict.data(), dict.size()).ToLocalChecked()); 226 | } 227 | 228 | info.GetReturnValue().Set(result); 229 | } 230 | 231 | static NAN_METHOD(GetCorrectionsForMisspelling) { 232 | Nan::HandleScope scope; 233 | if (info.Length() < 1) { 234 | return Nan::ThrowError("Bad argument"); 235 | } 236 | 237 | Spellchecker* that = Nan::ObjectWrap::Unwrap(info.Holder()); 238 | 239 | // Make sure we have the implementation loaded. 
240 | Spellchecker::EnsureLoadedImplementation(that); 241 | 242 | std::string word = *Nan::Utf8String(info[0]); 243 | std::vector corrections = 244 | that->impl->GetCorrectionsForMisspelling(word); 245 | 246 | Local result = Nan::New(corrections.size()); 247 | v8::Local context = Nan::GetCurrentContext(); 248 | for (size_t i = 0; i < corrections.size(); ++i) { 249 | const std::string& word = corrections[i]; 250 | 251 | Nan::MaybeLocal val = Nan::New(word.data(), word.size()); 252 | result->Set(context, i, val.ToLocalChecked()); 253 | } 254 | 255 | info.GetReturnValue().Set(result); 256 | } 257 | 258 | Spellchecker() { 259 | impl = NULL; 260 | } 261 | 262 | // actual destructor 263 | virtual ~Spellchecker() { 264 | delete impl; 265 | } 266 | 267 | static void EnsureLoadedImplementation(Spellchecker *that) { 268 | if (!that->impl) { 269 | that->impl = SpellcheckerFactory::CreateSpellchecker(USE_SYSTEM_DEFAULTS); 270 | } 271 | } 272 | 273 | public: 274 | static void Init(Local exports) { 275 | Local tpl = Nan::New(Spellchecker::New); 276 | 277 | tpl->SetClassName(Nan::New("Spellchecker").ToLocalChecked()); 278 | tpl->InstanceTemplate()->SetInternalFieldCount(1); 279 | 280 | Nan::SetPrototypeMethod(tpl, "setSpellcheckerType", Spellchecker::SetSpellcheckerType); 281 | Nan::SetPrototypeMethod(tpl, "setDictionary", Spellchecker::SetDictionary); 282 | Nan::SetPrototypeMethod(tpl, "getAvailableDictionaries", Spellchecker::GetAvailableDictionaries); 283 | Nan::SetPrototypeMethod(tpl, "getCorrectionsForMisspelling", Spellchecker::GetCorrectionsForMisspelling); 284 | Nan::SetPrototypeMethod(tpl, "isMisspelled", Spellchecker::IsMisspelled); 285 | Nan::SetPrototypeMethod(tpl, "checkSpelling", Spellchecker::CheckSpelling); 286 | Nan::SetPrototypeMethod(tpl, "checkSpellingAsync", Spellchecker::CheckSpellingAsync); 287 | Nan::SetPrototypeMethod(tpl, "add", Spellchecker::Add); 288 | Nan::SetPrototypeMethod(tpl, "remove", Spellchecker::Remove); 289 | 290 | Isolate* isolate = 
exports->GetIsolate(); 291 | Local context = isolate->GetCurrentContext(); 292 | Nan::Set(exports, Nan::New("Spellchecker").ToLocalChecked(), tpl->GetFunction(context).ToLocalChecked()); 293 | } 294 | }; 295 | 296 | void Init(Local exports, Local module) { 297 | Spellchecker::Init(exports); 298 | } 299 | 300 | } // namespace 301 | 302 | NODE_MODULE(spellchecker, Init) 303 | -------------------------------------------------------------------------------- /src/spellchecker.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_SPELLCHECKER_H_ 2 | #define SRC_SPELLCHECKER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace spellchecker { 10 | 11 | const int USE_SYSTEM_DEFAULTS = 0; 12 | const int ALWAYS_USE_SYSTEM = 1; 13 | const int ALWAYS_USE_HUNSPELL = 2; 14 | 15 | struct MisspelledRange { 16 | size_t start; 17 | size_t end; 18 | }; 19 | 20 | class SpellcheckerImplementation; 21 | 22 | class SpellcheckerThreadView { 23 | public: 24 | SpellcheckerThreadView(SpellcheckerImplementation *impl) : impl{impl} 25 | { 26 | // 27 | } 28 | 29 | virtual ~SpellcheckerThreadView() 30 | { 31 | // 32 | } 33 | 34 | virtual std::vector CheckSpelling(const uint16_t *text, size_t length); 35 | 36 | private: 37 | SpellcheckerImplementation *impl; 38 | }; 39 | 40 | class SpellcheckerImplementation { 41 | public: 42 | virtual bool SetDictionary(const std::string& language, const std::string& path) = 0; 43 | virtual std::vector GetAvailableDictionaries(const std::string& path) = 0; 44 | 45 | // Returns an array containing possible corrections for the word. 46 | virtual std::vector GetCorrectionsForMisspelling(const std::string& word) = 0; 47 | 48 | // Returns true if the word is misspelled. 49 | virtual bool IsMisspelled(const std::string& word) = 0; 50 | 51 | virtual std::vector CheckSpelling(const uint16_t *text, size_t length) = 0; 52 | 53 | // Adds a new word to the dictionary. 
54 | // NB: When using Hunspell, this will not modify the .dic file; custom words must be added each 55 | // time the spellchecker is created. Use a custom dictionary file. 56 | virtual void Add(const std::string& word) = 0; 57 | 58 | // Removes a word from the custom dictionary added by Add. 59 | // NB: When using Hunspell, this will not modify the .dic file; custom words must be added each 60 | // time the spellchecker is created. Use a custom dictionary file. 61 | virtual void Remove(const std::string& word) = 0; 62 | 63 | virtual std::unique_ptr CreateThreadView() 64 | { 65 | return std::unique_ptr(new SpellcheckerThreadView(this)); 66 | } 67 | 68 | virtual ~SpellcheckerImplementation() {} 69 | }; 70 | 71 | class SpellcheckerFactory { 72 | public: 73 | static SpellcheckerImplementation* CreateSpellchecker(int spellcheckerType); 74 | }; 75 | 76 | inline std::vector SpellcheckerThreadView::CheckSpelling(const uint16_t *text, size_t length) 77 | { 78 | return impl->CheckSpelling(text, length); 79 | } 80 | 81 | } // namespace spellchecker 82 | 83 | #endif // SRC_SPELLCHECKER_H_ 84 | -------------------------------------------------------------------------------- /src/spellchecker_hunspell.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../vendor/hunspell/src/hunspell/hunspell.hxx" 6 | #include "spellchecker_hunspell.h" 7 | #include "buffers.h" 8 | 9 | namespace spellchecker { 10 | 11 | HunspellSpellchecker::HunspellSpellchecker() : hunspell(NULL), transcoder(NewUTF16ToUTF8Transcoder()), toDictionaryTranscoder(NULL), fromDictionaryTranscoder(NULL) { } 12 | 13 | HunspellSpellchecker::~HunspellSpellchecker() { 14 | if (hunspell) { 15 | delete hunspell; 16 | } 17 | 18 | if (transcoder) { 19 | FreeTranscoder(transcoder); 20 | } 21 | 22 | if (toDictionaryTranscoder) { 23 | FreeTranscoder(toDictionaryTranscoder); 24 | } 25 | 26 | if (fromDictionaryTranscoder) { 27 | 
FreeTranscoder(fromDictionaryTranscoder); 28 | } 29 | } 30 | 31 | bool HunspellSpellchecker::SetDictionary(const std::string& language, const std::string& dirname) { 32 | if (hunspell) { 33 | delete hunspell; 34 | hunspell = NULL; 35 | } 36 | 37 | // NB: Hunspell uses underscore to separate language and locale, and Win8 uses 38 | // dash - if they use the wrong one, just silently replace it for them 39 | std::string lang = language; 40 | std::replace(lang.begin(), lang.end(), '-', '_'); 41 | 42 | std::string affixpath = dirname + "/" + lang + ".aff"; 43 | std::string dpath = dirname + "/" + lang + ".dic"; 44 | 45 | // TODO: This code is almost certainly jacked on Win32 for non-ASCII paths 46 | FILE* handle = fopen(dpath.c_str(), "r"); 47 | if (!handle) { 48 | return false; 49 | } 50 | fclose(handle); 51 | 52 | // Create the hunspell object with our dictionary. 53 | hunspell = new Hunspell(affixpath.c_str(), dpath.c_str()); 54 | 55 | // Once we have the dictionary, then we check to see if we need 56 | // an internal conversion. This is needed because Hunspell has 57 | // two modes: in UTF-8 mode, everything is treated as a UTF-8 58 | // string which is what we have. Otherwise, it needs the specific 59 | // encoding of the file. 60 | std::string encoding = hunspell->get_dic_encoding(); 61 | bool isUTF8 = encoding.compare("UTF-8") == 0; 62 | 63 | if (toDictionaryTranscoder) { 64 | FreeTranscoder(toDictionaryTranscoder); 65 | toDictionaryTranscoder = NULL; 66 | } 67 | 68 | if (fromDictionaryTranscoder) { 69 | FreeTranscoder(fromDictionaryTranscoder); 70 | fromDictionaryTranscoder = NULL; 71 | } 72 | 73 | if (!isUTF8) { 74 | toDictionaryTranscoder = NewTranscoder8to8("UTF8", encoding.c_str()); 75 | fromDictionaryTranscoder = NewTranscoder8to8(encoding.c_str(), "UTF8"); 76 | } 77 | 78 | // Return that we successfully created the components. 
79 | return true; 80 | } 81 | 82 | std::vector HunspellSpellchecker::GetAvailableDictionaries(const std::string& path) { 83 | return std::vector(); 84 | } 85 | 86 | bool HunspellSpellchecker::IsMisspelled(const std::string& word) { 87 | if (!hunspell) { 88 | return false; 89 | } 90 | 91 | // If the word is too long, then don't do anything. 92 | if (word.length() > MAX_UTF8_BUFFER) { 93 | return false; 94 | } 95 | 96 | // If we have a dictionary transcoder, then we need to transcode 97 | // the input into the encoding the dictionary requires. 98 | std::vector dict_buffer(MAX_TRANSCODE_BUFFER); 99 | bool converted = Transcode8to8(toDictionaryTranscoder, dict_buffer.data(), dict_buffer.size(), word.data(), word.size()); 100 | 101 | if (!converted) { 102 | return false; 103 | } 104 | 105 | // Process the call on the transcoded data. 106 | return hunspell->spell(dict_buffer.data()) == 0; 107 | } 108 | 109 | std::vector HunspellSpellchecker::CheckSpelling(const uint16_t *utf16_text, size_t utf16_length) { 110 | std::vector result; 111 | 112 | if (!hunspell || !transcoder) { 113 | return result; 114 | } 115 | 116 | std::vector utf8_buffer(MAX_UTF16_TO_UTF8_BUFFER); 117 | 118 | enum { 119 | unknown, 120 | in_separator, 121 | in_word, 122 | } state = in_separator; 123 | 124 | // Because all of the strings are UTF-8 because we got them from Chrome that 125 | // way, we need to make sure our iswalpha works on UTF-8 strings. We picked a 126 | // generic locale because we don't pass the locale in. Sadly, "C.utf8" doesn't 127 | // work so we assume that US English is available everywhere. 128 | setlocale(LC_CTYPE, "en_US.UTF-8"); 129 | 130 | // Go through the UTF-16 characters and look for breaks. 
131 | for (size_t word_start = 0, i = 0; i < utf16_length; i++) { 132 | uint16_t c = utf16_text[i]; 133 | 134 | switch (state) { 135 | case unknown: 136 | if (iswpunct(c) || iswspace(c)) { 137 | state = in_separator; 138 | } 139 | break; 140 | 141 | case in_separator: 142 | if (iswalpha(c)) { 143 | word_start = i; 144 | state = in_word; 145 | } else if (!iswpunct(c) && !iswspace(c)) { 146 | state = unknown; 147 | } 148 | break; 149 | 150 | case in_word: 151 | if (c == '\'' && iswalpha(utf16_text[i + 1])) { 152 | i++; 153 | } else if (c == 0 || iswpunct(c) || iswspace(c)) { 154 | state = in_separator; 155 | bool converted = TranscodeUTF16ToUTF8(transcoder, (char *)utf8_buffer.data(), utf8_buffer.size(), utf16_text + word_start, i - word_start); 156 | 157 | if (converted) { 158 | // Convert the buffer into a dictionary-specific encoding. 159 | std::vector dict_buffer(MAX_TRANSCODE_BUFFER); 160 | converted = Transcode8to8(toDictionaryTranscoder, dict_buffer.data(), dict_buffer.size(), utf8_buffer.data(), utf8_buffer.size()); 161 | 162 | if (converted) { 163 | // Pass in the dictionary-encoded text for spelling. 164 | if (hunspell->spell(dict_buffer.data()) == 0) { 165 | MisspelledRange range; 166 | range.start = word_start; 167 | range.end = i; 168 | result.push_back(range); 169 | } 170 | } 171 | } 172 | } else if (!iswalpha(c)) { 173 | state = unknown; 174 | } 175 | break; 176 | } 177 | } 178 | 179 | return result; 180 | } 181 | 182 | void HunspellSpellchecker::Add(const std::string& word) { 183 | if (hunspell) { 184 | hunspell->add(word.c_str()); 185 | } 186 | } 187 | 188 | void HunspellSpellchecker::Remove(const std::string& word) { 189 | if (hunspell) { 190 | hunspell->remove(word.c_str()); 191 | } 192 | } 193 | 194 | std::vector HunspellSpellchecker::GetCorrectionsForMisspelling(const std::string& word) { 195 | std::vector corrections; 196 | 197 | if (hunspell) { 198 | // Convert the buffer into a dictionary-specific encoding. 
199 | std::vector dict_buffer(MAX_TRANSCODE_BUFFER); 200 | bool converted = Transcode8to8(toDictionaryTranscoder, dict_buffer.data(), dict_buffer.size(), word.data(), word.size()); 201 | 202 | if (converted) { 203 | // Get the suggested on the dictionary-encoded word. 204 | char** slist; 205 | int size = hunspell->suggest(&slist, dict_buffer.data()); 206 | 207 | corrections.reserve(size); 208 | 209 | for (int i = 0; i < size; ++i) { 210 | // The items in the `slist` are still in dictionary encoding. We need to 211 | // convert them back to UTF-8 so Chrome/V8 can play with them properly. 212 | std::string word = slist[i]; 213 | bool converted = Transcode8to8(fromDictionaryTranscoder, dict_buffer.data(), dict_buffer.size(), word.data(), word.size()); 214 | 215 | if (converted) { 216 | // Put this one back in encoded format. 217 | corrections.push_back(dict_buffer.data()); 218 | } else { 219 | // If we couldn't convert, we need to put the poorly encoded one so 220 | // they can see it. 221 | corrections.push_back(slist[i]); 222 | } 223 | } 224 | 225 | hunspell->free_list(&slist, size); 226 | } 227 | } 228 | 229 | return corrections; 230 | } 231 | 232 | } // namespace spellchecker 233 | -------------------------------------------------------------------------------- /src/spellchecker_hunspell.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_SPELLCHECKER_HUNSPELL_H_ 2 | #define SRC_SPELLCHECKER_HUNSPELL_H_ 3 | 4 | #include "spellchecker.h" 5 | #include "transcoder.h" 6 | 7 | class Hunspell; 8 | 9 | namespace spellchecker { 10 | 11 | class HunspellSpellchecker : public SpellcheckerImplementation { 12 | public: 13 | HunspellSpellchecker(); 14 | ~HunspellSpellchecker(); 15 | 16 | bool SetDictionary(const std::string& language, const std::string& path); 17 | std::vector GetAvailableDictionaries(const std::string& path); 18 | std::vector GetCorrectionsForMisspelling(const std::string& word); 19 | bool IsMisspelled(const 
std::string& word); 20 | std::vector CheckSpelling(const uint16_t *text, size_t length); 21 | void Add(const std::string& word); 22 | void Remove(const std::string& word); 23 | 24 | private: 25 | Hunspell* hunspell; 26 | Transcoder *transcoder; 27 | Transcoder *toDictionaryTranscoder; 28 | Transcoder *fromDictionaryTranscoder; 29 | }; 30 | 31 | } // namespace spellchecker 32 | 33 | #endif // SRC_SPELLCHECKER_HUNSPELL_H_ 34 | -------------------------------------------------------------------------------- /src/spellchecker_linux.cc: -------------------------------------------------------------------------------- 1 | #include "spellchecker.h" 2 | #include "spellchecker_hunspell.h" 3 | 4 | namespace spellchecker { 5 | 6 | SpellcheckerImplementation* SpellcheckerFactory::CreateSpellchecker(int spellcheckerType) { 7 | return new HunspellSpellchecker(); 8 | } 9 | 10 | } // namespace spellchecker 11 | -------------------------------------------------------------------------------- /src/spellchecker_mac.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_SPELLCHECKER_MAC_H_ 2 | #define SRC_SPELLCHECKER_MAC_H_ 3 | 4 | #include "spellchecker.h" 5 | 6 | #import 7 | #import 8 | 9 | namespace spellchecker { 10 | 11 | class MacSpellchecker : public SpellcheckerImplementation { 12 | public: 13 | MacSpellchecker(); 14 | ~MacSpellchecker(); 15 | 16 | bool SetDictionary(const std::string& language, const std::string& path); 17 | std::vector GetAvailableDictionaries(const std::string& path); 18 | std::vector GetCorrectionsForMisspelling(const std::string& word); 19 | bool IsMisspelled(const std::string& word); 20 | std::vector CheckSpelling(const uint16_t *text, size_t length); 21 | void Add(const std::string& word); 22 | void Remove(const std::string& word); 23 | 24 | private: 25 | NSSpellChecker* spellChecker; 26 | NSString* spellCheckerLanguage; 27 | 28 | void UpdateGlobalSpellchecker(); 29 | }; 30 | 31 | } // namespace spellchecker 

#endif  // SRC_SPELLCHECKER_MAC_H_

--------------------------------------------------------------------------------
/src/spellchecker_mac.mm:
--------------------------------------------------------------------------------
#include "spellchecker_mac.h"
#include "spellchecker_hunspell.h"

// NOTE(review): the two framework header names were lost in extraction;
// restored from usage - confirm.
#import <Cocoa/Cocoa.h>
#import <dispatch/dispatch.h>

namespace spellchecker {

// Language most recently applied to the shared NSSpellChecker by any
// MacSpellchecker instance (nil = automatic language identification).
// This variable owns its string: see UpdateGlobalSpellchecker.
static NSString* currentGlobalLanguage = nil;

MacSpellchecker::MacSpellchecker() {
  this->spellCheckerLanguage = nil;
  this->spellChecker = [NSSpellChecker sharedSpellChecker];
}

MacSpellchecker::~MacSpellchecker() {
  [this->spellCheckerLanguage release];
  this->spellCheckerLanguage = nil;
}

// Selects `language` on the shared spell checker. An empty language switches
// to automatic language identification. `path` is unused on macOS.
bool MacSpellchecker::SetDictionary(const std::string& language, const std::string& path) {
  @autoreleasepool {
    [this->spellCheckerLanguage release];

    if (language.length() < 1) {
      this->spellCheckerLanguage = nil;
      this->UpdateGlobalSpellchecker();
      return true;
    }

    this->spellCheckerLanguage = [[NSString alloc] initWithUTF8String: language.c_str()];

    return [this->spellChecker setLanguage: this->spellCheckerLanguage] == YES;
  }
}

// Lists the languages the system spell checker reports; `path` is unused.
std::vector<std::string> MacSpellchecker::GetAvailableDictionaries(const std::string& path) {
  std::vector<std::string> ret;

  @autoreleasepool {
    NSArray* languages = [this->spellChecker availableLanguages];

    for (size_t i = 0; i < languages.count; ++i) {
      ret.push_back([[languages objectAtIndex:i] UTF8String]);
    }
  }

  return ret;
}

// Returns true when the system checker finds a misspelling in `word`.
bool MacSpellchecker::IsMisspelled(const std::string& word) {
  bool result;

  @autoreleasepool {
    this->UpdateGlobalSpellchecker();

    NSString* misspelling = [NSString stringWithUTF8String:word.c_str()];
    NSRange range = [this->spellChecker checkSpellingOfString:misspelling
                                                   startingAt:0];

    result = range.length > 0;
  }

  return result;
}

// Checks `length` UTF-16 code units and returns the [start, end) range of
// every misspelling the system checker reports.
std::vector<MisspelledRange> MacSpellchecker::CheckSpelling(const uint16_t *text, size_t length) {
  std::vector<MisspelledRange> result;

  @autoreleasepool {
    this->UpdateGlobalSpellchecker();

    // The NSData wraps the caller's buffer without copying or taking ownership.
    NSData* data = [[NSData alloc] initWithBytesNoCopy:(void *)(text) length:(length * 2) freeWhenDone:NO];
    NSString* string = [[NSString alloc] initWithData:data encoding:NSUTF16LittleEndianStringEncoding];
    NSArray* misspellings = [this->spellChecker checkString:string
                                                      range:NSMakeRange(0, string.length)
                                                      types:NSTextCheckingTypeSpelling
                                                    options:nil
                                     inSpellDocumentWithTag:0
                                                orthography:nil
                                                  wordCount:nil];
    for (NSTextCheckingResult* misspelling in misspellings) {
      MisspelledRange range;
      range.start = misspelling.range.location;
      range.end = misspelling.range.location + misspelling.range.length;
      result.push_back(range);
    }

    // This file manages memory manually (see the -release calls elsewhere), so
    // the two alloc/init objects above must be released here; previously both
    // leaked on every call.
    [string release];
    [data release];
  }

  return result;
}

// Teaches `word` to the system spell checker.
void MacSpellchecker::Add(const std::string& word) {
  @autoreleasepool {
    this->UpdateGlobalSpellchecker();

    NSString* newWord = [NSString stringWithUTF8String:word.c_str()];
    [this->spellChecker learnWord:newWord];
  }
}

// Makes the system spell checker forget a previously learned `word`.
void MacSpellchecker::Remove(const std::string& word) {
  @autoreleasepool {
    this->UpdateGlobalSpellchecker();

    NSString* newWord = [NSString stringWithUTF8String:word.c_str()];
    [this->spellChecker unlearnWord:newWord];
  }
}

// Returns the system checker's guesses for the whole of `word`.
std::vector<std::string> MacSpellchecker::GetCorrectionsForMisspelling(const std::string& word) {
  std::vector<std::string> corrections;

  @autoreleasepool {
    this->UpdateGlobalSpellchecker();

    NSString* misspelling = [NSString stringWithUTF8String:word.c_str()];
    NSString* language = [this->spellChecker language];
    NSRange range;

    range.location = 0;
    range.length = [misspelling length];

    NSArray* guesses = [this->spellChecker guessesForWordRange:range
                                                      inString:misspelling
                                                      language:language
                                        inSpellDocumentWithTag:0];

    corrections.reserve(guesses.count);

    for (size_t i = 0; i < guesses.count; ++i) {
      corrections.push_back([[guesses objectAtIndex:i] UTF8String]);
    }
  }

  return corrections;
}

// NSSpellChecker is a shared singleton, so every MacSpellchecker instance
// configures the same object. Before each operation we re-apply this
// instance's language if another instance configured the checker last.
void MacSpellchecker::UpdateGlobalSpellchecker() {
  NSString* autoLanguage = @"___AUTO_LANGUAGE";
  NSString* globalLang = currentGlobalLanguage ? currentGlobalLanguage : autoLanguage;
  NSString* ourLang = this->spellCheckerLanguage ? this->spellCheckerLanguage : autoLanguage;

  if ([globalLang isEqualToString: ourLang]) {
    return;
  }

  // Keep an owned copy. Storing the raw instance pointer (as before) left
  // `currentGlobalLanguage` dangling once SetDictionary or the destructor
  // released the instance's string, and the comparison above then messaged
  // freed memory. Copying nil yields nil, which keeps the "auto" case intact.
  NSString* previousGlobal = currentGlobalLanguage;
  currentGlobalLanguage = [this->spellCheckerLanguage copy];
  [previousGlobal release];

  if (!this->spellCheckerLanguage) {
    [this->spellChecker setAutomaticallyIdentifiesLanguages: YES];
  } else {
    [this->spellChecker setAutomaticallyIdentifiesLanguages: NO];
    [this->spellChecker setLanguage: this->spellCheckerLanguage];
  }
}

// Chooses the system checker unless Hunspell is requested, either through
// `spellcheckerType` or through the SPELLCHECKER_PREFER_HUNSPELL environment
// variable (which ALWAYS_USE_SYSTEM overrides).
SpellcheckerImplementation* SpellcheckerFactory::CreateSpellchecker(int spellcheckerType) {
  bool preferHunspell = getenv("SPELLCHECKER_PREFER_HUNSPELL") && spellcheckerType != ALWAYS_USE_SYSTEM;

  if (spellcheckerType != ALWAYS_USE_HUNSPELL && !preferHunspell) {
    return new MacSpellchecker();
  }

  return new HunspellSpellchecker();
}

}  // namespace spellchecker

--------------------------------------------------------------------------------
/src/spellchecker_win.cc:
--------------------------------------------------------------------------------
#define _WINSOCKAPI_

// NOTE(review): the eight system header names were lost in extraction; the
// names below are reconstructed from what the file uses (COM, DEFINE_GUID,
// ISpellChecker, std::replace, getenv, uv_mutex_t) - confirm against the real
// file before relying on them.
#include <windows.h>
#include <guiddef.h>

#include <initguid.h>
#include <string>
#include <algorithm>
#include <cstdlib>
#include <spellcheck.h>
#include <uv.h>

#include "spellchecker.h"
#include "spellchecker_win.h"
#include "spellchecker_hunspell.h"

// NB: No idea why I have to define this myself, you don't have to in a
//
standard console app. 19 | DEFINE_GUID(CLSID_SpellCheckerFactory,0x7AB36653,0x1796,0x484B,0xBD,0xFA,0xE7,0x4F,0x1D,0xB7,0xC1,0xDC); 20 | DEFINE_GUID(IID_ISpellCheckerFactory,0x8E018A9D,0x2415,0x4677,0xBF,0x08,0x79,0x4E,0xA6,0x1F,0x94,0xBB); 21 | 22 | namespace spellchecker { 23 | 24 | LONG g_COMRefcount = 0; 25 | bool g_COMFailed = false; 26 | 27 | std::string ToUTF8(const std::wstring& string) { 28 | if (string.length() < 1) { 29 | return std::string(); 30 | } 31 | 32 | // NB: In the pathological case, each character could expand up 33 | // to 4 bytes in UTF8. 34 | int cbLen = (string.length()+1) * sizeof(char) * 4; 35 | char* buf = new char[cbLen]; 36 | int retLen = WideCharToMultiByte(CP_UTF8, 0, string.c_str(), string.length(), buf, cbLen, NULL, NULL); 37 | buf[retLen] = 0; 38 | 39 | std::string ret; 40 | ret.assign(buf); 41 | return ret; 42 | } 43 | 44 | std::wstring ToWString(const std::string& string) { 45 | if (string.length() < 1) { 46 | return std::wstring(); 47 | } 48 | 49 | // NB: If you got really unlucky, every character could be a two-wchar_t 50 | // surrogate pair 51 | int cchLen = (string.length()+1) * 2; 52 | wchar_t* buf = new wchar_t[cchLen]; 53 | int retLen = MultiByteToWideChar(CP_UTF8, 0, string.c_str(), strlen(string.c_str()), buf, cchLen); 54 | buf[retLen] = 0; 55 | 56 | std::wstring ret; 57 | ret.assign(buf); 58 | return ret; 59 | } 60 | 61 | class Lock { 62 | public: 63 | Lock(uv_mutex_t &m) : m{m} 64 | { 65 | uv_mutex_lock(&this->m); 66 | } 67 | 68 | ~Lock() 69 | { 70 | uv_mutex_unlock(&this->m); 71 | } 72 | 73 | private: 74 | uv_mutex_t &m; 75 | }; 76 | 77 | std::vector DoCheckSpelling(ISpellChecker *spellchecker, const uint16_t *text, size_t length) 78 | { 79 | std::vector result; 80 | 81 | if (spellchecker == NULL) { 82 | return result; 83 | } 84 | 85 | IEnumSpellingError* errors = NULL; 86 | std::wstring wtext(reinterpret_cast(text), length); 87 | if (FAILED(spellchecker->Check(wtext.c_str(), &errors))) { 88 | return result; 89 | } 
90 | 91 | ISpellingError *error; 92 | while (errors->Next(&error) == S_OK) { 93 | ULONG start, length; 94 | error->get_StartIndex(&start); 95 | error->get_Length(&length); 96 | 97 | MisspelledRange range; 98 | range.start = start; 99 | range.end = start + length; 100 | result.push_back(range); 101 | error->Release(); 102 | } 103 | 104 | errors->Release(); 105 | return result; 106 | } 107 | 108 | WindowsSpellcheckerThreadView::WindowsSpellcheckerThreadView(WindowsSpellchecker *impl, DWORD spellcheckerCookie) : 109 | SpellcheckerThreadView(impl), 110 | spellchecker{NULL} 111 | { 112 | if (!spellcheckerCookie) { 113 | return; 114 | } 115 | 116 | this->initResult = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); 117 | if (FAILED(this->initResult)) { 118 | return; 119 | } 120 | 121 | IGlobalInterfaceTable* gTable = NULL; 122 | HRESULT gTableRes = CoCreateInstance( 123 | CLSID_StdGlobalInterfaceTable, NULL, CLSCTX_INPROC_SERVER, IID_IGlobalInterfaceTable, 124 | reinterpret_cast(&gTable)); 125 | if (FAILED(gTableRes) || !gTable) { 126 | return; 127 | } 128 | 129 | Lock tableLock(impl->GetGlobalTableMutex()); 130 | HRESULT intRes = gTable->GetInterfaceFromGlobal(spellcheckerCookie, __uuidof(ISpellChecker), 131 | reinterpret_cast(&this->spellchecker)); 132 | if (FAILED(intRes)) { 133 | this->spellchecker = NULL; 134 | } 135 | } 136 | 137 | WindowsSpellcheckerThreadView::~WindowsSpellcheckerThreadView() 138 | { 139 | if (spellchecker != NULL) { 140 | spellchecker->Release(); 141 | } 142 | 143 | if (SUCCEEDED(initResult)) { 144 | CoUninitialize(); 145 | } 146 | } 147 | 148 | std::vector WindowsSpellcheckerThreadView::CheckSpelling(const uint16_t *text, size_t length) 149 | { 150 | return DoCheckSpelling(spellchecker, text, length); 151 | } 152 | 153 | WindowsSpellchecker::WindowsSpellchecker() { 154 | this->gTable = NULL; 155 | this->currentSpellcheckerCookie = 0; 156 | this->spellcheckerFactory = NULL; 157 | this->currentSpellchecker = NULL; 158 | 159 | if 
(InterlockedIncrement(&g_COMRefcount) == 1) { 160 | g_COMFailed = FAILED(CoInitializeEx(NULL, COINIT_APARTMENTTHREADED)); 161 | if (g_COMFailed) return; 162 | } 163 | 164 | // NB: This will fail on < Win8 165 | HRESULT hr = CoCreateInstance( 166 | CLSID_SpellCheckerFactory, NULL, CLSCTX_INPROC_SERVER, IID_ISpellCheckerFactory, 167 | reinterpret_cast(&this->spellcheckerFactory)); 168 | 169 | if (FAILED(hr)) { 170 | this->spellcheckerFactory = NULL; 171 | } 172 | 173 | HRESULT gTableRes = CoCreateInstance(CLSID_StdGlobalInterfaceTable, NULL, CLSCTX_INPROC_SERVER, 174 | IID_IGlobalInterfaceTable, reinterpret_cast(&gTable)); 175 | if (FAILED(gTableRes)) { 176 | this->gTable = NULL; 177 | } 178 | 179 | gTableMutexOk = uv_mutex_init(&this->gTableMutex) == 0; 180 | } 181 | 182 | WindowsSpellchecker::~WindowsSpellchecker() { 183 | if (this->currentSpellcheckerCookie) { 184 | this->gTable->RevokeInterfaceFromGlobal(this->currentSpellcheckerCookie); 185 | this->currentSpellcheckerCookie = 0; 186 | } 187 | 188 | if (this->currentSpellchecker) { 189 | this->currentSpellchecker->Release(); 190 | this->currentSpellchecker = NULL; 191 | } 192 | 193 | if (this->spellcheckerFactory) { 194 | this->spellcheckerFactory->Release(); 195 | this->spellcheckerFactory = NULL; 196 | } 197 | 198 | if (this->gTable) { 199 | this->gTable->Release(); 200 | this->gTable = NULL; 201 | } 202 | 203 | if (this->gTableMutexOk) { 204 | uv_mutex_destroy(&this->gTableMutex); 205 | } 206 | 207 | if (InterlockedDecrement(&g_COMRefcount) == 0) { 208 | CoUninitialize(); 209 | } 210 | } 211 | 212 | bool WindowsSpellchecker::IsSupported() { 213 | return !(g_COMFailed || (this->spellcheckerFactory == NULL)); 214 | } 215 | 216 | bool WindowsSpellchecker::SetDictionary(const std::string& language, const std::string& path) { 217 | if (!this->spellcheckerFactory) { 218 | return false; 219 | } 220 | 221 | if (this->currentSpellcheckerCookie) { 222 | Lock tableLock(this->gTableMutex); 223 | 
this->gTable->RevokeInterfaceFromGlobal(this->currentSpellcheckerCookie); 224 | this->currentSpellcheckerCookie = 0; 225 | } 226 | 227 | if (this->currentSpellchecker != NULL) { 228 | this->currentSpellchecker->Release(); 229 | this->currentSpellchecker = NULL; 230 | this->currentSpellcheckerCookie = 0; 231 | } 232 | 233 | // Figure out if we have a dictionary installed for the language they want 234 | // NB: Hunspell uses underscore to separate language and locale, and Win8 uses 235 | // dash - if they use the wrong one, just silently replace it for them 236 | std::string lang = language; 237 | std::replace(lang.begin(), lang.end(), '_', '-'); 238 | 239 | std::wstring wlanguage = ToWString(lang); 240 | BOOL isSupported; 241 | 242 | if (FAILED(this->spellcheckerFactory->IsSupported(wlanguage.c_str(), &isSupported))) { 243 | return false; 244 | } 245 | 246 | if (!isSupported) return false; 247 | 248 | if (FAILED(this->spellcheckerFactory->CreateSpellChecker(wlanguage.c_str(), &this->currentSpellchecker))) { 249 | return false; 250 | } 251 | 252 | IUnknown* unknown = NULL; 253 | HRESULT queryRes = this->currentSpellchecker->QueryInterface(IID_IUnknown, reinterpret_cast(&unknown)); 254 | if (FAILED(queryRes) || !unknown) { 255 | this->currentSpellchecker->Release(); 256 | this->currentSpellchecker = NULL; 257 | this->currentSpellcheckerCookie = 0; 258 | return false; 259 | } 260 | 261 | HRESULT regResult = S_OK; 262 | { 263 | Lock tableLock(this->gTableMutex); 264 | regResult = this->gTable->RegisterInterfaceInGlobal(unknown, __uuidof(ISpellChecker*), 265 | &this->currentSpellcheckerCookie); 266 | } 267 | unknown->Release(); 268 | if (FAILED(regResult) || !this->currentSpellcheckerCookie) { 269 | this->currentSpellchecker->Release(); 270 | this->currentSpellchecker = NULL; 271 | return false; 272 | } 273 | 274 | return true; 275 | } 276 | 277 | std::vector WindowsSpellchecker::GetAvailableDictionaries(const std::string& path) { 278 | HRESULT hr; 279 | 280 | if 
(!this->spellcheckerFactory) { 281 | return std::vector(); 282 | } 283 | 284 | IEnumString* langList; 285 | if (FAILED(hr = this->spellcheckerFactory->get_SupportedLanguages(&langList))) { 286 | return std::vector(); 287 | } 288 | 289 | std::vector ret; 290 | LPOLESTR str; 291 | while (langList->Next(1, &str, NULL) == S_OK) { 292 | std::wstring wlang; 293 | wlang.assign(str); 294 | ret.push_back(ToUTF8(wlang)); 295 | 296 | CoTaskMemFree(str); 297 | } 298 | 299 | langList->Release(); 300 | return ret; 301 | } 302 | 303 | bool WindowsSpellchecker::IsMisspelled(const std::string& word) { 304 | if (this->currentSpellchecker == NULL) { 305 | return false; 306 | } 307 | 308 | IEnumSpellingError* errors = NULL; 309 | std::wstring wword = ToWString(word); 310 | if (FAILED(this->currentSpellchecker->Check(wword.c_str(), &errors))) { 311 | return false; 312 | } 313 | 314 | bool ret; 315 | 316 | ISpellingError* dontcare; 317 | HRESULT hr = errors->Next(&dontcare); 318 | 319 | switch (hr) { 320 | case S_OK: 321 | // S_OK == There are errors to examine 322 | ret = true; 323 | dontcare->Release(); 324 | break; 325 | case S_FALSE: 326 | // Worked, but error free 327 | ret = false; 328 | break; 329 | default: 330 | // Something went pear-shaped 331 | ret = false; 332 | break; 333 | } 334 | 335 | errors->Release(); 336 | return ret; 337 | } 338 | 339 | std::vector WindowsSpellchecker::CheckSpelling(const uint16_t *text, size_t length) { 340 | return DoCheckSpelling(currentSpellchecker, text, length); 341 | } 342 | 343 | void WindowsSpellchecker::Add(const std::string& word) { 344 | if (this->currentSpellchecker == NULL) { 345 | return; 346 | } 347 | 348 | std::wstring wword = ToWString(word); 349 | this->currentSpellchecker->Add(wword.c_str()); 350 | } 351 | 352 | void WindowsSpellchecker::Remove(const std::string& word) { 353 | // NB: ISpellChecker has no way to remove words from the dictionary 354 | return; 355 | } 356 | 357 | 358 | std::vector 
WindowsSpellchecker::GetCorrectionsForMisspelling(const std::string& word) { 359 | if (this->currentSpellchecker == NULL) { 360 | return std::vector(); 361 | } 362 | 363 | std::wstring& wword = ToWString(word); 364 | IEnumString* words = NULL; 365 | 366 | HRESULT hr = this->currentSpellchecker->Suggest(wword.c_str(), &words); 367 | 368 | if (FAILED(hr)) { 369 | return std::vector(); 370 | } 371 | 372 | // NB: S_FALSE == word is spelled correctly 373 | if (hr == S_FALSE) { 374 | words->Release(); 375 | return std::vector(); 376 | } 377 | 378 | std::vector ret; 379 | 380 | LPOLESTR correction; 381 | while (words->Next(1, &correction, NULL) == S_OK) { 382 | std::wstring wcorr; 383 | wcorr.assign(correction); 384 | ret.push_back(ToUTF8(wcorr)); 385 | 386 | CoTaskMemFree(correction); 387 | } 388 | 389 | words->Release(); 390 | return ret; 391 | } 392 | 393 | std::unique_ptr WindowsSpellchecker::CreateThreadView() { 394 | return std::unique_ptr( 395 | new WindowsSpellcheckerThreadView(this, this->currentSpellcheckerCookie) 396 | ); 397 | } 398 | 399 | uv_mutex_t &WindowsSpellchecker::GetGlobalTableMutex() 400 | { 401 | return this->gTableMutex; 402 | } 403 | 404 | SpellcheckerImplementation* SpellcheckerFactory::CreateSpellchecker(int spellcheckerType) { 405 | bool preferHunspell = getenv("SPELLCHECKER_PREFER_HUNSPELL") && spellcheckerType != ALWAYS_USE_SYSTEM; 406 | 407 | if (spellcheckerType != ALWAYS_USE_HUNSPELL && !preferHunspell) { 408 | WindowsSpellchecker* ret = new WindowsSpellchecker(); 409 | 410 | if (ret->IsSupported()) { 411 | return ret; 412 | } 413 | 414 | delete ret; 415 | } 416 | 417 | return new HunspellSpellchecker(); 418 | } 419 | 420 | } // namespace spellchecker 421 | -------------------------------------------------------------------------------- /src/spellchecker_win.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_SPELLCHECKER_WIN_H_ 2 | #define SRC_SPELLCHECKER_WIN_H_ 3 | 4 | #define 
_WINSOCKAPI_ 5 | 6 | #include 7 | #include 8 | 9 | #include "spellchecker.h" 10 | 11 | namespace spellchecker { 12 | 13 | class WindowsSpellchecker; 14 | 15 | class WindowsSpellcheckerThreadView : public SpellcheckerThreadView { 16 | public: 17 | WindowsSpellcheckerThreadView(WindowsSpellchecker *impl, DWORD spellcheckerCookie); 18 | ~WindowsSpellcheckerThreadView() override; 19 | 20 | std::vector CheckSpelling(const uint16_t *text, size_t length) override; 21 | 22 | private: 23 | HRESULT initResult; 24 | ISpellChecker* spellchecker; 25 | }; 26 | 27 | class WindowsSpellchecker : public SpellcheckerImplementation { 28 | public: 29 | bool IsSupported(); 30 | 31 | WindowsSpellchecker(); 32 | ~WindowsSpellchecker(); 33 | 34 | bool SetDictionary(const std::string& language, const std::string& path); 35 | std::vector GetAvailableDictionaries(const std::string& path); 36 | 37 | std::vector GetCorrectionsForMisspelling(const std::string& word); 38 | bool IsMisspelled(const std::string& word); 39 | std::vector CheckSpelling(const uint16_t *text, size_t length); 40 | void Add(const std::string& word); 41 | void Remove(const std::string& word); 42 | 43 | std::unique_ptr CreateThreadView(); 44 | uv_mutex_t &GetGlobalTableMutex(); 45 | 46 | private: 47 | uv_mutex_t gTableMutex; 48 | bool gTableMutexOk; 49 | IGlobalInterfaceTable* gTable; 50 | DWORD currentSpellcheckerCookie; 51 | 52 | ISpellChecker* currentSpellchecker; 53 | ISpellCheckerFactory* spellcheckerFactory; 54 | }; 55 | 56 | } // namespace spellchecker 57 | 58 | #endif // SRC_SPELLCHECKER_MAC_H_ 59 | -------------------------------------------------------------------------------- /src/transcoder.h: -------------------------------------------------------------------------------- 1 | #ifndef SRC_TRANSCODER_H_ 2 | #define SRC_TRANSCODER_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace spellchecker { 8 | 9 | struct Transcoder; 10 | 11 | // Used to convert from V8's UTF-16 strings into UTF-8. 
12 | Transcoder *NewUTF16ToUTF8Transcoder(); 13 | 14 | // Used to convert from UTF-8 into the dictionary-specific format. 15 | Transcoder *NewTranscoder8to8(const char *from_encoding, const char *to_encoding); 16 | 17 | void FreeTranscoder(Transcoder *); 18 | 19 | // Transcodes UTF-16 to UTF-8. 20 | bool TranscodeUTF16ToUTF8(const Transcoder *, char *out, size_t out_length, const uint16_t *in, size_t in_length); 21 | 22 | // Transcode UTF-8 to a specified format. 23 | bool Transcode8to8(const Transcoder *, char *out, size_t out_length, const char *in, size_t in_length); 24 | 25 | } // namespace spellchecker 26 | 27 | #endif // SRC_TRANSCODER_H_ 28 | -------------------------------------------------------------------------------- /src/transcoder_posix.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "buffers.h" 6 | 7 | namespace spellchecker { 8 | 9 | struct Transcoder { 10 | iconv_t conversion; 11 | }; 12 | 13 | static int IsBigEndian(void) { 14 | union { 15 | uint16_t integer; 16 | char bytes[2]; 17 | } two_byte_value; 18 | 19 | two_byte_value.integer = {0x0102}; 20 | return two_byte_value.bytes[0] == 1; 21 | } 22 | 23 | Transcoder *NewUTF16ToUTF8Transcoder() { 24 | const char *to_encoding = "UTF-8"; 25 | const char *from_encoding = IsBigEndian() ? 
"UTF-16BE" : "UTF-16LE"; 26 | iconv_t conversion = iconv_open(to_encoding, from_encoding); 27 | if (conversion == (iconv_t)-1) { 28 | return NULL; 29 | } 30 | 31 | Transcoder *result = new Transcoder(); 32 | result->conversion = conversion; 33 | return result; 34 | } 35 | 36 | Transcoder *NewTranscoder8to8(const char *from_encoding, const char *to_encoding) { 37 | iconv_t conversion = iconv_open(to_encoding, from_encoding); 38 | 39 | if (conversion == (iconv_t)-1) { 40 | return NULL; 41 | } 42 | 43 | Transcoder *result = new Transcoder(); 44 | result->conversion = conversion; 45 | return result; 46 | } 47 | 48 | void FreeTranscoder(Transcoder *transcoder) { 49 | iconv_close(transcoder->conversion); 50 | delete transcoder; 51 | } 52 | 53 | bool TranscodeUTF16ToUTF8(const Transcoder *transcoder, char *out, size_t out_bytes, const uint16_t *in, size_t in_length) { 54 | char *utf16_word = reinterpret_cast(const_cast(in)); 55 | size_t utf16_bytes = in_length * (sizeof(uint16_t) / sizeof(char)); 56 | 57 | size_t iconv_result = iconv( 58 | transcoder->conversion, 59 | &utf16_word, 60 | &utf16_bytes, 61 | &out, 62 | &out_bytes 63 | ); 64 | 65 | if (iconv_result == static_cast(-1)) { 66 | return false; 67 | } 68 | 69 | *out = '\0'; 70 | 71 | // Make sure the transcoded length doesn't exceed our buffers. 72 | return strlen(out) <= MAX_UTF8_BUFFER; 73 | } 74 | 75 | bool Transcode8to8(const Transcoder *transcoder, char *out, size_t out_length, const char *in, size_t in_length) { 76 | // If the transcoder is NULL, then we just copy the input buffer into the 77 | // output buffer and return the results without transcoding. We assume that 78 | // the callers had made sure the word is not longer than the output buffer. 79 | char *utf8_word = reinterpret_cast(const_cast(in)); 80 | 81 | if (!transcoder) { 82 | // Copy the string and add the terminating character. 
83 | std::memcpy(out, in, in_length); 84 | out[in_length] = '\0'; 85 | } else { 86 | // We have a transcoder, so transcode the contents. 87 | size_t iconv_result = iconv( 88 | transcoder->conversion, 89 | &utf8_word, 90 | &in_length, 91 | &out, 92 | &out_length 93 | ); 94 | 95 | if (iconv_result == static_cast(-1)) { 96 | return false; 97 | } 98 | 99 | *out = '\0'; 100 | } 101 | 102 | // Make sure the transcoded length doesn't exceed our buffers. 103 | return strlen(out) <= MAX_UTF8_BUFFER; 104 | } 105 | 106 | } // namespace spellchecker 107 | -------------------------------------------------------------------------------- /src/transcoder_win.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include "transcoder.h" 7 | #include "buffers.h" 8 | 9 | namespace spellchecker { 10 | 11 | struct Transcoder { 12 | UINT from_code_page; 13 | UINT to_code_page; 14 | }; 15 | 16 | // Pulled from csutil.cxx. 17 | void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName, char* pBuf) 18 | { 19 | while ( *pName ) 20 | { 21 | /* A-Z */ 22 | if ( (*pName >= 0x41) && (*pName <= 0x5A) ) 23 | { 24 | *pBuf = (*pName)+0x20; /* toAsciiLower */ 25 | pBuf++; 26 | } 27 | /* a-z, 0-9 */ 28 | else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) || 29 | ((*pName >= 0x30) && (*pName <= 0x39)) ) 30 | { 31 | *pBuf = *pName; 32 | pBuf++; 33 | } 34 | 35 | pName++; 36 | } 37 | 38 | *pBuf = '\0'; 39 | } 40 | 41 | UINT GetCodePage(const char *dictionary_encoding) { 42 | // Convert into a normalized form. We have to do this because humans are so 43 | // inconsistent with their encoding lines and we want to make this as 44 | // tolerant as possible. 
45 | char *encoding = new char[strlen(dictionary_encoding)+1]; 46 | toAsciiLowerAndRemoveNonAlphanumeric(dictionary_encoding, encoding); 47 | 48 | // The translation of code pages comes from: 49 | // https://docs.microsoft.com/en-us/windows/desktop/intl/code-page-identifiers 50 | if (!_stricmp(encoding, "utf8")) { 51 | return CP_UTF8; 52 | } 53 | 54 | if (!_stricmp(encoding, "iso88591")) { 55 | return 28591; 56 | } 57 | 58 | if (!_stricmp(encoding, "iso88592")) { 59 | return 28592; 60 | } 61 | 62 | if (!_stricmp(encoding, "iso88593")) { 63 | return 28593; 64 | } 65 | 66 | if (!_stricmp(encoding, "iso88594")) { 67 | return 28594; 68 | } 69 | 70 | if (!_stricmp(encoding, "iso88595")) { 71 | return 28595; 72 | } 73 | 74 | if (!_stricmp(encoding, "iso88596")) { 75 | return 28596; 76 | } 77 | 78 | if (!_stricmp(encoding, "iso88597")) { 79 | return 28597; 80 | } 81 | 82 | if (!_stricmp(encoding, "iso88598")) { 83 | return 28598; 84 | } 85 | 86 | if (!_stricmp(encoding, "iso88599")) { 87 | return 28599; 88 | } 89 | 90 | if (!_stricmp(encoding, "iso885910")) { 91 | return 28600; 92 | } 93 | 94 | if (!_stricmp(encoding, "iso885911") || 95 | !_stricmp(encoding, "tis620")) { 96 | return 28601; 97 | } 98 | 99 | if (!_stricmp(encoding, "iso885913")) { 100 | return 28603; 101 | } 102 | 103 | if (!_stricmp(encoding, "iso885914")) { 104 | return 28604; 105 | } 106 | 107 | if (!_stricmp(encoding, "iso885915")) { 108 | return 28605; 109 | } 110 | 111 | if (!_stricmp(encoding, "koi8r") || 112 | !_stricmp(encoding, "koi8u")) { 113 | return 20866; 114 | } 115 | 116 | if (!_stricmp(encoding, "cp1251") || 117 | !_stricmp(encoding, "microsoftcp1251")) { 118 | return 1251; 119 | } 120 | 121 | if (!_stricmp(encoding, "xisciide") || 122 | !_stricmp(encoding, "xisciias") || 123 | !_stricmp(encoding, "isciidevangari")) { 124 | return 57002; 125 | } 126 | 127 | return -1; 128 | } 129 | 130 | Transcoder* NewUTF16ToUTF8Transcoder() { 131 | return new Transcoder(); 132 | } 133 | 134 | Transcoder* 
NewTranscoder8to8(const char *from_encoding, const char *to_encoding) { 135 | Transcoder *result = new Transcoder(); 136 | result->from_code_page = GetCodePage(from_encoding); 137 | result->to_code_page = GetCodePage(to_encoding); 138 | return result; 139 | } 140 | 141 | void FreeTranscoder(Transcoder *transcoder) { 142 | delete transcoder; 143 | } 144 | /* Transcodes in_length UTF-16 code units into UTF-8 in `out` and NUL-terminates the result. Returns false when the conversion fails or the result is longer than MAX_UTF8_BUFFER. */ 145 | bool TranscodeUTF16ToUTF8(const Transcoder *transcoder, char *out, size_t out_length, const uint16_t *in, size_t in_length) { 146 | int length = WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast(in), in_length, out, out_length, NULL, NULL); 147 | out[length] = '\0'; /* WideCharToMultiByte returns 0 on failure; do not report an empty string as a successful conversion. */ if (length == 0 && in_length != 0) { return false; } 148 | 149 | // Make sure the transcoded length doesn't exceed our buffers. 150 | return strlen(out) <= MAX_UTF8_BUFFER; 151 | } 152 | 153 | bool Transcode8to8(const Transcoder *transcoder, char *out, size_t out_length, const char *in, size_t in_length) { 154 | // If the transcoder is NULL, then we just copy the input buffer into the 155 | // output buffer and return the results without transcoding. We assume that 156 | // the callers had made sure the word is not longer than the output buffer. 157 | if (!transcoder) { 158 | // Copy the string and add the terminating character. 159 | std::memcpy(out, in, in_length); 160 | out[in_length] = '\0'; 161 | } else { 162 | // There is no easy way to convert from these two formats, so we convert from 163 | // the input format into UTF-16 (wstring) first which appears to be the "right" 164 | // way of doing this. 165 | std::vector utf16_buffer(256); 166 | int utf16_length; 167 | /* The code pages are UINT, so "< 0" can never be true; GetCodePage signals an unknown encoding with -1 converted to UINT. Reject either side being unknown. */ 168 | if (transcoder->from_code_page == static_cast<UINT>(-1) || transcoder->to_code_page == static_cast<UINT>(-1)) { 169 | return false; 170 | } 171 | 172 | utf16_length = MultiByteToWideChar(transcoder->from_code_page, 0, in, in_length, (LPWSTR)utf16_buffer.data(), utf16_buffer.size()); /* 0 means failure, e.g. the input does not fit the fixed-size UTF-16 buffer. */ if (utf16_length == 0 && in_length != 0) { return false; } 173 | 174 | // From the UTF-16 string, we convert it into our new page. 175 | // With the outgoing format, we need to convert it into something from the 176 | // wstring. 
177 | int length = WideCharToMultiByte(transcoder->to_code_page, 0, reinterpret_cast(utf16_buffer.data()), utf16_length, out, out_length, NULL, NULL); 178 | out[length] = '\0'; 179 | } 180 | 181 | // Make sure the transcoded length doesn't exceed our buffers. 182 | return strlen(out) <= MAX_UTF8_BUFFER; 183 | } 184 | 185 | } // namespace spellchecker 186 | -------------------------------------------------------------------------------- /src/worker.cc: -------------------------------------------------------------------------------- 1 | #include "worker.h" 2 | 3 | #include "nan.h" 4 | #include "spellchecker.h" 5 | 6 | #include 7 | #include 8 | #include 9 | /* Moves the corpus into the worker, keeps the implementation pointer for Execute(), and hands the callback to AsyncWorker. */ 10 | CheckSpellingWorker::CheckSpellingWorker( 11 | std::vector&& corpus, 12 | SpellcheckerImplementation* impl, 13 | Nan::Callback* callback 14 | ) : AsyncWorker(callback), corpus(std::move(corpus)), impl(impl) 15 | { 16 | // No-op 17 | } 18 | 19 | CheckSpellingWorker::~CheckSpellingWorker() 20 | { 21 | // No-op 22 | } 23 | /* Creates a thread view from the implementation and stores the ranges returned by CheckSpelling for the whole corpus. */ 24 | void CheckSpellingWorker::Execute() { 25 | std::unique_ptr view = impl->CreateThreadView(); 26 | misspelled_ranges = view->CheckSpelling(corpus.data(), corpus.size()); 27 | } 28 | /* Builds an array of { start, end } objects from misspelled_ranges and invokes the callback with (null, array). */ 29 | void CheckSpellingWorker::HandleOKCallback() { 30 | Nan::HandleScope scope; 31 | 32 | v8::Local context = Nan::GetCurrentContext(); 33 | Local result = Nan::New(); 34 | for (auto iter = misspelled_ranges.begin(); iter != misspelled_ranges.end(); ++iter) { 35 | /* Position of this range in the vector, used as the index in the result array. */ size_t index = iter - misspelled_ranges.begin(); 36 | uint32_t start = iter->start, end = iter->end; 37 | 38 | Local misspelled_range = Nan::New(); 39 | misspelled_range->Set(context, Nan::New("start").ToLocalChecked(), Nan::New(start)); 40 | misspelled_range->Set(context, Nan::New("end").ToLocalChecked(), Nan::New(end)); 41 | result->Set(context, index, misspelled_range); 42 | } 43 | /* First argument is the error slot (null here), second is the result array. */ 44 | Local argv[] = { Nan::Null(), result }; 45 | callback->Call(2, argv); 46 | } 47 | -------------------------------------------------------------------------------- 
/src/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef WORKER_H 2 | #define WORKER_H 3 | 4 | #include "nan.h" 5 | #include "spellchecker.h" 6 | 7 | #include 8 | 9 | using namespace spellchecker; 10 | using namespace v8; 11 | /* Nan::AsyncWorker subclass that holds a corpus, the spellchecker implementation to use, and the misspelled ranges produced for that corpus. */ 12 | class CheckSpellingWorker : public Nan::AsyncWorker { 13 | public: 14 | CheckSpellingWorker(std::vector &&corpus, SpellcheckerImplementation* impl, Nan::Callback* callback); 15 | ~CheckSpellingWorker(); 16 | 17 | void Execute(); 18 | void HandleOKCallback(); 19 | private: 20 | /* Text to check; const, so it is only set at construction. */ const std::vector corpus; 21 | /* Not deleted by this class's destructor (which is a no-op in worker.cc). */ SpellcheckerImplementation* impl; 22 | /* Filled by Execute(), read by HandleOKCallback(). */ std::vector misspelled_ranges; 23 | }; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /vendor/hunspell/AUTHORS: -------------------------------------------------------------------------------- 1 | Author of Hunspell: 2 | Németh László nemeth (at) OpenOffice.org 3 | 4 | Hunspell based on OpenOffice.org's Myspell. MySpell's author: 5 | Kevin Hendricks kevin.hendricks (at) sympatico.ca 6 | -------------------------------------------------------------------------------- /vendor/hunspell/BUGS: -------------------------------------------------------------------------------- 1 | * Interactive interface has some visualization problem with long lines 2 | 3 | * Experimental -U, -u options don't support Unicode. 4 | 5 | * Compound handling is not thread safe in Hungarian specific code. 
6 | -------------------------------------------------------------------------------- /vendor/hunspell/COPYING: -------------------------------------------------------------------------------- 1 | GPL 2.0/LGPL 2.1/MPL 1.1 tri-license 2 | 3 | The contents of this software may be used under the terms of 4 | the GNU General Public License Version 2 or later (the "GPL"), or 5 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL", 6 | see COPYING.LGPL) or (excepting the LGPLed GNU gettext library in the 7 | intl/ directory) the Mozilla Public License Version 1.1 or later 8 | (the "MPL", see COPYING.MPL). 9 | 10 | Software distributed under these licenses is distributed on an "AS IS" basis, 11 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences 12 | for the specific language governing rights and limitations under the licenses. 13 | -------------------------------------------------------------------------------- /vendor/hunspell/README: -------------------------------------------------------------------------------- 1 | About Hunspell 2 | -------------- 3 | 4 | Hunspell is a spell checker and morphological analyzer library and program 5 | designed for languages with rich morphology and complex word compounding or 6 | character encoding. Hunspell interfaces: Ispell-like terminal interface 7 | using Curses library, Ispell pipe interface, OpenOffice.org UNO module. 8 | 9 | Hunspell's code base comes from the OpenOffice.org MySpell 10 | (http://lingucomponent.openoffice.org/MySpell-3.zip). See README.MYSPELL, 11 | AUTHORS.MYSPELL and license.myspell files. 12 | Hunspell is designed to eventually replace Myspell in OpenOffice.org. 13 | 14 | Main features of Hunspell spell checker and morphological analyzer: 15 | 16 | - Unicode support (affix rules work only with the first 65535 Unicode characters) 17 | 18 | - Morphological analysis (in custom item and arrangement style) and stemming 19 | 20 | - Max. 
65535 affix classes and twofold affix stripping (for agglutinative 21 | languages, like Azeri, Basque, Estonian, Finnish, Hungarian, Turkish, etc.) 22 | 23 | - Support complex compoundings (for example, Hungarian and German) 24 | 25 | - Support language specific features (for example, special casing of 26 | Azeri and Turkish dotted i, or German sharp s) 27 | 28 | - Handle conditional affixes, circumfixes, fogemorphemes, 29 | forbidden words, pseudoroots and homonyms. 30 | 31 | - Free software (LGPL, GPL, MPL tri-license) 32 | 33 | Compiling on Unix/Linux 34 | ----------------------- 35 | 36 | ./configure 37 | make 38 | make install 39 | 40 | For dictionary development, use the --with-warnings option of configure. 41 | 42 | For interactive user interface of Hunspell executable, use the --with-ui option. 43 | 44 | The developer packages you need to compile Hunspell's interface: 45 | 46 | glibc-devel 47 | 48 | optional developer packages: 49 | 50 | ncurses (need for --with-ui) 51 | readline (for fancy input line editing, 52 | configure parameter: --with-readline) 53 | locale and gettext (but you can also use the 54 | --with-included-gettext configure parameter) 55 | 56 | Hunspell distribution uses new Autoconf (2.59) and Automake (1.9). 57 | 58 | Compiling on Windows 59 | -------------------- 60 | 61 | 1. Compiling with Windows SDK 62 | 63 | Download the free Windows SDK of Microsoft, open a command prompt 64 | window and cd into hunspell/src/win_api. Use the following command 65 | to compile hunspell: 66 | 67 | vcbuild 68 | 69 | 2. Compiling in Cygwin environment 70 | 71 | Download and install Cygwin environment for Windows with the following 72 | extra packages: 73 | 74 | make 75 | gcc-g++ development package 76 | mingw development package (for cygwin.dll free native Windows compilation) 77 | ncurses, readline (for user interface) 78 | iconv (character conversion) 79 | 80 | 2.1. 
Cygwin1.dll dependent compiling 81 | 82 | Open a Cygwin shell, cd into the hunspell root directory: 83 | 84 | ./configure 85 | make 86 | make install 87 | 88 | For dictionary development, use the --with-warnings option of configure. 89 | 90 | For interactive user interface of Hunspell executable, use the --with-ui option. 91 | 92 | readline configure parameter: --with-readline (for fancy input line editing) 93 | 94 | 2.2. Cygwin1.dll free compiling 95 | 96 | Open a Cygwin shell, cd into the hunspell/src/win_api and 97 | 98 | make -f Makefile.cygwin 99 | 100 | Testing 101 | ------- 102 | 103 | Testing Hunspell (see tests in tests/ subdirectory): 104 | 105 | make check 106 | 107 | or with Valgrind debugger: 108 | 109 | make check 110 | VALGRIND=[Valgrind_tool] make check 111 | 112 | For example: 113 | 114 | make check 115 | VALGRIND=memcheck make check 116 | 117 | Documentation 118 | ------------- 119 | 120 | features and dictionary format: 121 | man 4 hunspell 122 | 123 | man hunspell 124 | hunspell -h 125 | http://hunspell.sourceforge.net 126 | 127 | Usage 128 | ----- 129 | 130 | The src/tools directory contains eleven executables after compiling 131 | (or some of them are in the src/win_api): 132 | 133 | affixcompress: dictionary generation from large (millions of words) vocabularies 134 | analyze: example of spell checking, stemming and morphological analysis 135 | chmorph: example of automatic morphological generation and conversion 136 | example: example of spell checking and suggestion 137 | hunspell: main program for spell checking and others (see manual) 138 | hunzip: decompressor of hzip format 139 | hzip: compressor of hzip format 140 | makealias: alias compression (Hunspell only, not back compatible with MySpell) 141 | munch: dictionary generation from vocabularies (it needs an affix file, too). 
142 | unmunch: list all recognized words of a MySpell dictionary 143 | wordforms: word generation (Hunspell version of unmunch) 144 | 145 | After compiling and installing (see INSTALL) you can 146 | run the Hunspell spell checker (compiled with user interface) 147 | with a Hunspell or Myspell dictionary: 148 | 149 | hunspell -d en_US text.txt 150 | 151 | or without interface: 152 | 153 | hunspell 154 | hunspell -d en_UK -l 164 | 165 | Linking with Hunspell static library: 166 | g++ -lhunspell example.cxx 167 | 168 | Dictionaries 169 | ------------ 170 | 171 | Myspell & Hunspell dictionaries: 172 | http://wiki.services.openoffice.org/wiki/Dictionaries 173 | 174 | Aspell dictionaries (need some conversion): 175 | ftp://ftp.gnu.org/gnu/aspell/dict 176 | Conversion steps: see relevant feature request at http://hunspell.sf.net. 177 | 178 | László Németh 179 | nemeth at OOo 180 | -------------------------------------------------------------------------------- /vendor/hunspell/THANKS: -------------------------------------------------------------------------------- 1 | Many thanks to the following contributors and supporters: 2 | 3 | Mehmet Akin 4 | Göran Andersson 5 | Lars Aronsson 6 | Ruud Baars 7 | Bartkó Zoltán 8 | Mathias Bauer 9 | Bencsáth Boldizsár 10 | Bíró Árpád 11 | Ingo H. de Boer 12 | Simon Brouwer 13 | Jeppe Bundsgaard 14 | Ginn Chen 15 | Aaron Digulla 16 | Dmitri Gabinski 17 | Dvornik László 18 | David Einstein 19 | Rene Engelhard 20 | Frederik Fouvry 21 | Flemming Frandsen 22 | Serge Gautherie 23 | Marek Gleń 24 | Gavins at OOo 25 | Gefferth András 26 | Godó Ferenc 27 | Goldman Eleonóra 28 | Steinar H. 
Gunderson 29 | Halácsy Péter 30 | Chris Halls 31 | Khaled Hosny 32 | Izsók András 33 | Björn Jacke 34 | Mike Tian-Jian Jiang 35 | Dafydd Jones 36 | Ryan Jones 37 | Jean-Christophe Helary 38 | Kevin Hendricks 39 | Martin Hollmichel 40 | Pavel Janík 41 | John Winters 42 | Mohamed Kebdani 43 | Kelemen Gábor 44 | Shewangizaw Gulilat 45 | Kéménczy Kálmán 46 | Dan Kenigsberg 47 | Pham Ngoc Khanh 48 | Khiraly László 49 | Koblinger Egmont 50 | Kornai András 51 | Tor Lillqvist 52 | Christian Lohmaier 53 | Robert Longson 54 | Marot at SF dot net 55 | Mark McClain 56 | Caolan McNamara 57 | Michael Meeks 58 | Moheb Mekhaiel 59 | Laurie Mercer 60 | Ladislav Michnovič 61 | Ellis Miller 62 | Giuseppe Modugno 63 | János Mohácsi 64 | Bram Moolenaar 65 | Daniel Naber 66 | Nagy Viktor 67 | John Nisly 68 | Noll János 69 | S Page 70 | Christophe Paris 71 | Malcolm Parsons 72 | Sylvain Paschein 73 | Volkov Peter 74 | Bryan Petty 75 | Harri Pitkänen 76 | Davide Prina 77 | Kevin F. Quinn 78 | Erdal Ronahi 79 | Olivier Ronez 80 | Bernhard Rosenkraenzer 81 | Sarlós Tamás 82 | Thobias Schlemmer 83 | Jan Seeger 84 | Jose da Silva 85 | Paulo Ney de Souza 86 | Roland Smith 87 | Munzir Taha 88 | Timeless at bemail dot org 89 | Tímár András 90 | Tonal at OOo 91 | Török László 92 | Trón Viktor 93 | Gianluca Turconi 94 | Ryan VanderMeulen 95 | Varga Dániel 96 | Elio Voci 97 | Miha Vrhovnik 98 | Martijn Wargers 99 | Michel Weimerskirch 100 | Brett Wilson 101 | Friedel Wolff 102 | Daniel Yacob 103 | Gábor Zahemszky 104 | Taha Zerrouki 105 | and others (see also AUTHORS.myspell) 106 | 107 | FSF.hu Foundation 108 | http://www.fsf.hu 109 | 110 | MOKK Research Centre 111 | Budapest University of Technology and Economics 112 | Sociology and Communications Department 113 | http://www.mokk.bme.hu 114 | 115 | Hungarian Ministry of Informatics and Telecommunications 116 | 117 | IMEDIA Kft. 
118 | http://www.imedia.hu 119 | 120 | OpenOffice.org community 121 | http://www.openoffice.org 122 | 123 | OpenTaal Foundation, Netherlands and 124 | Dutch Language Union (Nederlandse Taalunie) 125 | http://opentaal.org 126 | 127 | UHU-Linux Kft. 128 | 129 | Thanks, 130 | 131 | Németh László 132 | nemeth at OOo 133 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/README: -------------------------------------------------------------------------------- 1 | Hunspell spell checker and morphological analyser library 2 | 3 | Documentation, tests, examples: http://hunspell.sourceforge.net 4 | 5 | Author of Hunspell: 6 | László Németh (nemethl (at) gyorsposta.hu) 7 | 8 | Hunspell based on OpenOffice.org's Myspell. MySpell's author: 9 | Kevin Hendricks (kevin.hendricks (at) sympatico.ca) 10 | 11 | License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license 12 | 13 | The contents of this library may be used under the terms of 14 | the GNU General Public License Version 2 or later (the "GPL"), or 15 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL", 16 | see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License 17 | Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html). 18 | 19 | Software distributed under these licenses is distributed on an "AS IS" basis, 20 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences 21 | for the specific language governing rights and limitations under the licenses. 
22 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/affentry.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _AFFIX_HXX_ 2 | #define _AFFIX_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include "atypes.hxx" 7 | #include "baseaffix.hxx" 8 | #include "affixmgr.hxx" 9 | 10 | /* A Prefix Entry */ 11 | 12 | class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry 13 | { 14 | AffixMgr* pmyMgr; 15 | 16 | PfxEntry * next; 17 | PfxEntry * nexteq; 18 | PfxEntry * nextne; 19 | PfxEntry * flgnxt; 20 | 21 | public: 22 | 23 | PfxEntry(AffixMgr* pmgr, affentry* dp ); 24 | ~PfxEntry(); 25 | 26 | inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); } 27 | struct hentry * checkword(const char * word, int len, char in_compound, 28 | const FLAG needflag = FLAG_NULL); 29 | /* FLAG is an unsigned short, so the default is FLAG_NULL like the sibling declarations, not the pointer constant NULL. */ 30 | struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); 31 | 32 | char * check_morph(const char * word, int len, char in_compound, 33 | const FLAG needflag = FLAG_NULL); 34 | 35 | char * check_twosfx_morph(const char * word, int len, 36 | char in_compound, const FLAG needflag = FLAG_NULL); 37 | 38 | inline FLAG getFlag() { return aflag; } 39 | inline const char * getKey() { return appnd; } 40 | char * add(const char * word, int len); 41 | 42 | inline short getKeyLen() { return appndl; } 43 | 44 | inline const char * getMorph() { return morphcode; } 45 | 46 | inline const unsigned short * getCont() { return contclass; } 47 | inline short getContLen() { return contclasslen; } 48 | 49 | inline PfxEntry * getNext() { return next; } 50 | inline PfxEntry * getNextNE() { return nextne; } 51 | inline PfxEntry * getNextEQ() { return nexteq; } 52 | inline PfxEntry * getFlgNxt() { return flgnxt; } 53 | 54 | inline void setNext(PfxEntry * ptr) { next = ptr; } 55 | inline void setNextNE(PfxEntry * ptr) { nextne = ptr; } 56 | inline void 
setNextEQ(PfxEntry * ptr) { nexteq = ptr; } 57 | inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; } 58 | 59 | inline char * nextchar(char * p); 60 | inline int test_condition(const char * st); 61 | }; 62 | 63 | 64 | 65 | 66 | /* A Suffix Entry */ 67 | 68 | class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry 69 | { 70 | AffixMgr* pmyMgr; 71 | char * rappnd; 72 | 73 | SfxEntry * next; 74 | SfxEntry * nexteq; 75 | SfxEntry * nextne; 76 | SfxEntry * flgnxt; 77 | 78 | SfxEntry * l_morph; 79 | SfxEntry * r_morph; 80 | SfxEntry * eq_morph; 81 | 82 | public: 83 | 84 | SfxEntry(AffixMgr* pmgr, affentry* dp ); 85 | ~SfxEntry(); 86 | 87 | inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); } 88 | struct hentry * checkword(const char * word, int len, int optflags, 89 | PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, 90 | // const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT); 91 | const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0); 92 | 93 | /* FLAG is an unsigned short, so the default is FLAG_NULL like the sibling declarations, not the pointer constant NULL. */ struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 94 | 95 | char * check_twosfx_morph(const char * word, int len, int optflags, 96 | PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 97 | struct hentry * get_next_homonym(struct hentry * he); 98 | struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx, 99 | const FLAG cclass, const FLAG needflag); 100 | 101 | 102 | inline FLAG getFlag() { return aflag; } 103 | inline const char * getKey() { return rappnd; } 104 | char * add(const char * word, int len); 105 | 106 | 107 | inline const char * getMorph() { return morphcode; } 108 | 109 | inline const unsigned short * getCont() { return contclass; } 110 | inline short getContLen() { return contclasslen; } 111 | inline const char * getAffix() { return appnd; } 112 | 113 | inline short getKeyLen() { return appndl; } 114 | 115 | inline SfxEntry * 
getNext() { return next; } 116 | inline SfxEntry * getNextNE() { return nextne; } 117 | inline SfxEntry * getNextEQ() { return nexteq; } 118 | 119 | inline SfxEntry * getLM() { return l_morph; } 120 | inline SfxEntry * getRM() { return r_morph; } 121 | inline SfxEntry * getEQM() { return eq_morph; } 122 | inline SfxEntry * getFlgNxt() { return flgnxt; } 123 | 124 | inline void setNext(SfxEntry * ptr) { next = ptr; } 125 | inline void setNextNE(SfxEntry * ptr) { nextne = ptr; } 126 | inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; } 127 | inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; } 128 | 129 | inline char * nextchar(char * p); 130 | inline int test_condition(const char * st, const char * begin); 131 | 132 | }; 133 | 134 | #endif 135 | 136 | 137 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/affixmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _AFFIXMGR_HXX_ 2 | #define _AFFIXMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include 7 | 8 | #include "atypes.hxx" 9 | #include "baseaffix.hxx" 10 | #include "hashmgr.hxx" 11 | #include "phonet.hxx" 12 | #include "replist.hxx" 13 | 14 | // check flag duplication 15 | #define dupSFX (1 << 0) 16 | #define dupPFX (1 << 1) 17 | 18 | class PfxEntry; 19 | class SfxEntry; 20 | 21 | class LIBHUNSPELL_DLL_EXPORTED AffixMgr 22 | { 23 | 24 | PfxEntry * pStart[SETSIZE]; 25 | SfxEntry * sStart[SETSIZE]; 26 | PfxEntry * pFlag[SETSIZE]; 27 | SfxEntry * sFlag[SETSIZE]; 28 | HashMgr * pHMgr; 29 | HashMgr ** alldic; 30 | int * maxdic; 31 | char * keystring; 32 | char * trystring; 33 | char * encoding; 34 | struct cs_info * csconv; 35 | int utf8; 36 | int complexprefixes; 37 | FLAG compoundflag; 38 | FLAG compoundbegin; 39 | FLAG compoundmiddle; 40 | FLAG compoundend; 41 | FLAG compoundroot; 42 | FLAG compoundforbidflag; 43 | FLAG compoundpermitflag; 44 | int checkcompounddup; 45 | int checkcompoundrep; 46 
| int checkcompoundcase; 47 | int checkcompoundtriple; 48 | int simplifiedtriple; 49 | FLAG forbiddenword; 50 | FLAG nosuggest; 51 | FLAG nongramsuggest; 52 | FLAG needaffix; 53 | int cpdmin; 54 | int numrep; 55 | replentry * reptable; 56 | RepList * iconvtable; 57 | RepList * oconvtable; 58 | int nummap; 59 | mapentry * maptable; 60 | int numbreak; 61 | char ** breaktable; 62 | int numcheckcpd; 63 | patentry * checkcpdtable; 64 | int simplifiedcpd; 65 | int numdefcpd; 66 | flagentry * defcpdtable; 67 | phonetable * phone; 68 | int maxngramsugs; 69 | int maxcpdsugs; 70 | int maxdiff; 71 | int onlymaxdiff; 72 | int nosplitsugs; 73 | int sugswithdots; 74 | int cpdwordmax; 75 | int cpdmaxsyllable; 76 | char * cpdvowels; 77 | w_char * cpdvowels_utf16; 78 | int cpdvowels_utf16_len; 79 | char * cpdsyllablenum; 80 | const char * pfxappnd; // BUG: not stateless 81 | const char * sfxappnd; // BUG: not stateless 82 | FLAG sfxflag; // BUG: not stateless 83 | char * derived; // BUG: not stateless 84 | SfxEntry * sfx; // BUG: not stateless 85 | PfxEntry * pfx; // BUG: not stateless 86 | int checknum; 87 | char * wordchars; 88 | unsigned short * wordchars_utf16; 89 | int wordchars_utf16_len; 90 | char * ignorechars; 91 | unsigned short * ignorechars_utf16; 92 | int ignorechars_utf16_len; 93 | char * version; 94 | char * lang; 95 | int langnum; 96 | FLAG lemma_present; 97 | FLAG circumfix; 98 | FLAG onlyincompound; 99 | FLAG keepcase; 100 | FLAG forceucase; 101 | FLAG warn; 102 | int forbidwarn; 103 | FLAG substandard; 104 | int checksharps; 105 | int fullstrip; 106 | 107 | int havecontclass; // boolean variable 108 | char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) 109 | 110 | public: 111 | 112 | AffixMgr(const char * affpath, HashMgr** ptr, int * md, 113 | const char * key = NULL); 114 | ~AffixMgr(); 115 | struct hentry * affix_check(const char * word, int len, 116 | const unsigned short needflag = (unsigned short) 0, 117 | char in_compound = 
IN_CPD_NOT); 118 | struct hentry * prefix_check(const char * word, int len, 119 | char in_compound, const FLAG needflag = FLAG_NULL); 120 | inline int isSubset(const char * s1, const char * s2); 121 | struct hentry * prefix_check_twosfx(const char * word, int len, 122 | char in_compound, const FLAG needflag = FLAG_NULL); 123 | inline int isRevSubset(const char * s1, const char * end_of_s2, int len); 124 | struct hentry * suffix_check(const char * word, int len, int sfxopts, 125 | PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, 126 | const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, 127 | char in_compound = IN_CPD_NOT); 128 | struct hentry * suffix_check_twosfx(const char * word, int len, 129 | int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 130 | 131 | char * affix_check_morph(const char * word, int len, 132 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 133 | char * prefix_check_morph(const char * word, int len, 134 | char in_compound, const FLAG needflag = FLAG_NULL); 135 | char * suffix_check_morph (const char * word, int len, int sfxopts, 136 | PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, 137 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 138 | 139 | char * prefix_check_twosfx_morph(const char * word, int len, 140 | char in_compound, const FLAG needflag = FLAG_NULL); 141 | char * suffix_check_twosfx_morph(const char * word, int len, 142 | int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); 143 | 144 | char * morphgen(char * ts, int wl, const unsigned short * ap, 145 | unsigned short al, char * morph, char * targetmorph, int level); 146 | 147 | int expand_rootword(struct guessword * wlst, int maxn, const char * ts, 148 | int wl, const unsigned short * ap, unsigned short al, char * bad, 149 | int, char *); 150 | 151 | short get_syllable (const char * word, int wlen); 152 | int cpdrep_check(const char * word, int len); 153 | int cpdpat_check(const char * word, int len, hentry * 
r1, hentry * r2, 154 | const char affixed); 155 | int defcpd_check(hentry *** words, short wnum, hentry * rv, 156 | hentry ** rwords, char all); 157 | int cpdcase_check(const char * word, int len); 158 | inline int candidate_check(const char * word, int len); 159 | void setcminmax(int * cmin, int * cmax, const char * word, int len); 160 | struct hentry * compound_check(const char * word, int len, short wordnum, 161 | short numsyllable, short maxwordnum, short wnum, hentry ** words, 162 | char hu_mov_rule, char is_sug, int * info); 163 | 164 | int compound_check_morph(const char * word, int len, short wordnum, 165 | short numsyllable, short maxwordnum, short wnum, hentry ** words, 166 | char hu_mov_rule, char ** result, char * partresult); 167 | 168 | struct hentry * lookup(const char * word); 169 | int get_numrep() const; 170 | struct replentry * get_reptable() const; 171 | RepList * get_iconvtable() const; 172 | RepList * get_oconvtable() const; 173 | struct phonetable * get_phonetable() const; 174 | int get_nummap() const; 175 | struct mapentry * get_maptable() const; 176 | int get_numbreak() const; 177 | char ** get_breaktable() const; 178 | char * get_encoding(); 179 | int get_langnum() const; 180 | char * get_key_string(); 181 | char * get_try_string() const; 182 | const char * get_wordchars() const; 183 | unsigned short * get_wordchars_utf16(int * len) const; 184 | char * get_ignore() const; 185 | unsigned short * get_ignore_utf16(int * len) const; 186 | int get_compound() const; 187 | FLAG get_compoundflag() const; 188 | FLAG get_compoundbegin() const; 189 | FLAG get_forbiddenword() const; 190 | FLAG get_nosuggest() const; 191 | FLAG get_nongramsuggest() const; 192 | FLAG get_needaffix() const; 193 | FLAG get_onlyincompound() const; 194 | FLAG get_compoundroot() const; 195 | FLAG get_lemma_present() const; 196 | int get_checknum() const; 197 | const char * get_prefix() const; 198 | const char * get_suffix() const; 199 | const char * get_derived() const; 200 
| const char * get_version() const; 201 | int have_contclass() const; 202 | int get_utf8() const; 203 | int get_complexprefixes() const; 204 | char * get_suffixed(char ) const; 205 | int get_maxngramsugs() const; 206 | int get_maxcpdsugs() const; 207 | int get_maxdiff() const; 208 | int get_onlymaxdiff() const; 209 | int get_nosplitsugs() const; 210 | int get_sugswithdots(void) const; 211 | FLAG get_keepcase(void) const; 212 | FLAG get_forceucase(void) const; 213 | FLAG get_warn(void) const; 214 | int get_forbidwarn(void) const; 215 | int get_checksharps(void) const; 216 | char * encode_flag(unsigned short aflag) const; 217 | int get_fullstrip() const; 218 | 219 | private: 220 | int parse_file(const char * affpath, const char * key); 221 | int parse_flag(char * line, unsigned short * out, FileMgr * af); 222 | int parse_num(char * line, int * out, FileMgr * af); 223 | int parse_cpdsyllable(char * line, FileMgr * af); 224 | int parse_reptable(char * line, FileMgr * af); 225 | int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword); 226 | int parse_phonetable(char * line, FileMgr * af); 227 | int parse_maptable(char * line, FileMgr * af); 228 | int parse_breaktable(char * line, FileMgr * af); 229 | int parse_checkcpdtable(char * line, FileMgr * af); 230 | int parse_defcpdtable(char * line, FileMgr * af); 231 | int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); 232 | 233 | void reverse_condition(char *); 234 | void debugflag(char * result, unsigned short flag); 235 | int condlen(char *); 236 | int encodeit(affentry &entry, char * cs); 237 | int build_pfxtree(PfxEntry* pfxptr); 238 | int build_sfxtree(SfxEntry* sfxptr); 239 | int process_pfx_order(); 240 | int process_sfx_order(); 241 | PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); 242 | SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); 243 | int process_pfx_tree_to_list(); 244 | int process_sfx_tree_to_list(); 245 | int 
redundant_condition(char, char * strip, int stripl, 246 | const char * cond, int); 247 | }; 248 | 249 | #endif 250 | 251 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/atypes.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _ATYPES_HXX_ 2 | #define _ATYPES_HXX_ 3 | 4 | #ifndef HUNSPELL_WARNING 5 | #include 6 | #ifdef HUNSPELL_WARNING_ON 7 | #define HUNSPELL_WARNING fprintf 8 | #else 9 | // empty inline function to switch off warnings (instead of the C99 standard variadic macros) 10 | static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {} 11 | #endif 12 | #endif 13 | 14 | // HUNSTEM def. 15 | #define HUNSTEM 16 | 17 | #include "hashmgr.hxx" 18 | #include "w_char.hxx" 19 | 20 | #define SETSIZE 256 21 | #define CONTSIZE 65536 22 | #define MAXWORDLEN 100 23 | #define MAXWORDUTF8LEN 256 24 | 25 | // affentry options 26 | #define aeXPRODUCT (1 << 0) 27 | #define aeUTF8 (1 << 1) 28 | #define aeALIASF (1 << 2) 29 | #define aeALIASM (1 << 3) 30 | #define aeLONGCOND (1 << 4) 31 | 32 | // compound options 33 | #define IN_CPD_NOT 0 34 | #define IN_CPD_BEGIN 1 35 | #define IN_CPD_END 2 36 | #define IN_CPD_OTHER 3 37 | 38 | // info options 39 | #define SPELL_COMPOUND (1 << 0) 40 | #define SPELL_FORBIDDEN (1 << 1) 41 | #define SPELL_ALLCAP (1 << 2) 42 | #define SPELL_NOCAP (1 << 3) 43 | #define SPELL_INITCAP (1 << 4) 44 | #define SPELL_ORIGCAP (1 << 5) 45 | #define SPELL_WARN (1 << 6) 46 | 47 | #define MAXLNLEN 8192 48 | 49 | #define MINCPDLEN 3 50 | #define MAXCOMPOUND 10 51 | #define MAXCONDLEN 20 52 | #define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *)) 53 | 54 | #define MAXACC 1000 55 | 56 | #define FLAG unsigned short 57 | #define FLAG_NULL 0x00 58 | #define FREE_FLAG(a) a = 0 59 | 60 | #define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) 61 | 62 | struct affentry 63 | { 64 | char * strip; 65 | char * appnd; 66 | 
unsigned char stripl; 67 | unsigned char appndl; 68 | char numconds; 69 | char opts; 70 | unsigned short aflag; 71 | unsigned short * contclass; 72 | short contclasslen; 73 | union { 74 | char conds[MAXCONDLEN]; 75 | struct { 76 | char conds1[MAXCONDLEN_1]; 77 | char * conds2; 78 | } l; 79 | } c; 80 | char * morphcode; 81 | }; 82 | 83 | struct guessword { 84 | char * word; 85 | bool allow; 86 | char * orig; 87 | }; 88 | 89 | struct mapentry { 90 | char ** set; 91 | int len; 92 | }; 93 | 94 | struct flagentry { 95 | FLAG * def; 96 | int len; 97 | }; 98 | 99 | struct patentry { 100 | char * pattern; 101 | char * pattern2; 102 | char * pattern3; 103 | FLAG cond; 104 | FLAG cond2; 105 | }; 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/baseaffix.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _BASEAFF_HXX_ 2 | #define _BASEAFF_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | class LIBHUNSPELL_DLL_EXPORTED AffEntry 7 | { 8 | protected: 9 | char * appnd; 10 | char * strip; 11 | unsigned char appndl; 12 | unsigned char stripl; 13 | char numconds; 14 | char opts; 15 | unsigned short aflag; 16 | union { 17 | char conds[MAXCONDLEN]; 18 | struct { 19 | char conds1[MAXCONDLEN_1]; 20 | char * conds2; 21 | } l; 22 | } c; 23 | char * morphcode; 24 | unsigned short * contclass; 25 | short contclasslen; 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/csutil.hxx: -------------------------------------------------------------------------------- 1 | #ifndef __CSUTILHXX__ 2 | #define __CSUTILHXX__ 3 | 4 | #include "hunvisapi.h" 5 | 6 | // First some base level utility routines 7 | 8 | #include 9 | #include "w_char.hxx" 10 | #include "htypes.hxx" 11 | 12 | #ifdef MOZILLA_CLIENT 13 | #include "nscore.h" // for mozalloc headers 14 | #endif 15 | 16 | // casing 17 | 
#define NOCAP 0 18 | #define INITCAP 1 19 | #define ALLCAP 2 20 | #define HUHCAP 3 21 | #define HUHINITCAP 4 22 | 23 | // default encoding and keystring 24 | #define SPELL_ENCODING "ISO8859-1" 25 | #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" 26 | 27 | // default morphological fields 28 | #define MORPH_STEM "st:" 29 | #define MORPH_ALLOMORPH "al:" 30 | #define MORPH_POS "po:" 31 | #define MORPH_DERI_PFX "dp:" 32 | #define MORPH_INFL_PFX "ip:" 33 | #define MORPH_TERM_PFX "tp:" 34 | #define MORPH_DERI_SFX "ds:" 35 | #define MORPH_INFL_SFX "is:" 36 | #define MORPH_TERM_SFX "ts:" 37 | #define MORPH_SURF_PFX "sp:" 38 | #define MORPH_FREQ "fr:" 39 | #define MORPH_PHON "ph:" 40 | #define MORPH_HYPH "hy:" 41 | #define MORPH_PART "pa:" 42 | #define MORPH_FLAG "fl:" 43 | #define MORPH_HENTRY "_H:" 44 | #define MORPH_TAG_LEN strlen(MORPH_STEM) 45 | 46 | #define MSEP_FLD ' ' 47 | #define MSEP_REC '\n' 48 | #define MSEP_ALT '\v' 49 | 50 | // default flags 51 | #define DEFAULTFLAGS 65510 52 | #define FORBIDDENWORD 65510 53 | #define ONLYUPCASEFLAG 65511 54 | 55 | // convert UTF-16 characters to UTF-8 56 | LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); 57 | 58 | // convert UTF-8 characters to UTF-16 59 | LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src); 60 | 61 | // sort 2-byte vector 62 | LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end); 63 | 64 | // binary search in 2-byte vector 65 | LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right); 66 | 67 | // remove end of line char(s) 68 | LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s); 69 | 70 | // duplicate string 71 | LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s); 72 | 73 | // strcat for limited length destination string 74 | LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max); 75 | 76 | // duplicate reverse of string 77 | 
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);

// parse into tokens with char delimiter
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
// parse into tokens with char delimiter
// NOTE(review): how this differs from mystrsep() is not visible in this
// header -- see csutil.cxx.
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);

// NOTE(review): the original comment here was a copy of the tokenizer comment
// above. Judging by the name and the (char *, const char *, const char *)
// signature this presumably replaces a pattern inside a string -- confirm
// against csutil.cxx.
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);

// append s to the end of every line in text
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);

// tokenize into lines with new line
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);

// tokenize into lines with new line and uniq in place
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);

// change oldchar to newchar in place
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);

// reverse word
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);

// reverse word (presumably the UTF-8 aware variant -- confirm in csutil.cxx)
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);

// remove duplicates
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);

// free character array list
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);

// character encoding information
struct cs_info {
    unsigned char ccase;
    unsigned char clower;
    unsigned char cupper;
};

LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);

LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es); 126 | 127 | // get language identifiers of language codes 128 | LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang); 129 | 130 | // get characters of the given 8bit encoding with lower- and uppercase forms 131 | LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc); 132 | 133 | // convert null terminated string to all caps using encoding 134 | LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding); 135 | 136 | // convert null terminated string to all little using encoding 137 | LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding); 138 | 139 | // convert null terminated string to have initial capital using encoding 140 | LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding); 141 | 142 | // convert null terminated string to all caps 143 | LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv); 144 | 145 | // convert null terminated string to all little 146 | LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv); 147 | 148 | // convert null terminated string to have initial capital 149 | LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv); 150 | 151 | // convert first nc characters of UTF-8 string to little 152 | LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum); 153 | 154 | // convert first nc characters of UTF-8 string to capital 155 | LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum); 156 | 157 | // get type of capitalization 158 | LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *); 159 | 160 | // get type of capitalization (UTF-8) 161 | LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum); 162 | 163 | // strip all ignored characters in the string 164 | LIBHUNSPELL_DLL_EXPORTED void 
remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); 165 | 166 | // strip all ignored characters in the string 167 | LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars); 168 | 169 | LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln); 170 | 171 | LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16, 172 | int * out_utf16_len, int utf8, int ln); 173 | 174 | LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r); 175 | LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var); 176 | 177 | LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t); 178 | 179 | LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph); 180 | 181 | // conversion function for protected memory 182 | LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source); 183 | 184 | // conversion function for protected memory 185 | LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s); 186 | 187 | // hash entry macros 188 | LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h) 189 | { 190 | char *ret; 191 | if (!h->var) 192 | ret = NULL; 193 | else if (h->var & H_OPT_ALIASM) 194 | ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 195 | else 196 | ret = HENTRY_WORD(h) + h->blen + 1; 197 | return ret; 198 | } 199 | 200 | // NULL-free version for warning-free OOo build 201 | LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h) 202 | { 203 | const char *ret; 204 | if (!h->var) 205 | ret = ""; 206 | else if (h->var & H_OPT_ALIASM) 207 | ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 208 | else 209 | ret = HENTRY_WORD(h) + h->blen + 1; 210 | return ret; 211 | } 212 | 213 | LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p) 214 | { 215 | return (HENTRY_DATA(h) ? 
strstr(HENTRY_DATA(h), p) : NULL); 216 | } 217 | 218 | #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) 219 | 220 | #endif 221 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/dictmgr.cxx: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "dictmgr.hxx" 8 | 9 | DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) 10 | { 11 | // load list of etype entries 12 | pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry)); 13 | if (pdentry) { 14 | if (parse_file(dictpath, etype)) { 15 | numdict = 0; 16 | // no dictionary.lst found is okay 17 | } 18 | } 19 | } 20 | 21 | 22 | DictMgr::~DictMgr() 23 | { 24 | dictentry * pdict = NULL; 25 | if (pdentry) { 26 | pdict = pdentry; 27 | for (int i=0;ilang) { 29 | free(pdict->lang); 30 | pdict->lang = NULL; 31 | } 32 | if (pdict->region) { 33 | free(pdict->region); 34 | pdict->region=NULL; 35 | } 36 | if (pdict->filename) { 37 | free(pdict->filename); 38 | pdict->filename = NULL; 39 | } 40 | pdict++; 41 | } 42 | free(pdentry); 43 | pdentry = NULL; 44 | pdict = NULL; 45 | } 46 | numdict = 0; 47 | } 48 | 49 | 50 | // read in list of etype entries and build up structure to describe them 51 | int DictMgr::parse_file(const char * dictpath, const char * etype) 52 | { 53 | 54 | int i; 55 | char line[MAXDICTENTRYLEN+1]; 56 | dictentry * pdict = pdentry; 57 | 58 | // open the dictionary list file 59 | FILE * dictlst; 60 | dictlst = fopen(dictpath,"r"); 61 | if (!dictlst) { 62 | return 1; 63 | } 64 | 65 | // step one is to parse the dictionary list building up the 66 | // descriptive structures 67 | 68 | // read in each line ignoring any that dont start with etype 69 | while (fgets(line,MAXDICTENTRYLEN,dictlst)) { 70 | mychomp(line); 71 | 72 | /* parse in a dictionary entry */ 73 | if (strncmp(line,etype,4) == 0) { 74 | if (numdict 
< MAXDICTIONARIES) { 75 | char * tp = line; 76 | char * piece; 77 | i = 0; 78 | while ((piece=mystrsep(&tp,' '))) { 79 | if (*piece != '\0') { 80 | switch(i) { 81 | case 0: break; 82 | case 1: pdict->lang = mystrdup(piece); break; 83 | case 2: if (strcmp (piece, "ANY") == 0) 84 | pdict->region = mystrdup(""); 85 | else 86 | pdict->region = mystrdup(piece); 87 | break; 88 | case 3: pdict->filename = mystrdup(piece); break; 89 | default: break; 90 | } 91 | i++; 92 | } 93 | free(piece); 94 | } 95 | if (i == 4) { 96 | numdict++; 97 | pdict++; 98 | } else { 99 | switch (i) { 100 | case 3: 101 | free(pdict->region); 102 | pdict->region=NULL; 103 | case 2: //deliberate fallthrough 104 | free(pdict->lang); 105 | pdict->lang=NULL; 106 | default: 107 | break; 108 | } 109 | fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line); 110 | fflush(stderr); 111 | } 112 | } 113 | } 114 | } 115 | fclose(dictlst); 116 | return 0; 117 | } 118 | 119 | // return text encoding of dictionary 120 | int DictMgr::get_list(dictentry ** ppentry) 121 | { 122 | *ppentry = pdentry; 123 | return numdict; 124 | } 125 | 126 | 127 | 128 | // strip strings into token based on single char delimiter 129 | // acts like strsep() but only uses a delim char and not 130 | // a delim string 131 | 132 | char * DictMgr::mystrsep(char ** stringp, const char delim) 133 | { 134 | char * rv = NULL; 135 | char * mp = *stringp; 136 | size_t n = strlen(mp); 137 | if (n > 0) { 138 | char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n); 139 | if (dp) { 140 | *stringp = dp+1; 141 | size_t nc = dp - mp; 142 | rv = (char *) malloc(nc+1); 143 | if (rv) { 144 | memcpy(rv,mp,nc); 145 | *(rv+nc) = '\0'; 146 | } 147 | } else { 148 | rv = (char *) malloc(n+1); 149 | if (rv) { 150 | memcpy(rv, mp, n); 151 | *(rv+n) = '\0'; 152 | *stringp = mp + n; 153 | } 154 | } 155 | } 156 | return rv; 157 | } 158 | 159 | 160 | // replaces strdup with ansi version 161 | char * DictMgr::mystrdup(const char * s) 162 | { 163 | 
char * d = NULL; 164 | if (s) { 165 | int sl = strlen(s)+1; 166 | d = (char *) malloc(sl); 167 | if (d) memcpy(d,s,sl); 168 | } 169 | return d; 170 | } 171 | 172 | 173 | // remove cross-platform text line end characters 174 | void DictMgr:: mychomp(char * s) 175 | { 176 | int k = strlen(s); 177 | if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; 178 | if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; 179 | } 180 | 181 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/dictmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _DICTMGR_HXX_ 2 | #define _DICTMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #define MAXDICTIONARIES 100 7 | #define MAXDICTENTRYLEN 1024 8 | 9 | struct dictentry { 10 | char * filename; 11 | char * lang; 12 | char * region; 13 | }; 14 | 15 | 16 | class LIBHUNSPELL_DLL_EXPORTED DictMgr 17 | { 18 | 19 | int numdict; 20 | dictentry * pdentry; 21 | 22 | public: 23 | 24 | DictMgr(const char * dictpath, const char * etype); 25 | ~DictMgr(); 26 | int get_list(dictentry** ppentry); 27 | 28 | private: 29 | int parse_file(const char * dictpath, const char * etype); 30 | char * mystrsep(char ** stringp, const char delim); 31 | char * mystrdup(const char * s); 32 | void mychomp(char * s); 33 | 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/filemgr.cxx: -------------------------------------------------------------------------------- 1 | #include "license.hunspell" 2 | #include "license.myspell" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "filemgr.hxx" 9 | 10 | int FileMgr::fail(const char * err, const char * par) { 11 | fprintf(stderr, err, par); 12 | return -1; 13 | } 14 | 15 | FileMgr::FileMgr(const char * file, const char * key) { 16 | linenum = 0; 17 | hin = NULL; 18 | fin = fopen(file, "r"); 19 | if (!fin) { 
20 | // check hzipped file 21 | char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); 22 | if (st) { 23 | strcpy(st, file); 24 | strcat(st, HZIP_EXTENSION); 25 | hin = new Hunzip(st, key); 26 | free(st); 27 | } 28 | } 29 | if (!fin && !hin) fail(MSG_OPEN, file); 30 | } 31 | 32 | FileMgr::~FileMgr() 33 | { 34 | if (fin) fclose(fin); 35 | if (hin) delete hin; 36 | } 37 | 38 | char * FileMgr::getline() { 39 | const char * l; 40 | linenum++; 41 | if (fin) return fgets(in, BUFSIZE - 1, fin); 42 | if (hin && (l = hin->getline())) return strcpy(in, l); 43 | linenum--; 44 | return NULL; 45 | } 46 | 47 | int FileMgr::getlinenum() { 48 | return linenum; 49 | } 50 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/filemgr.hxx: -------------------------------------------------------------------------------- 1 | /* file manager class - read lines of files [filename] OR [filename.hz] */ 2 | #ifndef _FILEMGR_HXX_ 3 | #define _FILEMGR_HXX_ 4 | 5 | #include "hunvisapi.h" 6 | 7 | #include "hunzip.hxx" 8 | #include 9 | 10 | class LIBHUNSPELL_DLL_EXPORTED FileMgr 11 | { 12 | protected: 13 | FILE * fin; 14 | Hunzip * hin; 15 | char in[BUFSIZE + 50]; // input buffer 16 | int fail(const char * err, const char * par); 17 | int linenum; 18 | 19 | public: 20 | FileMgr(const char * filename, const char * key = NULL); 21 | ~FileMgr(); 22 | char * getline(); 23 | int getlinenum(); 24 | }; 25 | #endif 26 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hashmgr.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _HASHMGR_HXX_ 2 | #define _HASHMGR_HXX_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #include 7 | 8 | #include "htypes.hxx" 9 | #include "filemgr.hxx" 10 | 11 | enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; 12 | 13 | class LIBHUNSPELL_DLL_EXPORTED HashMgr 14 | { 15 | int tablesize; 16 | struct 
hentry ** tableptr; 17 | int userword; 18 | flag flag_mode; 19 | int complexprefixes; 20 | int utf8; 21 | unsigned short forbiddenword; 22 | int langnum; 23 | char * enc; 24 | char * lang; 25 | struct cs_info * csconv; 26 | char * ignorechars; 27 | unsigned short * ignorechars_utf16; 28 | int ignorechars_utf16_len; 29 | int numaliasf; // flag vector `compression' with aliases 30 | unsigned short ** aliasf; 31 | unsigned short * aliasflen; 32 | int numaliasm; // morphological desciption `compression' with aliases 33 | char ** aliasm; 34 | 35 | 36 | public: 37 | HashMgr(const char * tpath, const char * apath, const char * key = NULL); 38 | ~HashMgr(); 39 | 40 | struct hentry * lookup(const char *) const; 41 | int hash(const char *) const; 42 | struct hentry * walk_hashtable(int & col, struct hentry * hp) const; 43 | 44 | int add(const char * word); 45 | int add_with_affix(const char * word, const char * pattern); 46 | int remove(const char * word); 47 | int decode_flags(unsigned short ** result, char * flags, FileMgr * af); 48 | unsigned short decode_flag(const char * flag); 49 | char * encode_flag(unsigned short flag); 50 | int is_aliasf(); 51 | int get_aliasf(int index, unsigned short ** fvec, FileMgr * af); 52 | int is_aliasm(); 53 | char * get_aliasm(int index); 54 | 55 | private: 56 | int get_clen_and_captype(const char * word, int wbl, int * captype); 57 | int load_tables(const char * tpath, const char * key); 58 | int add_word(const char * word, int wbl, int wcl, unsigned short * ap, 59 | int al, const char * desc, bool onlyupcase); 60 | int load_config(const char * affpath, const char * key); 61 | int parse_aliasf(char * line, FileMgr * af); 62 | int add_hidden_capitalized_word(char * word, int wbl, int wcl, 63 | unsigned short * flags, int al, char * dp, int captype); 64 | int parse_aliasm(char * line, FileMgr * af); 65 | int remove_forbidden_flag(const char * word); 66 | 67 | }; 68 | 69 | #endif 70 | 
-------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/htypes.hxx: -------------------------------------------------------------------------------- 1 | #ifndef _HTYPES_HXX_ 2 | #define _HTYPES_HXX_ 3 | 4 | #define ROTATE_LEN 5 5 | 6 | #define ROTATE(v,q) \ 7 | (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1)); 8 | 9 | // hentry options 10 | #define H_OPT (1 << 0) 11 | #define H_OPT_ALIASM (1 << 1) 12 | #define H_OPT_PHON (1 << 2) 13 | 14 | // see also csutil.hxx 15 | #define HENTRY_WORD(h) &(h->word[0]) 16 | 17 | // approx. number of user defined words 18 | #define USERWORD 1000 19 | 20 | struct hentry 21 | { 22 | unsigned char blen; // word length in bytes 23 | unsigned char clen; // word length in characters (different for UTF-8 enc.) 24 | short alen; // length of affix flag vector 25 | unsigned short * astr; // affix flag vector 26 | struct hentry * next; // next word with same hash code 27 | struct hentry * next_homonym; // next homonym word (with same hash code) 28 | char var; // variable fields (only for special pronounciation yet) 29 | char word[1]; // variable-length word (8-bit or UTF-8 encoding) 30 | }; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hunspell.dsp: -------------------------------------------------------------------------------- 1 | # Microsoft Developer Studio Project File - Name="hunspell" - Package Owner=<4> 2 | # Microsoft Developer Studio Generated Build File, Format Version 6.00 3 | # ** DO NOT EDIT ** 4 | 5 | # TARGTYPE "Win32 (x86) Static Library" 0x0104 6 | 7 | CFG=hunspell - Win32 Debug 8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE, 9 | !MESSAGE use the Export Makefile command and run 10 | !MESSAGE 11 | !MESSAGE NMAKE /f "hunspell.mak". 
12 | !MESSAGE 13 | !MESSAGE You can specify a configuration when running NMAKE 14 | !MESSAGE by defining the macro CFG on the command line. For example: 15 | !MESSAGE 16 | !MESSAGE NMAKE /f "hunspell.mak" CFG="hunspell - Win32 Debug" 17 | !MESSAGE 18 | !MESSAGE Possible choices for configuration are: 19 | !MESSAGE 20 | !MESSAGE "hunspell - Win32 Release" (based on "Win32 (x86) Static Library") 21 | !MESSAGE "hunspell - Win32 Debug" (based on "Win32 (x86) Static Library") 22 | !MESSAGE 23 | 24 | # Begin Project 25 | # PROP AllowPerConfigDependencies 0 26 | # PROP Scc_ProjName "" 27 | # PROP Scc_LocalPath "" 28 | CPP=cl.exe 29 | RSC=rc.exe 30 | 31 | !IF "$(CFG)" == "hunspell - Win32 Release" 32 | 33 | # PROP BASE Use_MFC 0 34 | # PROP BASE Use_Debug_Libraries 0 35 | # PROP BASE Output_Dir "Release" 36 | # PROP BASE Intermediate_Dir "Release" 37 | # PROP BASE Target_Dir "" 38 | # PROP Use_MFC 0 39 | # PROP Use_Debug_Libraries 0 40 | # PROP Output_Dir "Release" 41 | # PROP Intermediate_Dir "Release" 42 | # PROP Target_Dir "" 43 | # ADD BASE CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c 44 | # ADD CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c 45 | # ADD BASE RSC /l 0x40e /d "NDEBUG" 46 | # ADD RSC /l 0x40e /d "NDEBUG" 47 | BSC32=bscmake.exe 48 | # ADD BASE BSC32 /nologo 49 | # ADD BSC32 /nologo 50 | LIB32=link.exe -lib 51 | # ADD BASE LIB32 /nologo 52 | # ADD LIB32 /nologo 53 | 54 | !ELSEIF "$(CFG)" == "hunspell - Win32 Debug" 55 | 56 | # PROP BASE Use_MFC 0 57 | # PROP BASE Use_Debug_Libraries 1 58 | # PROP BASE Output_Dir "Debug" 59 | # PROP BASE Intermediate_Dir "Debug" 60 | # PROP BASE Target_Dir "" 61 | # PROP Use_MFC 0 62 | # PROP Use_Debug_Libraries 1 63 | # PROP Output_Dir "Debug" 64 | # PROP Intermediate_Dir "Debug" 65 | # PROP Target_Dir "" 66 | # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c 67 | # ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D 
"_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c 68 | # ADD BASE RSC /l 0x40e /d "_DEBUG" 69 | # ADD RSC /l 0x40e /d "_DEBUG" 70 | BSC32=bscmake.exe 71 | # ADD BASE BSC32 /nologo 72 | # ADD BSC32 /nologo 73 | LIB32=link.exe -lib 74 | # ADD BASE LIB32 /nologo 75 | # ADD LIB32 /nologo 76 | 77 | !ENDIF 78 | 79 | # Begin Target 80 | 81 | # Name "hunspell - Win32 Release" 82 | # Name "hunspell - Win32 Debug" 83 | # Begin Group "Source Files" 84 | 85 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" 86 | # Begin Source File 87 | 88 | SOURCE=.\affentry.cxx 89 | # End Source File 90 | # Begin Source File 91 | 92 | SOURCE=.\affixmgr.cxx 93 | # End Source File 94 | # Begin Source File 95 | 96 | SOURCE=.\csutil.cxx 97 | # End Source File 98 | # Begin Source File 99 | 100 | SOURCE=.\dictmgr.cxx 101 | # End Source File 102 | # Begin Source File 103 | 104 | SOURCE=.\hashmgr.cxx 105 | # End Source File 106 | # Begin Source File 107 | 108 | SOURCE=.\hunspell.cxx 109 | # End Source File 110 | # Begin Source File 111 | 112 | SOURCE=.\suggestmgr.cxx 113 | # End Source File 114 | # End Group 115 | # Begin Group "Header Files" 116 | 117 | # PROP Default_Filter "h;hpp;hxx;hm;inl" 118 | # Begin Source File 119 | 120 | SOURCE=.\affentry.hxx 121 | # End Source File 122 | # Begin Source File 123 | 124 | SOURCE=.\affixmgr.hxx 125 | # End Source File 126 | # Begin Source File 127 | 128 | SOURCE=.\atypes.hxx 129 | # End Source File 130 | # Begin Source File 131 | 132 | SOURCE=.\baseaffix.hxx 133 | # End Source File 134 | # Begin Source File 135 | 136 | SOURCE=.\csutil.hxx 137 | # End Source File 138 | # Begin Source File 139 | 140 | SOURCE=.\dictmgr.hxx 141 | # End Source File 142 | # Begin Source File 143 | 144 | SOURCE=.\hashmgr.hxx 145 | # End Source File 146 | # Begin Source File 147 | 148 | SOURCE=.\htypes.hxx 149 | # End Source File 150 | # Begin Source File 151 | 152 | SOURCE=.\langnum.hxx 153 | # End Source File 154 | # Begin Source File 155 | 156 | SOURCE=.\hunspell.hxx 157 | # 
End Source File 158 | # Begin Source File 159 | 160 | SOURCE=.\suggestmgr.hxx 161 | # End Source File 162 | # End Group 163 | # End Target 164 | # End Project 165 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hunspell.h: -------------------------------------------------------------------------------- 1 | #ifndef _MYSPELLMGR_H_ 2 | #define _MYSPELLMGR_H_ 3 | 4 | #include "hunvisapi.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct Hunhandle Hunhandle; 11 | 12 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath); 13 | 14 | LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, 15 | const char * key); 16 | 17 | LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell); 18 | 19 | /* spell(word) - spellcheck word 20 | * output: 0 = bad word, not 0 = good word 21 | */ 22 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *); 23 | 24 | LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell); 25 | 26 | /* suggest(suggestions, word) - search suggestions 27 | * input: pointer to an array of strings pointer and the (bad) word 28 | * array of strings pointer (here *slst) may not be initialized 29 | * output: number of suggestions in string array, and suggestions in 30 | * a newly allocated array of strings (*slts will be NULL when number 31 | * of suggestion equals 0.) 
32 | */ 33 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word); 34 | 35 | /* morphological functions */ 36 | 37 | /* analyze(result, word) - morphological analysis of the word */ 38 | 39 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word); 40 | 41 | /* stem(result, word) - stemmer function */ 42 | 43 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word); 44 | 45 | /* stem(result, analysis, n) - get stems from a morph. analysis 46 | * example: 47 | * char ** result, result2; 48 | * int n1 = Hunspell_analyze(result, "words"); 49 | * int n2 = Hunspell_stem2(result2, result, n1); 50 | */ 51 | 52 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n); 53 | 54 | /* generate(result, word, word2) - morphological generation by example(s) */ 55 | 56 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word, 57 | const char * word2); 58 | 59 | /* generate(result, word, desc, n) - generation by morph. description(s) 60 | * example: 61 | * char ** result; 62 | * char * affix = "is:plural"; // description depends from dictionaries, too 63 | * int n = Hunspell_generate2(result, "word", &affix, 1); 64 | * for (int i = 0; i < n; i++) printf("%s\n", result[i]); 65 | */ 66 | 67 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, 68 | char** desc, int n); 69 | 70 | /* functions for run-time modification of the dictionary */ 71 | 72 | /* add word to the run-time dictionary */ 73 | 74 | LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word); 75 | 76 | /* add word to the run-time dictionary with affix flags of 77 | * the example (a dictionary word): Hunspell will recognize 78 | * affixed forms of the new word, too. 
/* Hunspell - public C++ interface of the spell checker.
 *
 * Declares the entry points for spell checking, suggestion, morphological
 * analysis/generation and run-time dictionary modification.  Only the
 * declarations are visible in this header; the notes on private members
 * below are therefore hedged.
 */
class LIBHUNSPELL_DLL_EXPORTED Hunspell
{
  AffixMgr*       pAMgr;
  HashMgr*        pHMgr[MAXDIC];   /* room for at most MAXDIC HashMgr pointers */
  int             maxdic;          /* NOTE(review): presumably the number of pHMgr slots in use - confirm */
  SuggestMgr*     pSMgr;
  char *          affixpath;
  char *          encoding;
  struct cs_info * csconv;
  int             langnum;
  int             utf8;
  int             complexprefixes;
  char**          wordbreak;

public:

  /* Hunspell(aff, dic) - constructor of Hunspell class
   * input: path of affix file and dictionary file
   * `key` is optional and defaults to NULL
   */

  Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
  ~Hunspell();

  /* load extra dictionaries (only dic files) */
  int add_dic(const char * dpath, const char * key = NULL);

  /* spell(word) - spellcheck word
   * output: 0 = bad word, not 0 = good word
   *
   * plus output:
   *   info: information bit array, fields:
   *     SPELL_COMPOUND  = a compound word
   *     SPELL_FORBIDDEN = an explicit forbidden word
   *   root: root (stem), when input is a word with affix(es)
   */

  int spell(const char * word, int * info = NULL, char ** root = NULL);

  /* suggest(suggestions, word) - search suggestions
   * input: pointer to an array of string pointers and the (bad) word;
   *   the array of string pointers (here *slst) need not be initialized
   * output: number of suggestions in the string array, and the suggestions
   *   in a newly allocated array of strings (*slst will be NULL when the
   *   number of suggestions equals 0.)
   */

  int suggest(char*** slst, const char * word);

  /* deallocate suggestion lists */

  void free_list(char *** slst, int n);

  char * get_dic_encoding();

  /* morphological functions */

  /* analyze(result, word) - morphological analysis of the word */

  int analyze(char*** slst, const char * word);

  /* stem(result, word) - stemmer function */

  int stem(char*** slst, const char * word);

  /* stem(result, analysis, n) - get stems from a morph. analysis
   * example:
   * char ** result, result2;
   * int n1 = analyze(&result, "words");
   * int n2 = stem(&result2, result, n1);
   */

  int stem(char*** slst, char ** morph, int n);

  /* generate(result, word, word2) - morphological generation by example(s) */

  int generate(char*** slst, const char * word, const char * word2);

  /* generate(result, word, desc, n) - generation by morph. description(s)
   * example:
   * char ** result;
   * char * affix = "is:plural"; // description depends on dictionaries, too
   * int n = generate(&result, "word", &affix, 1);
   * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
   */

  int generate(char*** slst, const char * word, char ** desc, int n);

  /* functions for run-time modification of the dictionary */

  /* add word to the run-time dictionary */

  int add(const char * word);

  /* add word to the run-time dictionary with affix flags of
   * the example (a dictionary word): Hunspell will recognize
   * affixed forms of the new word, too.
   */

  int add_with_affix(const char * word, const char * example);

  /* remove word from the run-time dictionary */

  int remove(const char * word);

  /* other */

  /* get extra word characters defined in affix file for tokenization */
  const char * get_wordchars();
  unsigned short * get_wordchars_utf16(int * len);

  struct cs_info * get_csconv();
  const char * get_version();

  int get_langnum() const;

  /* experimental and deprecated functions
   * (only declared when HUNSPELL_EXPERIMENTAL is defined) */

#ifdef HUNSPELL_EXPERIMENTAL
  /* suffix is an affix flag string, similarly in dictionary files */
  int put_word_suffix(const char * word, const char * suffix);
  char * morph_with_correction(const char * word);

  /* spec. suggestions */
  int suggest_auto(char*** slst, const char * word);
  int suggest_pos_stems(char*** slst, const char * word);
#endif

private:
  /* NOTE(review): internal helpers; their implementations are not visible
   * in this header.  Judging by the names only, they cover word cleaning,
   * capitalization changes (mk*cap / mk*small), sharp-s handling and
   * XML-parameter parsing - confirm against the implementation file before
   * relying on this summary. */
  int    cleanword(char *, const char *, int * pcaptype, int * pabbrev);
  int    cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
  void   mkinitcap(char *);
  int    mkinitcap2(char * p, w_char * u, int nc);
  int    mkinitsmall2(char * p, w_char * u, int nc);
  void   mkallcap(char *);
  int    mkallcap2(char * p, w_char * u, int nc);
  void   mkallsmall(char *);
  int    mkallsmall2(char * p, w_char * u, int nc);
  struct hentry * checkword(const char *, int * info, char **root);
  char * sharps_u8_l1(char * dest, char * source);
  hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
  int    is_keepcase(const hentry * rv);
  int    insert_sug(char ***slst, char * word, int ns);
  void   cat_result(char * result, char * st);
  char * stem_description(const char * desc);
  int    spellml(char*** slst, const char * word);
  int    get_xml_par(char * dest, const char * par, int maxl);
  const char * get_xml_pos(const char * s, const char * attr);
  int    get_xml_list(char ***slst, char * list, const char * tag);
  int    check_xml_par(const char * q, const char * attr, const char * value);

};
| #elif BUILDING_LIBHUNSPELL && 1 13 | # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) 14 | #else 15 | # define LIBHUNSPELL_DLL_EXPORTED 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hunvisapi.h.in: -------------------------------------------------------------------------------- 1 | #ifndef _HUNSPELL_VISIBILITY_H_ 2 | #define _HUNSPELL_VISIBILITY_H_ 3 | 4 | #if defined(HUNSPELL_STATIC) 5 | # define LIBHUNSPELL_DLL_EXPORTED 6 | #elif defined(_MSC_VER) 7 | # if defined(BUILDING_LIBHUNSPELL) 8 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) 9 | # else 10 | # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) 11 | # endif 12 | #elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@ 13 | # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) 14 | #else 15 | # define LIBHUNSPELL_DLL_EXPORTED 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hunzip.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "hunzip.hxx" 6 | 7 | #define CODELEN 65536 8 | #define BASEBITREC 5000 9 | 10 | #define UNCOMPRESSED '\002' 11 | #define MAGIC "hz0" 12 | #define MAGIC_ENCRYPT "hz1" 13 | #define MAGICLEN (sizeof(MAGIC) - 1) 14 | 15 | int Hunzip::fail(const char * err, const char * par) { 16 | fprintf(stderr, err, par); 17 | return -1; 18 | } 19 | 20 | Hunzip::Hunzip(const char * file, const char * key) { 21 | bufsiz = 0; 22 | lastbit = 0; 23 | inc = 0; 24 | outc = 0; 25 | dec = NULL; 26 | fin = NULL; 27 | filename = (char *) malloc(strlen(file) + 1); 28 | if (filename) strcpy(filename, file); 29 | if (getcode(key) == -1) bufsiz = -1; 30 | else bufsiz = getbuf(); 31 | } 32 | 33 | int Hunzip::getcode(const char * key) { 34 | unsigned char c[2]; 35 | int i, j, 
n, p; 36 | int allocatedbit = BASEBITREC; 37 | const char * enc = key; 38 | 39 | if (!filename) return -1; 40 | 41 | fin = fopen(filename, "rb"); 42 | if (!fin) return -1; 43 | 44 | // read magic number 45 | if ((fread(in, 1, 3, fin) < MAGICLEN) 46 | || !(strncmp(MAGIC, in, MAGICLEN) == 0 || 47 | strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { 48 | return fail(MSG_FORMAT, filename); 49 | } 50 | 51 | // check encryption 52 | if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { 53 | unsigned char cs; 54 | if (!key) return fail(MSG_KEY, filename); 55 | if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); 56 | for (cs = 0; *enc; enc++) cs ^= *enc; 57 | if (cs != c[0]) return fail(MSG_KEY, filename); 58 | enc = key; 59 | } else key = NULL; 60 | 61 | // read record count 62 | if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); 63 | 64 | if (key) { 65 | c[0] ^= *enc; 66 | if (*(++enc) == '\0') enc = key; 67 | c[1] ^= *enc; 68 | } 69 | 70 | n = ((int) c[0] << 8) + c[1]; 71 | dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit)); 72 | if (!dec) return fail(MSG_MEMORY, filename); 73 | dec[0].v[0] = 0; 74 | dec[0].v[1] = 0; 75 | 76 | // read codes 77 | for (i = 0; i < n; i++) { 78 | unsigned char l; 79 | if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); 80 | if (key) { 81 | if (*(++enc) == '\0') enc = key; 82 | c[0] ^= *enc; 83 | if (*(++enc) == '\0') enc = key; 84 | c[1] ^= *enc; 85 | } 86 | if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); 87 | if (key) { 88 | if (*(++enc) == '\0') enc = key; 89 | l ^= *enc; 90 | } 91 | if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename); 92 | if (key) for (j = 0; j <= l/8; j++) { 93 | if (*(++enc) == '\0') enc = key; 94 | in[j] ^= *enc; 95 | } 96 | p = 0; 97 | for (j = 0; j < l; j++) { 98 | int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 
1 : 0; 99 | int oldp = p; 100 | p = dec[p].v[b]; 101 | if (p == 0) { 102 | lastbit++; 103 | if (lastbit == allocatedbit) { 104 | allocatedbit += BASEBITREC; 105 | dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit)); 106 | } 107 | dec[lastbit].v[0] = 0; 108 | dec[lastbit].v[1] = 0; 109 | dec[oldp].v[b] = lastbit; 110 | p = lastbit; 111 | } 112 | } 113 | dec[p].c[0] = c[0]; 114 | dec[p].c[1] = c[1]; 115 | } 116 | return 0; 117 | } 118 | 119 | Hunzip::~Hunzip() 120 | { 121 | if (dec) free(dec); 122 | if (fin) fclose(fin); 123 | if (filename) free(filename); 124 | } 125 | 126 | int Hunzip::getbuf() { 127 | int p = 0; 128 | int o = 0; 129 | do { 130 | if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8; 131 | for (; inc < inbits; inc++) { 132 | int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; 133 | int oldp = p; 134 | p = dec[p].v[b]; 135 | if (p == 0) { 136 | if (oldp == lastbit) { 137 | fclose(fin); 138 | fin = NULL; 139 | // add last odd byte 140 | if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1]; 141 | return o; 142 | } 143 | out[o++] = dec[oldp].c[0]; 144 | out[o++] = dec[oldp].c[1]; 145 | if (o == BUFSIZE) return o; 146 | p = dec[p].v[b]; 147 | } 148 | } 149 | inc = 0; 150 | } while (inbits == BUFSIZE * 8); 151 | return fail(MSG_FORMAT, filename); 152 | } 153 | 154 | const char * Hunzip::getline() { 155 | char linebuf[BUFSIZE]; 156 | int l = 0, eol = 0, left = 0, right = 0; 157 | if (bufsiz == -1) return NULL; 158 | while (l < bufsiz && !eol) { 159 | linebuf[l++] = out[outc]; 160 | switch (out[outc]) { 161 | case '\t': break; 162 | case 31: { // escape 163 | if (++outc == bufsiz) { 164 | bufsiz = getbuf(); 165 | outc = 0; 166 | } 167 | linebuf[l - 1] = out[outc]; 168 | break; 169 | } 170 | case ' ': break; 171 | default: if (((unsigned char) out[outc]) < 47) { 172 | if (out[outc] > 32) { 173 | right = out[outc] - 31; 174 | if (++outc == bufsiz) { 175 | bufsiz = getbuf(); 176 | outc = 0; 177 | } 178 | } 179 | if (out[outc] == 30) left = 
9; else left = out[outc]; 180 | linebuf[l-1] = '\n'; 181 | eol = 1; 182 | } 183 | } 184 | if (++outc == bufsiz) { 185 | outc = 0; 186 | bufsiz = fin ? getbuf(): -1; 187 | } 188 | } 189 | if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1); 190 | else linebuf[l] = '\0'; 191 | strcpy(line + left, linebuf); 192 | return line; 193 | } 194 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/hunzip.hxx: -------------------------------------------------------------------------------- 1 | /* hunzip: file decompression for sorted dictionaries with optional encryption, 2 | * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */ 3 | 4 | #ifndef _HUNZIP_HXX_ 5 | #define _HUNZIP_HXX_ 6 | 7 | #include "hunvisapi.h" 8 | 9 | #include 10 | 11 | #define BUFSIZE 65536 12 | #define HZIP_EXTENSION ".hz" 13 | 14 | #define MSG_OPEN "error: %s: cannot open\n" 15 | #define MSG_FORMAT "error: %s: not in hzip format\n" 16 | #define MSG_MEMORY "error: %s: missing memory\n" 17 | #define MSG_KEY "error: %s: missing or bad password\n" 18 | 19 | struct bit { 20 | unsigned char c[2]; 21 | int v[2]; 22 | }; 23 | 24 | class LIBHUNSPELL_DLL_EXPORTED Hunzip 25 | { 26 | 27 | protected: 28 | char * filename; 29 | FILE * fin; 30 | int bufsiz, lastbit, inc, inbits, outc; 31 | struct bit * dec; // code table 32 | char in[BUFSIZE]; // input buffer 33 | char out[BUFSIZE + 1]; // Huffman-decoded buffer 34 | char line[BUFSIZE + 50]; // decoded line 35 | int getcode(const char * key); 36 | int getbuf(); 37 | int fail(const char * err, const char * par); 38 | 39 | public: 40 | Hunzip(const char * filename, const char * key = NULL); 41 | ~Hunzip(); 42 | const char * getline(); 43 | }; 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/langnum.hxx: -------------------------------------------------------------------------------- 1 | #ifndef 
_LANGNUM_HXX_ 2 | #define _LANGNUM_HXX_ 3 | 4 | /* 5 | language numbers for language specific codes 6 | see http://l10n.openoffice.org/languages.html 7 | */ 8 | 9 | enum { 10 | LANG_ar=96, 11 | LANG_az=100, // custom number 12 | LANG_bg=41, 13 | LANG_ca=37, 14 | LANG_cs=42, 15 | LANG_da=45, 16 | LANG_de=49, 17 | LANG_el=30, 18 | LANG_en=01, 19 | LANG_es=34, 20 | LANG_eu=10, 21 | LANG_fr=02, 22 | LANG_gl=38, 23 | LANG_hr=78, 24 | LANG_hu=36, 25 | LANG_it=39, 26 | LANG_la=99, // custom number 27 | LANG_lv=101, // custom number 28 | LANG_nl=31, 29 | LANG_pl=48, 30 | LANG_pt=03, 31 | LANG_ru=07, 32 | LANG_sv=50, 33 | LANG_tr=90, 34 | LANG_uk=80, 35 | LANG_xx=999 36 | }; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/license.hunspell: -------------------------------------------------------------------------------- 1 | /* ***** BEGIN LICENSE BLOCK ***** 2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 | * 4 | * The contents of this file are subject to the Mozilla Public License Version 5 | * 1.1 (the "License"); you may not use this file except in compliance with 6 | * the License. You may obtain a copy of the License at 7 | * http://www.mozilla.org/MPL/ 8 | * 9 | * Software distributed under the License is distributed on an "AS IS" basis, 10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 | * for the specific language governing rights and limitations under the 12 | * License. 13 | * 14 | * The Original Code is Hunspell, based on MySpell. 15 | * 16 | * The Initial Developers of the Original Code are 17 | * Kevin Hendricks (MySpell) and Laszlo Nemeth (Hunspell). 18 | * Portions created by the Initial Developers are Copyright (C) 2002-2005 19 | * the Initial Developers. All Rights Reserved. 
20 | * 21 | * Contributor(s): 22 | * David Einstein 23 | * Davide Prina 24 | * Giuseppe Modugno 25 | * Gianluca Turconi 26 | * Simon Brouwer 27 | * Noll Janos 28 | * Biro Arpad 29 | * Goldman Eleonora 30 | * Sarlos Tamas 31 | * Bencsath Boldizsar 32 | * Halacsy Peter 33 | * Dvornik Laszlo 34 | * Gefferth Andras 35 | * Nagy Viktor 36 | * Varga Daniel 37 | * Chris Halls 38 | * Rene Engelhard 39 | * Bram Moolenaar 40 | * Dafydd Jones 41 | * Harri Pitkanen 42 | * Andras Timar 43 | * Tor Lillqvist 44 | * 45 | * Alternatively, the contents of this file may be used under the terms of 46 | * either the GNU General Public License Version 2 or later (the "GPL"), or 47 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 48 | * in which case the provisions of the GPL or the LGPL are applicable instead 49 | * of those above. If you wish to allow use of your version of this file only 50 | * under the terms of either the GPL or the LGPL, and not to allow others to 51 | * use your version of this file under the terms of the MPL, indicate your 52 | * decision by deleting the provisions above and replace them with the notice 53 | * and other provisions required by the GPL or the LGPL. If you do not delete 54 | * the provisions above, a recipient may use your version of this file under 55 | * the terms of any one of the MPL, the GPL or the LGPL. 56 | * 57 | * ***** END LICENSE BLOCK ***** */ 58 | 59 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/license.myspell: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 3 | * And Contributors. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * 3. All modifications to the source code must be clearly marked as 17 | * such. Binary redistributions based on modified source code 18 | * must be clearly marked as modified versions in the documentation 19 | * and/or other materials provided with the distribution. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 25 | * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 | * SUCH DAMAGE. 33 | * 34 | * 35 | * NOTE: A special thanks and credit goes to Geoff Kuenning 36 | * the creator of ispell. MySpell's affix algorithms were 37 | * based on those of ispell which should be noted is 38 | * copyright Geoff Kuenning et.al. and now available 39 | * under a BSD style license. 
For more information on ispell 40 | * and affix compression in general, please see: 41 | * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html 42 | * (the home page for ispell) 43 | * 44 | * An almost complete rewrite of MySpell for use by 45 | * the Mozilla project has been developed by David Einstein 46 | * (Deinst@world.std.com). David and I are now 47 | * working on parallel development tracks to help 48 | * our respective projects (Mozilla and OpenOffice.org 49 | * and we will maintain full affix file and dictionary 50 | * file compatibility and work on merging our versions 51 | * of MySpell back into a single tree. David has been 52 | * a significant help in improving MySpell. 53 | * 54 | * Special thanks also go to La'szlo' Ne'meth 55 | * who is the author of the 56 | * Hungarian dictionary and who developed and contributed 57 | * the code to support compound words in MySpell 58 | * and fixed numerous problems with the encoding 59 | * case conversion tables. 60 | * 61 | */ 62 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/phonet.cxx: -------------------------------------------------------------------------------- 1 | /* phonetic.c - generic replacement aglogithms for phonetic transformation 2 | Copyright (C) 2000 Bjoern Jacke 3 | 4 | This library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License version 2.1 as published by the Free Software Foundation; 7 | 8 | This library is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public 14 | License along with this library; If not, see 15 | . 
16 | 17 | Changelog: 18 | 19 | 2000-01-05 Bjoern Jacke 20 | Initial Release insprired by the article about phonetic 21 | transformations out of c't 25/1999 22 | 23 | 2007-07-26 Bjoern Jacke 24 | Released under MPL/GPL/LGPL tri-license for Hunspell 25 | 26 | 2007-08-23 Laszlo Nemeth 27 | Porting from Aspell to Hunspell using C-like structs 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "csutil.hxx" 36 | #include "phonet.hxx" 37 | 38 | void init_phonet_hash(phonetable & parms) 39 | { 40 | int i, k; 41 | 42 | for (i = 0; i < HASHSIZE; i++) { 43 | parms.hash[i] = -1; 44 | } 45 | 46 | for (i = 0; parms.rules[i][0] != '\0'; i += 2) { 47 | /** set hash value **/ 48 | k = (unsigned char) parms.rules[i][0]; 49 | 50 | if (parms.hash[k] < 0) { 51 | parms.hash[k] = i; 52 | } 53 | } 54 | } 55 | 56 | // like strcpy but safe if the strings overlap 57 | // but only if dest < src 58 | static inline void strmove(char * dest, char * src) { 59 | while (*src) 60 | *dest++ = *src++; 61 | *dest = '\0'; 62 | } 63 | 64 | static int myisalpha(char ch) { 65 | if ((unsigned char) ch < 128) return isalpha(ch); 66 | return 1; 67 | } 68 | 69 | /* phonetic transcription algorithm */ 70 | /* see: http://aspell.net/man-html/Phonetic-Code.html */ 71 | /* convert string to uppercase before this call */ 72 | int phonet (const char * inword, char * target, 73 | int len, 74 | phonetable & parms) 75 | { 76 | /** Do phonetic transformation. **/ 77 | /** "len" = length of "inword" incl. '\0'. 
**/ 78 | 79 | /** result: >= 0: length of "target" **/ 80 | /** otherwise: error **/ 81 | 82 | int i,j,k=0,n,p,z; 83 | int k0,n0,p0=-333,z0; 84 | char c, c0; 85 | const char * s; 86 | typedef unsigned char uchar; 87 | char word[MAXPHONETUTF8LEN + 1]; 88 | if (len == -1) len = strlen(inword); 89 | if (len > MAXPHONETUTF8LEN) return 0; 90 | strcpy(word, inword); 91 | 92 | /** check word **/ 93 | i = j = z = 0; 94 | while ((c = word[i]) != '\0') { 95 | n = parms.hash[(uchar) c]; 96 | z0 = 0; 97 | 98 | if (n >= 0) { 99 | /** check all rules for the same letter **/ 100 | while (parms.rules[n][0] == c) { 101 | 102 | /** check whole string **/ 103 | k = 1; /** number of found letters **/ 104 | p = 5; /** default priority **/ 105 | s = parms.rules[n]; 106 | s++; /** important for (see below) "*(s-1)" **/ 107 | 108 | while (*s != '\0' && word[i+k] == *s 109 | && !isdigit ((unsigned char) *s) && strchr ("(-<^$", *s) == NULL) { 110 | k++; 111 | s++; 112 | } 113 | if (*s == '(') { 114 | /** check letters in "(..)" **/ 115 | if (myisalpha(word[i+k]) // ...could be implied? 116 | && strchr(s+1, word[i+k]) != NULL) { 117 | k++; 118 | while (*s != ')') 119 | s++; 120 | s++; 121 | } 122 | } 123 | p0 = (int) *s; 124 | k0 = k; 125 | while (*s == '-' && k > 1) { 126 | k--; 127 | s++; 128 | } 129 | if (*s == '<') 130 | s++; 131 | if (isdigit ((unsigned char) *s)) { 132 | /** determine priority **/ 133 | p = *s - '0'; 134 | s++; 135 | } 136 | if (*s == '^' && *(s+1) == '^') 137 | s++; 138 | 139 | if (*s == '\0' 140 | || (*s == '^' 141 | && (i == 0 || ! myisalpha(word[i-1])) 142 | && (*(s+1) != '$' 143 | || (! myisalpha(word[i+k0]) ))) 144 | || (*s == '$' && i > 0 145 | && myisalpha(word[i-1]) 146 | && (! 
myisalpha(word[i+k0]) ))) 147 | { 148 | /** search for followup rules, if: **/ 149 | /** parms.followup and k > 1 and NO '-' in searchstring **/ 150 | c0 = word[i+k-1]; 151 | n0 = parms.hash[(uchar) c0]; 152 | 153 | // if (parms.followup && k > 1 && n0 >= 0 154 | if (k > 1 && n0 >= 0 155 | && p0 != (int) '-' && word[i+k] != '\0') { 156 | /** test follow-up rule for "word[i+k]" **/ 157 | while (parms.rules[n0][0] == c0) { 158 | 159 | /** check whole string **/ 160 | k0 = k; 161 | p0 = 5; 162 | s = parms.rules[n0]; 163 | s++; 164 | while (*s != '\0' && word[i+k0] == *s 165 | && ! isdigit((unsigned char) *s) && strchr("(-<^$",*s) == NULL) { 166 | k0++; 167 | s++; 168 | } 169 | if (*s == '(') { 170 | /** check letters **/ 171 | if (myisalpha(word[i+k0]) 172 | && strchr (s+1, word[i+k0]) != NULL) { 173 | k0++; 174 | while (*s != ')' && *s != '\0') 175 | s++; 176 | if (*s == ')') 177 | s++; 178 | } 179 | } 180 | while (*s == '-') { 181 | /** "k0" gets NOT reduced **/ 182 | /** because "if (k0 == k)" **/ 183 | s++; 184 | } 185 | if (*s == '<') 186 | s++; 187 | if (isdigit ((unsigned char) *s)) { 188 | p0 = *s - '0'; 189 | s++; 190 | } 191 | 192 | if (*s == '\0' 193 | /** *s == '^' cuts **/ 194 | || (*s == '$' && ! myisalpha(word[i+k0]))) 195 | { 196 | if (k0 == k) { 197 | /** this is just a piece of the string **/ 198 | n0 += 2; 199 | continue; 200 | } 201 | 202 | if (p0 < p) { 203 | /** priority too low **/ 204 | n0 += 2; 205 | continue; 206 | } 207 | /** rule fits; stop search **/ 208 | break; 209 | } 210 | n0 += 2; 211 | } /** End of "while (parms.rules[n0][0] == c0)" **/ 212 | 213 | if (p0 >= p && parms.rules[n0][0] == c0) { 214 | n += 2; 215 | continue; 216 | } 217 | } /** end of follow-up stuff **/ 218 | 219 | /** replace string **/ 220 | s = parms.rules[n+1]; 221 | p0 = (parms.rules[n][0] != '\0' 222 | && strchr (parms.rules[n]+1,'<') != NULL) ? 
1:0; 223 | if (p0 == 1 && z == 0) { 224 | /** rule with '<' is used **/ 225 | if (j > 0 && *s != '\0' 226 | && (target[j-1] == c || target[j-1] == *s)) { 227 | j--; 228 | } 229 | z0 = 1; 230 | z = 1; 231 | k0 = 0; 232 | while (*s != '\0' && word[i+k0] != '\0') { 233 | word[i+k0] = *s; 234 | k0++; 235 | s++; 236 | } 237 | if (k > k0) 238 | strmove (&word[0]+i+k0, &word[0]+i+k); 239 | 240 | /** new "actual letter" **/ 241 | c = word[i]; 242 | } 243 | else { /** no '<' rule used **/ 244 | i += k - 1; 245 | z = 0; 246 | while (*s != '\0' 247 | && *(s+1) != '\0' && j < len) { 248 | if (j == 0 || target[j-1] != *s) { 249 | target[j] = *s; 250 | j++; 251 | } 252 | s++; 253 | } 254 | /** new "actual letter" **/ 255 | c = *s; 256 | if (parms.rules[n][0] != '\0' 257 | && strstr (parms.rules[n]+1, "^^") != NULL) { 258 | if (c != '\0') { 259 | target[j] = c; 260 | j++; 261 | } 262 | strmove (&word[0], &word[0]+i+1); 263 | i = 0; 264 | z0 = 1; 265 | } 266 | } 267 | break; 268 | } /** end of follow-up stuff **/ 269 | n += 2; 270 | } /** end of while (parms.rules[n][0] == c) **/ 271 | } /** end of if (n >= 0) **/ 272 | if (z0 == 0) { 273 | // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0' 274 | // && (!parms.collapse_result || j == 0 || target[j-1] != c)){ 275 | if (k && !p0 && j < len && c != '\0' 276 | && (1 || j == 0 || target[j-1] != c)){ 277 | /** condense only double letters **/ 278 | target[j] = c; 279 | ///printf("\n setting \n"); 280 | j++; 281 | } 282 | 283 | i++; 284 | z = 0; 285 | k=0; 286 | } 287 | } /** end of while ((c = word[i]) != '\0') **/ 288 | 289 | target[j] = '\0'; 290 | return (j); 291 | 292 | } /** end of function "phonet" **/ 293 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/phonet.hxx: -------------------------------------------------------------------------------- 1 | /* phonetic.c - generic replacement aglogithms for phonetic transformation 2 | Copyright (C) 2000 Bjoern Jacke 3 
| 4 | This library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License version 2.1 as published by the Free Software Foundation; 7 | 8 | This library is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public 14 | License along with this library; If not, see 15 | . 16 | 17 | Changelog: 18 | 19 | 2000-01-05 Bjoern Jacke 20 | Initial Release insprired by the article about phonetic 21 | transformations out of c't 25/1999 22 | 23 | 2007-07-26 Bjoern Jacke 24 | Released under MPL/GPL/LGPL tri-license for Hunspell 25 | 26 | 2007-08-23 Laszlo Nemeth 27 | Porting from Aspell to Hunspell using C-like structs 28 | */ 29 | 30 | #ifndef __PHONETHXX__ 31 | #define __PHONETHXX__ 32 | 33 | #define HASHSIZE 256 34 | #define MAXPHONETLEN 256 35 | #define MAXPHONETUTF8LEN (MAXPHONETLEN * 4) 36 | 37 | #include "hunvisapi.h" 38 | 39 | struct phonetable { 40 | char utf8; 41 | cs_info * lang; 42 | int num; 43 | char * * rules; 44 | int hash[HASHSIZE]; 45 | }; 46 | 47 | LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms); 48 | 49 | LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target, 50 | int len, phonetable & phone); 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/replist.cxx: -------------------------------------------------------------------------------- 1 | #include "license.hunspell" 2 | #include "license.myspell" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "replist.hxx" 9 | #include "csutil.hxx" 10 | 11 | RepList::RepList(int n) { 12 | dat = (replentry **) malloc(sizeof(replentry *) * n); 13 | if (dat == 0) size = 0; else 
size = n; 14 | pos = 0; 15 | } 16 | 17 | RepList::~RepList() 18 | { 19 | for (int i = 0; i < pos; i++) { 20 | free(dat[i]->pattern); 21 | free(dat[i]->pattern2); 22 | free(dat[i]); 23 | } 24 | free(dat); 25 | } 26 | 27 | int RepList::get_pos() { 28 | return pos; 29 | } 30 | 31 | replentry * RepList::item(int n) { 32 | return dat[n]; 33 | } 34 | 35 | int RepList::near(const char * word) { 36 | int p1 = 0; 37 | int p2 = pos; 38 | while ((p2 - p1) > 1) { 39 | int m = (p1 + p2) / 2; 40 | int c = strcmp(word, dat[m]->pattern); 41 | if (c <= 0) { 42 | if (c < 0) p2 = m; else p1 = p2 = m; 43 | } else p1 = m; 44 | } 45 | return p1; 46 | } 47 | 48 | int RepList::match(const char * word, int n) { 49 | if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern); 50 | return 0; 51 | } 52 | 53 | int RepList::add(char * pat1, char * pat2) { 54 | if (pos >= size || pat1 == NULL || pat2 == NULL) return 1; 55 | replentry * r = (replentry *) malloc(sizeof(replentry)); 56 | if (r == NULL) return 1; 57 | r->pattern = mystrrep(pat1, "_", " "); 58 | r->pattern2 = mystrrep(pat2, "_", " "); 59 | r->start = false; 60 | r->end = false; 61 | dat[pos++] = r; 62 | for (int i = pos - 1; i > 0; i--) { 63 | r = dat[i]; 64 | if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) { 65 | dat[i] = dat[i - 1]; 66 | dat[i - 1] = r; 67 | } else break; 68 | } 69 | return 0; 70 | } 71 | 72 | int RepList::conv(const char * word, char * dest) { 73 | int stl = 0; 74 | int change = 0; 75 | for (size_t i = 0; i < strlen(word); i++) { 76 | int n = near(word + i); 77 | int l = match(word + i, n); 78 | if (l) { 79 | strcpy(dest + stl, dat[n]->pattern2); 80 | stl += strlen(dat[n]->pattern2); 81 | i += l - 1; 82 | change = 1; 83 | } else dest[stl++] = word[i]; 84 | } 85 | dest[stl] = '\0'; 86 | return change; 87 | } 88 | -------------------------------------------------------------------------------- /vendor/hunspell/src/hunspell/replist.hxx: 
/* SuggestMgr - declaration of the suggestion engine.
 *
 * Public members: the constructor/destructor, the suggest*() family that
 * fills a string list for a given word, ngsuggest(), and the suggest_morph /
 * suggest_gen functions that return a single string.
 * Private members: one helper per candidate-generation step, most of them in
 * a char-based and a w_char-based ("_utf") variant, plus n-gram and
 * common-subsequence utilities.
 *
 * Only the declarations are visible in this header and many parameters are
 * unnamed, so the per-member notes below are deliberately hedged.
 */
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
{
  /* NOTE(review): ckey*, ctry* look like two character sets, each kept as a
   * byte string, its length and a w_char copy - confirm in the
   * implementation file. */
  char *          ckey;
  int             ckeyl;
  w_char *        ckey_utf;

  char *          ctry;
  int             ctryl;
  w_char *        ctry_utf;

  AffixMgr*       pAMgr;
  int             maxSug;
  struct cs_info * csconv;
  int             utf8;
  int             langnum;
  int             nosplitsugs;
  int             maxngramsugs;
  int             maxcpdsugs;
  int             complexprefixes;


public:
  SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
  ~SuggestMgr();

  /* list-producing entry points; `nsug`/`ns` are presumably the number of
   * suggestions already present in the list - TODO confirm */
  int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
  int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
  int suggest_auto(char*** slst, const char * word, int nsug);
  int suggest_stems(char*** slst, const char * word, int nsug);
  int suggest_pos_stems(char*** slst, const char * word, int nsug);

  /* string-producing entry points */
  char * suggest_morph(const char * word);
  char * suggest_gen(char ** pl, int pln, char * pattern);
  char * suggest_morph_for_spelling_error(const char * word);

private:
   /* candidate testing; the timer / clock_t pointers presumably carry the
    * TIMELIMIT budget defined at the top of this header - confirm */
   int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
     int * timer, clock_t * timelimit);
   int checkword(const char *, int, int, int *, clock_t *);
   int check_forbidden(const char *, int);

   /* char-based candidate generators */
   int capchars(char **, const char *, int, int);
   int replchars(char**, const char *, int, int);
   int doubletwochars(char**, const char *, int, int);
   int forgotchar(char **, const char *, int, int);
   int swapchar(char **, const char *, int, int);
   int longswapchar(char **, const char *, int, int);
   int movechar(char **, const char *, int, int);
   int extrachar(char **, const char *, int, int);
   int badcharkey(char **, const char *, int, int);
   int badchar(char **, const char *, int, int);
   int twowords(char **, const char *, int, int);
   int fixstems(char **, const char *, int);

   /* w_char-based counterparts of the generators above */
   int capchars_utf(char **, const w_char *, int wl, int, int);
   int doubletwochars_utf(char**, const w_char *, int wl, int, int);
   int forgotchar_utf(char**, const w_char *, int wl, int, int);
   int extrachar_utf(char**, const w_char *, int wl, int, int);
   int badcharkey_utf(char **, const w_char *, int wl, int, int);
   int badchar_utf(char **, const w_char *, int wl, int, int);
   int swapchar_utf(char **, const w_char *, int wl, int, int);
   int longswapchar_utf(char **, const w_char *, int, int, int);
   int movechar_utf(char **, const w_char *, int, int, int);

   /* remaining helpers: character mapping, n-gram scoring (`opt` presumably
    * takes the NGRAM_* flags defined above - confirm), string measures,
    * sorting and longest-common-subsequence utilities */
   int mapchars(char**, const char *, int, int);
   int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
   int ngram(int n, char * s1, const char * s2, int opt);
   int mystrlen(const char * word);
   int leftcommonsubstring(char * s1, const char * s2);
   int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
   void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
   void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
   int lcslen(const char * s, const char* s2);
   char * suggest_hentry_gen(hentry * rv, char * pattern);

};
wordchars) 14 | { 15 | init(wordchars); 16 | } 17 | 18 | FirstParser::~FirstParser() 19 | { 20 | } 21 | 22 | char * FirstParser::next_token() 23 | { 24 | char * tabpos = strchr(line[actual],'\t'); 25 | if ((tabpos) && (tabpos - line[actual]>token)) { 26 | char * t = (char *) malloc(tabpos - line[actual] + 1); 27 | t[tabpos - line[actual]] = '\0'; 28 | token = tabpos - line[actual] +1; 29 | if (t) return strncpy(t, line[actual], tabpos - line[actual]); 30 | fprintf(stderr,"Error - Insufficient Memory\n"); 31 | } 32 | return NULL; 33 | } 34 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/firstparser.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * parser classes of HunTools 3 | * 4 | * implemented: text, HTML, TeX, first word 5 | * 6 | * Copyright (C) 2003, Laszlo Nemeth 7 | * 8 | */ 9 | 10 | #ifndef _FIRSTPARSER_HXX_ 11 | #define _FIRSTPARSER_HXX_ 12 | 13 | #include "textparser.hxx" 14 | 15 | /* 16 | * Check first word of the input line 17 | * 18 | */ 19 | 20 | class FirstParser : public TextParser 21 | { 22 | 23 | public: 24 | 25 | 26 | FirstParser(const char * wc); 27 | virtual ~FirstParser(); 28 | 29 | virtual char * next_token(); 30 | 31 | }; 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/htmlparser.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../hunspell/csutil.hxx" 7 | #include "htmlparser.hxx" 8 | 9 | 10 | #ifndef W32 11 | using namespace std; 12 | #endif 13 | 14 | enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB }; 15 | 16 | static const char * PATTERN[][2] = { 17 | { "" }, 18 | { "" }, 19 | { "" }, 20 | { "" }, 21 | { "" }, 22 | { "" }, 23 | { "" }, 24 | { "" }, 25 | { "" }, 26 | { "" }, 27 | { "<[cdata[", "]]>" }, // 
XML comment 28 | { "<", ">" } 29 | }; 30 | 31 | #define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2)) 32 | 33 | static const char * PATTERN2[][2] = { 34 | { " 0) && (line[actual][head] == '>')) { 119 | state = ST_NON_WORD; 120 | } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) && 121 | (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) { 122 | state = ST_NON_WORD; 123 | head += strlen(PATTERN[pattern_num][1]) - 1; 124 | } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) && 125 | ((line[actual][head] == '"') || (line[actual][head] == '\''))) { 126 | quotmark = line[actual][head]; 127 | state = ST_ATTRIB; 128 | } 129 | break; 130 | case ST_ATTRIB: // non word chars 131 | prevstate = ST_ATTRIB; 132 | if (line[actual][head] == quotmark) { 133 | state = ST_TAG; 134 | if (checkattr == 2) checkattr = 1; 135 | // for IMG ALT 136 | } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) { 137 | state = ST_WORD; 138 | token = head; 139 | } else if (line[actual][head] == '&') { 140 | state = ST_CHAR_ENTITY; 141 | } 142 | break; 143 | case ST_CHAR_ENTITY: // SGML element 144 | if ((tolower(line[actual][head]) == ';')) { 145 | state = prevstate; 146 | head--; 147 | } 148 | } 149 | if (next_char(line[actual], &head)) return NULL; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/htmlparser.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * HTML parser class for MySpell 3 | * 4 | * implemented: text, HTML, TeX 5 | * 6 | * Copyright (C) 2002, Laszlo Nemeth 7 | * 8 | */ 9 | 10 | #ifndef _HTMLPARSER_HXX_ 11 | #define _HTMLPARSER_HXX_ 12 | 13 | 14 | #include "textparser.hxx" 15 | 16 | /* 17 | * HTML Parser 18 | * 19 | */ 20 | 21 | class HTMLParser : public TextParser 22 | { 23 | public: 24 | 25 | HTMLParser(const char * wc); 26 | HTMLParser(unsigned short * wordchars, int len); 27 | virtual ~HTMLParser(); 28 | 29 | 
virtual char * next_token(); 30 | 31 | private: 32 | 33 | int look_pattern(const char * p[][2], unsigned int len, int column); 34 | int pattern_num; 35 | int pattern2_num; 36 | int prevstate; 37 | int checkattr; 38 | char quotmark; 39 | 40 | }; 41 | 42 | 43 | #endif 44 | 45 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/latexparser.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../hunspell/csutil.hxx" 7 | #include "latexparser.hxx" 8 | 9 | #ifndef W32 10 | using namespace std; 11 | #endif 12 | 13 | static struct { 14 | const char * pat[2]; 15 | int arg; 16 | } PATTERN[] = { 17 | { { "\\(", "\\)" } , 0 }, 18 | { { "$$", "$$" } , 0 }, 19 | { { "$", "$" } , 0 }, 20 | { { "\\begin{math}", "\\end{math}" } , 0 }, 21 | { { "\\[", "\\]" } , 0 }, 22 | { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 }, 23 | { { "\\begin{equation}", "\\end{equation}" } , 0 }, 24 | { { "\\begin{equation*}", "\\end{equation*}" } , 0 }, 25 | { { "\\cite", NULL } , 1 }, 26 | { { "\\nocite", NULL } , 1 }, 27 | { { "\\index", NULL } , 1 }, 28 | { { "\\label", NULL } , 1 }, 29 | { { "\\ref", NULL } , 1 }, 30 | { { "\\pageref", NULL } , 1 }, 31 | { { "\\parbox", NULL } , 1 }, 32 | { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 }, 33 | { { "\\verb+", "+" } , 0 }, 34 | { { "\\verb|", "|" } , 0 }, 35 | { { "\\verb#", "#" } , 0 }, 36 | { { "\\verb*", "*" } , 0 }, 37 | { { "\\documentstyle", "\\begin{document}" } , 0 }, 38 | { { "\\documentclass", "\\begin{document}" } , 0 }, 39 | // { { "\\documentclass", NULL } , 1 }, 40 | { { "\\usepackage", NULL } , 1 }, 41 | { { "\\includeonly", NULL } , 1 }, 42 | { { "\\include", NULL } , 1 }, 43 | { { "\\input", NULL } , 1 }, 44 | { { "\\vspace", NULL } , 1 }, 45 | { { "\\setlength", NULL } , 2 }, 46 | { { "\\addtolength", NULL } , 2 }, 47 | { { "\\settowidth", NULL } , 2 }, 48 | { { 
"\\rule", NULL } , 2 }, 49 | { { "\\hspace", NULL } , 1 } , 50 | { { "\\vspace", NULL } , 1 } , 51 | { { "\\\\[", "]" } , 0 }, 52 | { { "\\pagebreak[", "]" } , 0 } , 53 | { { "\\nopagebreak[", "]" } , 0 } , 54 | { { "\\enlargethispage", NULL } , 1 } , 55 | { { "\\begin{tabular}", NULL } , 1 } , 56 | { { "\\addcontentsline", NULL } , 2 } , 57 | { { "\\begin{thebibliography}", NULL } , 1 } , 58 | { { "\\bibliography", NULL } , 1 } , 59 | { { "\\bibliographystyle", NULL } , 1 } , 60 | { { "\\bibitem", NULL } , 1 } , 61 | { { "\\begin", NULL } , 1 } , 62 | { { "\\end", NULL } , 1 } , 63 | { { "\\pagestyle", NULL } , 1 } , 64 | { { "\\pagenumbering", NULL } , 1 } , 65 | { { "\\thispagestyle", NULL } , 1 } , 66 | { { "\\newtheorem", NULL } , 2 }, 67 | { { "\\newcommand", NULL } , 2 }, 68 | { { "\\renewcommand", NULL } , 2 }, 69 | { { "\\setcounter", NULL } , 2 }, 70 | { { "\\addtocounter", NULL } , 1 }, 71 | { { "\\stepcounter", NULL } , 1 }, 72 | { { "\\selectlanguage", NULL } , 1 }, 73 | { { "\\inputencoding", NULL } , 1 }, 74 | { { "\\hyphenation", NULL } , 1 }, 75 | { { "\\definecolor", NULL } , 3 }, 76 | { { "\\color", NULL } , 1 }, 77 | { { "\\textcolor", NULL } , 1 }, 78 | { { "\\pagecolor", NULL } , 1 }, 79 | { { "\\colorbox", NULL } , 2 }, 80 | { { "\\fcolorbox", NULL } , 2 }, 81 | { { "\\declaregraphicsextensions", NULL } , 1 }, 82 | { { "\\psfig", NULL } , 1 }, 83 | { { "\\url", NULL } , 1 }, 84 | { { "\\eqref", NULL } , 1 }, 85 | { { "\\vskip", NULL } , 1 }, 86 | { { "\\vglue", NULL } , 1 }, 87 | { { "\'\'", NULL } , 1 } 88 | }; 89 | 90 | #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0])) 91 | 92 | LaTeXParser::LaTeXParser(const char * wordchars) 93 | { 94 | init(wordchars); 95 | } 96 | 97 | LaTeXParser::LaTeXParser(unsigned short * wordchars, int len) 98 | { 99 | init(wordchars, len); 100 | } 101 | 102 | LaTeXParser::~LaTeXParser() 103 | { 104 | } 105 | 106 | int LaTeXParser::look_pattern(int col) 107 | { 108 | for (unsigned int i = 0; i < 
PATTERN_LEN; i++) { 109 | char * j = line[actual] + head; 110 | const char * k = PATTERN[i].pat[col]; 111 | if (! k) continue; 112 | while ((*k != '\0') && (tolower(*j) == *k)) { 113 | j++; 114 | k++; 115 | } 116 | if (*k == '\0') return i; 117 | } 118 | return -1; 119 | } 120 | 121 | /* 122 | * LaTeXParser 123 | * 124 | * state 0: not wordchar 125 | * state 1: wordchar 126 | * state 2: comments 127 | * state 3: commands 128 | * state 4: commands with arguments 129 | * state 5: % comment 130 | * 131 | */ 132 | 133 | 134 | char * LaTeXParser::next_token() 135 | { 136 | int i; 137 | int slash = 0; 138 | int apostrophe; 139 | for (;;) { 140 | // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head); 141 | 142 | switch (state) 143 | { 144 | case 0: // non word chars 145 | if ((pattern_num = look_pattern(0)) != -1) { 146 | if (PATTERN[pattern_num].pat[1]) { 147 | state = 2; 148 | } else { 149 | state = 4; 150 | depth = 0; 151 | arg = 0; 152 | opt = 1; 153 | } 154 | head += strlen(PATTERN[pattern_num].pat[0]) - 1; 155 | } else if ((line[actual][head] == '%')) { 156 | state = 5; 157 | } else if (is_wordchar(line[actual] + head)) { 158 | state = 1; 159 | token = head; 160 | } else if (line[actual][head] == '\\') { 161 | if (line[actual][head + 1] == '\\' || // \\ (linebreak) 162 | (line[actual][head + 1] == '$') || // \$ (dollar sign) 163 | (line[actual][head + 1] == '%')) { // \% (percent) 164 | head++; 165 | break; 166 | } 167 | state = 3; 168 | } else if (line[actual][head] == '%') { 169 | if ((head==0) || (line[actual][head - 1] != '\\')) state = 5; 170 | } 171 | break; 172 | case 1: // wordchar 173 | apostrophe = 0; 174 | if (! 
is_wordchar(line[actual] + head) || 175 | (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) { 176 | state = 0; 177 | char * t = alloc_token(token, &head); 178 | if (apostrophe) head += 2; 179 | if (t) return t; 180 | } 181 | break; 182 | case 2: // comment, labels, etc 183 | if (((i = look_pattern(1)) != -1) && 184 | (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) { 185 | state = 0; 186 | head += strlen(PATTERN[pattern_num].pat[1]) - 1; 187 | } 188 | break; 189 | case 3: // command 190 | if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) { 191 | state = 0; 192 | head--; 193 | } 194 | break; 195 | case 4: // command with arguments 196 | if (slash && (line[actual][head] != '\0')) { 197 | slash = 0; 198 | head++; 199 | break; 200 | } else if (line[actual][head]=='\\') { 201 | slash = 1; 202 | } else if ((line[actual][head] == '{') || 203 | ((opt) && (line[actual][head] == '['))) { 204 | depth++; 205 | opt = 0; 206 | } else if (line[actual][head] == '}') { 207 | depth--; 208 | if (depth == 0) { 209 | opt = 1; 210 | arg++; 211 | } 212 | if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || 213 | (depth < 0) ) { 214 | state = 0; // XXX not handles the last optional arg. 
215 | } 216 | } else if (line[actual][head] == ']') depth--; 217 | } // case 218 | if (next_char(line[actual], &head)) { 219 | if (state == 5) state = 0; 220 | return NULL; 221 | } 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/latexparser.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * parser classes for MySpell 3 | * 4 | * implemented: text, HTML, TeX 5 | * 6 | * Copyright (C) 2002, Laszlo Nemeth 7 | * 8 | */ 9 | 10 | #ifndef _LATEXPARSER_HXX_ 11 | #define _LATEXPARSER_HXX_ 12 | 13 | 14 | #include "textparser.hxx" 15 | 16 | /* 17 | * LaTeX Parser: tokenizer that skips LaTeX commands, their arguments, math and comments 18 | * 19 | */ 20 | 21 | class LaTeXParser : public TextParser 22 | { 23 | int pattern_num; // index into PATTERN of the command/environment being skipped 24 | int depth; // nesting depth of {...} / [...] while skipping command arguments 25 | int arg; // number of arguments consumed so far 26 | int opt; // nonzero while a '[' may still open an optional argument 27 | 28 | public: 29 | 30 | LaTeXParser(const char * wc); 31 | LaTeXParser(unsigned short * wordchars, int len); 32 | virtual ~LaTeXParser(); 33 | 34 | virtual char * next_token(); 35 | 36 | private: 37 | 38 | int look_pattern(int col); 39 | 40 | }; 41 | 42 | 43 | #endif 44 | 45 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/manparser.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../hunspell/csutil.hxx" 7 | #include "manparser.hxx" 8 | 9 | 10 | #ifndef W32 11 | using namespace std; 12 | #endif 13 | 14 | ManParser::ManParser() { 15 | } 16 | 17 | ManParser::ManParser(const char * wordchars) 18 | { 19 | init(wordchars); 20 | } 21 | 22 | ManParser::ManParser(unsigned short * wordchars, int len) 23 | { 24 | init(wordchars, len); 25 | } 26 | 27 | ManParser::~ManParser() 28 | { 29 | } 30 | 31 | char * ManParser::next_token() 32 | { 33 | for (;;) { 34 | switch (state) 35 | { 36 | case 1: // command 
arguments 37 | if (line[actual][head] == ' ') state = 2; 38 | break; 39 | case 0: // dot in begin of line 40 | if (line[actual][0] == '.') { 41 | state = 1; 42 | break; 43 | } else { 44 | state = 2; 45 | } 46 | // no break 47 | case 2: // non word chars 48 | if (is_wordchar(line[actual] + head)) { 49 | state = 3; 50 | token = head; 51 | } else if ((line[actual][head] == '\\') && 52 | (line[actual][head + 1] == 'f') && 53 | (line[actual][head + 2] != '\0')) { 54 | head += 2; 55 | } 56 | break; 57 | case 3: // wordchar 58 | if (! is_wordchar(line[actual] + head)) { 59 | state = 2; 60 | char * t = alloc_token(token, &head); 61 | if (t) return t; 62 | } 63 | break; 64 | } 65 | if (next_char(line[actual], &head)) { 66 | state = 0; 67 | return NULL; 68 | } 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/manparser.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * parser classes for MySpell 3 | * 4 | * implemented: text, HTML, TeX 5 | * 6 | * Copyright (C) 2002, Laszlo Nemeth 7 | * 8 | */ 9 | 10 | #ifndef _MANPARSER_HXX_ 11 | #define _MANPARSER_HXX_ 12 | 13 | #include "textparser.hxx" 14 | 15 | /* 16 | * Manparse Parser 17 | * 18 | */ 19 | 20 | class ManParser : public TextParser 21 | { 22 | 23 | protected: 24 | 25 | 26 | public: 27 | 28 | ManParser(); 29 | ManParser(const char * wc); 30 | ManParser(unsigned short * wordchars, int len); 31 | virtual ~ManParser(); 32 | 33 | virtual char * next_token(); 34 | 35 | }; 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/testparser.cxx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/vendor/hunspell/src/parsers/testparser.cxx 
-------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/textparser.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../hunspell/csutil.hxx" 7 | #include "textparser.hxx" 8 | 9 | #ifndef W32 10 | using namespace std; 11 | #endif 12 | 13 | // ISO-8859-1 HTML character entities 14 | 15 | static const char * LATIN1[] = { 16 | "À", 17 | "Ã", 18 | "Å", 19 | "Æ", 20 | "È", 21 | "Ê", 22 | "Ì", 23 | "Ï", 24 | "Ð", 25 | "Ñ", 26 | "Ò", 27 | "Ø", 28 | "Ù", 29 | "Þ", 30 | "à", 31 | "ã", 32 | "å", 33 | "æ", 34 | "è", 35 | "ê", 36 | "ì", 37 | "ï", 38 | "ð", 39 | "ñ", 40 | "ò", 41 | "ø", 42 | "ù", 43 | "þ", 44 | "ÿ" 45 | }; 46 | 47 | #define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *)) 48 | 49 | TextParser::TextParser() { 50 | init((char *) NULL); 51 | } 52 | 53 | TextParser::TextParser(const char * wordchars) 54 | { 55 | init(wordchars); 56 | } 57 | 58 | TextParser::TextParser(unsigned short * wordchars, int len) 59 | { 60 | init(wordchars, len); 61 | } 62 | 63 | TextParser::~TextParser() 64 | { 65 | } 66 | 67 | int TextParser::is_wordchar(char * w) 68 | { 69 | if (*w == '\0') return 0; 70 | if (utf8) { 71 | w_char wc; 72 | unsigned short idx; 73 | u8_u16(&wc, 1, w); 74 | idx = (wc.h << 8) + wc.l; 75 | return (unicodeisalpha(idx) || (wordchars_utf16 && flag_bsearch(wordchars_utf16, *((unsigned short *) &wc), wclen))); 76 | } else { 77 | return wordcharacters[(*w + 256) % 256]; 78 | } 79 | } 80 | 81 | const char * TextParser::get_latin1(char * s) 82 | { 83 | if (s[0] == '&') { 84 | unsigned int i = 0; 85 | while ((i < LATIN1_LEN) && 86 | strncmp(LATIN1[i], s, strlen(LATIN1[i]))) i++; 87 | if (i != LATIN1_LEN) return LATIN1[i]; 88 | } 89 | return NULL; 90 | } 91 | 92 | void TextParser::init(const char * wordchars) 93 | { 94 | for (int i = 0; i < MAXPREVLINE; i++) { 95 | line[i][0] = '\0'; 96 | } 97 | actual = 0; 98 | head = 
0; 99 | token = 0; 100 | state = 0; 101 | utf8 = 0; 102 | checkurl = 0; 103 | unsigned int j; 104 | for (j = 0; j < 256; j++) { 105 | wordcharacters[j] = 0; 106 | } 107 | if (!wordchars) wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"; 108 | for (j = 0; j < strlen(wordchars); j++) { 109 | wordcharacters[(wordchars[j] + 256) % 256] = 1; 110 | } 111 | } 112 | 113 | void TextParser::init(unsigned short * wc, int len) 114 | { 115 | for (int i = 0; i < MAXPREVLINE; i++) { 116 | line[i][0] = '\0'; 117 | } 118 | actual = 0; 119 | head = 0; 120 | token = 0; 121 | state = 0; 122 | utf8 = 1; 123 | checkurl = 0; 124 | wordchars_utf16 = wc; 125 | wclen = len; 126 | } 127 | 128 | int TextParser::next_char(char * line, int * pos) { 129 | if (*(line + *pos) == '\0') return 1; 130 | if (utf8) { 131 | if (*(line + *pos) >> 7) { 132 | // jump to next UTF-8 character 133 | for((*pos)++; (*(line + *pos) & 0xc0) == 0x80; (*pos)++); 134 | } else { 135 | (*pos)++; 136 | } 137 | } else (*pos)++; 138 | return 0; 139 | } 140 | /* Store `word` as the new current line in the ring of MAXPREVLINE buffers, reset the token/head positions and rebuild the URL mask. Input of MAXLNLEN bytes or more is truncated to MAXLNLEN - 1 bytes. */ 141 | void TextParser::put_line(char * word) 142 | { 143 | actual = (actual + 1) % MAXPREVLINE; 144 | strncpy(line[actual], word, MAXLNLEN - 1); line[actual][MAXLNLEN - 1] = '\0'; /* bounded copy: the unbounded strcpy overflowed line[] on over-long input; strncpy does not terminate when it hits the limit, so terminate explicitly */ 145 | token = 0; 146 | head = 0; 147 | check_urls(); 148 | } 149 | 150 | char * TextParser::get_prevline(int n) 151 | { 152 | return mystrdup(line[(actual + MAXPREVLINE - n) % MAXPREVLINE]); 153 | } 154 | 155 | char * TextParser::get_line() 156 | { 157 | return get_prevline(0); 158 | } 159 | 160 | char * TextParser::next_token() 161 | { 162 | const char * latin1; 163 | 164 | for (;;) { 165 | switch (state) 166 | { 167 | case 0: // non word chars 168 | if (is_wordchar(line[actual] + head)) { 169 | state = 1; 170 | token = head; 171 | } else if ((latin1 = get_latin1(line[actual] + head))) { 172 | state = 1; 173 | token = head; 174 | head += strlen(latin1); 175 | } 176 | break; 177 | case 1: // wordchar 178 | if ((latin1 = get_latin1(line[actual] + head))) { 179 | head += strlen(latin1); 180 | } else if (! 
is_wordchar(line[actual] + head)) { 181 | state = 0; 182 | char * t = alloc_token(token, &head); 183 | if (t) return t; 184 | } 185 | break; 186 | } 187 | if (next_char(line[actual], &head)) return NULL; 188 | } 189 | } 190 | 191 | int TextParser::get_tokenpos() 192 | { 193 | return token; 194 | } 195 | /* Replace the current token (line[actual][token..head)) with `word`, keeping the rest of the line, and rewind head to the token start. Returns 1 on success; 0 when `word` is NULL, the tail copy cannot be allocated, or the result would not fit in MAXLNLEN bytes (the line is then left unchanged). */ 196 | int TextParser::change_token(const char * word) 197 | { 198 | if (word) { 199 | char * r = mystrdup(line[actual] + head); /* copy of the text after the token; checked below because the duplicate may be NULL */ 200 | if (!r || (token + strlen(word) + strlen(r) >= MAXLNLEN)) { free(r); return 0; } /* refuse instead of overflowing the fixed-size line buffer */ strcpy(line[actual] + token, word); 201 | strcat(line[actual], r); 202 | head = token; 203 | free(r); 204 | return 1; 205 | } 206 | return 0; 207 | } 208 | 209 | void TextParser::check_urls() 210 | { 211 | int url_state = 0; 212 | int url_head = 0; 213 | int url_token = 0; 214 | int url = 0; 215 | for (;;) { 216 | switch (url_state) 217 | { 218 | case 0: // non word chars 219 | if (is_wordchar(line[actual] + url_head)) { 220 | url_state = 1; 221 | url_token = url_head; 222 | // Unix path 223 | } else if (*(line[actual] + url_head) == '/') { 224 | url_state = 1; 225 | url_token = url_head; 226 | url = 1; 227 | } 228 | break; 229 | case 1: // wordchar 230 | char ch = *(line[actual] + url_head); 231 | // e-mail address 232 | if ((ch == '@') || 233 | // MS-DOS, Windows path 234 | (strncmp(line[actual] + url_head, ":\\", 2) == 0) || 235 | // URL 236 | (strncmp(line[actual] + url_head, "://", 3) == 0)) { 237 | url = 1; 238 | } else if (! 
(is_wordchar(line[actual] + url_head) || 239 | (ch == '-') || (ch == '_') || (ch == '\\') || 240 | (ch == '.') || (ch == ':') || (ch == '/') || 241 | (ch == '~') || (ch == '%') || (ch == '*') || 242 | (ch == '$') || (ch == '[') || (ch == ']') || 243 | (ch == '?') || (ch == '!') || 244 | ((ch >= '0') && (ch <= '9')))) { 245 | url_state = 0; 246 | if (url == 1) { 247 | for (int i = url_token; i < url_head; i++) { 248 | *(urlline + i) = 1; 249 | } 250 | } 251 | url = 0; 252 | } 253 | break; 254 | } 255 | *(urlline + url_head) = 0; 256 | if (next_char(line[actual], &url_head)) return; 257 | } 258 | } 259 | 260 | int TextParser::get_url(int token_pos, int * head) 261 | { 262 | for (int i = *head; urlline[i] && *(line[actual]+i); i++, (*head)++); 263 | return checkurl ? 0 : urlline[token_pos]; 264 | } 265 | 266 | void TextParser::set_url_checking(int check) 267 | { 268 | checkurl = check; 269 | } 270 | 271 | 272 | char * TextParser::alloc_token(int token, int * head) 273 | { 274 | if (get_url(token, head)) return NULL; 275 | char * t = (char *) malloc(*head - token + 1); 276 | if (t) { 277 | t[*head - token] = '\0'; 278 | strncpy(t, line[actual] + token, *head - token); 279 | // remove colon for Finnish and Swedish language 280 | if (t[*head - token - 1] == ':') { 281 | t[*head - token - 1] = '\0'; 282 | if (!t[0]) { 283 | free(t); 284 | return NULL; 285 | } 286 | } 287 | return t; 288 | } 289 | fprintf(stderr,"Error - Insufficient Memory\n"); 290 | return NULL; 291 | } 292 | -------------------------------------------------------------------------------- /vendor/hunspell/src/parsers/textparser.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * parser classes for MySpell 3 | * 4 | * implemented: text, HTML, TeX 5 | * 6 | * Copyright (C) 2002, Laszlo Nemeth 7 | * 8 | */ 9 | 10 | #ifndef _TEXTPARSER_HXX_ 11 | #define _TEXTPARSER_HXX_ 12 | 13 | // set sum of actual and previous lines 14 | #define MAXPREVLINE 4 15 | 16 | 
#ifndef MAXLNLEN 17 | #define MAXLNLEN 8192 18 | #endif 19 | 20 | /* 21 | * Base Text Parser 22 | * 23 | */ 24 | 25 | class TextParser 26 | { 27 | 28 | protected: 29 | void init(const char *); 30 | void init(unsigned short * wordchars, int len); 31 | int wordcharacters[256]; // for detection of the word boundaries 32 | char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines 33 | char urlline[MAXLNLEN]; // mask for url detection 34 | int checkurl; 35 | int actual; // actual line 36 | int head; // head position 37 | int token; // begin of token 38 | int state; // state of automata 39 | int utf8; // UTF-8 character encoding 40 | int next_char(char * line, int * pos); 41 | unsigned short * wordchars_utf16; 42 | int wclen; 43 | 44 | public: 45 | 46 | TextParser(); 47 | TextParser(unsigned short * wordchars, int len); 48 | TextParser(const char * wc); 49 | virtual ~TextParser(); 50 | 51 | void put_line(char * line); 52 | char * get_line(); 53 | char * get_prevline(int n); 54 | virtual char * next_token(); 55 | int change_token(const char * word); 56 | void set_url_checking(int check); 57 | 58 | int get_tokenpos(); 59 | int is_wordchar(char * w); 60 | const char * get_latin1(char * s); 61 | char * next_char(); 62 | int tokenize_urls(); 63 | void check_urls(); 64 | int get_url(int token_pos, int * head); 65 | char * alloc_token(int token, int * head); 66 | }; 67 | 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /vendor/hunspell_dictionaries/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/vendor/hunspell_dictionaries/README.txt -------------------------------------------------------------------------------- /vendor/hunspell_dictionaries/en_US.aff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/vendor/hunspell_dictionaries/en_US.aff -------------------------------------------------------------------------------- /vendor/hunspell_dictionaries/en_US.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atom/node-spellchecker/1883c560dd0d768db9d797b470e007fa4cb820a6/vendor/hunspell_dictionaries/en_US.dic --------------------------------------------------------------------------------