├── .editorconfig ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .npmignore ├── .travis.yml ├── .vscode └── launch.json ├── CI-EncryptTool.md ├── LICENSE ├── README.md ├── appveyor.yml ├── binding.gyp ├── examples └── .gitkeep ├── index.d.ts ├── index.js ├── lib └── binding │ └── .gitkeep ├── package.json ├── src ├── main.cc ├── tagger_class.cc ├── tagger_class.h ├── trainer_class.cc └── trainer_class.h ├── tea.yaml └── test ├── models ├── model_9.bin └── wordsent_crf_v1.model ├── specs ├── tagger.js └── trainer.js └── start.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | # Unix-style newlines with a newline ending every file 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | indent_style = space 11 | indent_size = 2 12 | 13 | # editorconfig-tools is unable to ignore longs strings or urls 14 | max_line_length = null 15 | 16 | # Tab indentation (no size specified) 17 | [Makefile] 18 | indent_style = tab 19 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ## GITATTRIBUTES FOR WEB PROJECTS 2 | # 3 | # These settings are for any web project. 4 | # Ref: https://git.io/fxxEi 5 | # 6 | # Details per file setting: 7 | # text These files should be normalized (i.e. convert CRLF => LF). 8 | # binary These files are binary and should be left untouched. 9 | # 10 | # Note that binary is a macro for -text -diff. 11 | ###################################################################### 12 | 13 | ## AUTO-DETECT 14 | ## Handle line endings automatically for files detected as 15 | ## text and leave all files detected as binary untouched. 16 | ## This will handle all files NOT defined below. 
17 | * text=auto eol=lf 18 | 19 | ## CSHARP 20 | *.cs text diff=csharp 21 | *.sln text eol=crlf 22 | *.csproj text eol=crlf 23 | 24 | ## SOURCE CODE 25 | *.bat text eol=crlf 26 | *.coffee text 27 | *.css text 28 | *.htm text diff=html 29 | *.html text diff=html 30 | *.inc text 31 | *.ini text 32 | *.js text 33 | *.json text 34 | *.jsx text 35 | *.less text 36 | *.od text 37 | *.onlydata text 38 | *.php text diff=php 39 | *.pl text 40 | *.py text diff=python 41 | *.rb text diff=ruby 42 | *.sass text 43 | *.scm text 44 | *.scss text 45 | *.sh text eol=lf 46 | *.sql text 47 | *.styl text 48 | *.tag text 49 | *.ts text 50 | *.tsx text 51 | *.vue text 52 | *.xml text 53 | *.xhtml text diff=html 54 | 55 | ## DOCKER 56 | *.dockerignore text 57 | Dockerfile text 58 | 59 | ## DOCUMENTATION 60 | *.ipynb text 61 | *.markdown text 62 | *.md text 63 | *.mdwn text 64 | *.mdown text 65 | *.mkd text 66 | *.mkdn text 67 | *.mdtxt text 68 | *.mdtext text 69 | *.txt text 70 | AUTHORS text 71 | CHANGELOG text 72 | CHANGES text 73 | CONTRIBUTING text 74 | COPYING text 75 | copyright text 76 | *COPYRIGHT* text 77 | INSTALL text 78 | license text 79 | LICENSE text 80 | NEWS text 81 | readme text 82 | *README* text 83 | TODO text 84 | 85 | ## TEMPLATES 86 | *.dot text 87 | *.ejs text 88 | *.haml text 89 | *.handlebars text 90 | *.hbs text 91 | *.hbt text 92 | *.jade text 93 | *.latte text 94 | *.mustache text 95 | *.njk text 96 | *.phtml text 97 | *.tmpl text 98 | *.tpl text 99 | *.twig text 100 | 101 | ## LINTERS 102 | .csslintrc text 103 | .eslintrc text 104 | .htmlhintrc text 105 | .jscsrc text 106 | .jshintrc text 107 | .jshintignore text 108 | .stylelintrc text 109 | 110 | ## CONFIGS 111 | *.bowerrc text 112 | *.cnf text 113 | *.conf text 114 | *.config text 115 | .babelrc text 116 | .browserslistrc text 117 | .editorconfig text 118 | .env text 119 | .gitattributes text 120 | .gitconfig text 121 | .gitignore text 122 | .htaccess text 123 | *.lock text 124 | *.npmignore text 125 | 
*.yaml text 126 | *.yml text 127 | browserslist text 128 | Makefile text 129 | makefile text 130 | 131 | ## HEROKU 132 | Procfile text 133 | .slugignore text 134 | 135 | ## GRAPHICS 136 | *.ai binary 137 | *.bmp binary 138 | *.eps binary 139 | *.gif binary 140 | *.ico binary 141 | *.jng binary 142 | *.jp2 binary 143 | *.jpg binary 144 | *.jpeg binary 145 | *.jpx binary 146 | *.jxr binary 147 | *.pdf binary 148 | *.png binary 149 | *.psb binary 150 | *.psd binary 151 | *.svg text 152 | *.svgz binary 153 | *.tif binary 154 | *.tiff binary 155 | *.wbmp binary 156 | *.webp binary 157 | 158 | ## AUDIO 159 | *.kar binary 160 | *.m4a binary 161 | *.mid binary 162 | *.midi binary 163 | *.mp3 binary 164 | *.ogg binary 165 | *.ra binary 166 | 167 | ## VIDEO 168 | *.3gpp binary 169 | *.3gp binary 170 | *.as binary 171 | *.asf binary 172 | *.asx binary 173 | *.fla binary 174 | *.flv binary 175 | *.m4v binary 176 | *.mng binary 177 | *.mov binary 178 | *.mp4 binary 179 | *.mpeg binary 180 | *.mpg binary 181 | *.ogv binary 182 | *.swc binary 183 | *.swf binary 184 | *.webm binary 185 | 186 | ## ARCHIVES 187 | *.7z binary 188 | *.gz binary 189 | *.jar binary 190 | *.rar binary 191 | *.tar binary 192 | *.zip binary 193 | 194 | ## FONTS 195 | *.ttf binary 196 | *.eot binary 197 | *.otf binary 198 | *.woff binary 199 | *.woff2 binary 200 | 201 | ## EXECUTABLES 202 | *.exe binary 203 | *.pyc binary 204 | 205 | # Project text 206 | .gitignore text 207 | *.gitattributes text 208 | *.md text 209 | 210 | # Project binary 211 | *.lock binary 212 | *.dll binary 213 | *.doc binary 214 | *.docx binary 215 | *.xls binary 216 | *.xlsx binary 217 | /dist/* binary 218 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | 
*.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Typescript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | ################################################ 61 | # VNTK - Vietnamese language toolkit 62 | # 63 | # Common files generated by text editors, 64 | # operating systems, file systems, etc. 
65 | ################################################ 66 | 67 | .vscode 68 | obj 69 | build* 70 | lib/binding 71 | crfsuite.node 72 | model.crfsuite 73 | package-lock.json 74 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "crfsuite"] 2 | path = crfsuite 3 | url = https://github.com/chokkan/crfsuite.git 4 | [submodule "liblbfgs"] 5 | path = liblbfgs 6 | url = https://github.com/chokkan/liblbfgs.git 7 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | crfsuite.node 2 | model.crfsuite 3 | CI-EncryptTool.md 4 | examples 5 | build 6 | docs 7 | test 8 | lib -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | compiler: gcc 4 | sudo: false 5 | 6 | os: 7 | - linux 8 | - osx 9 | 10 | env: 11 | global: 12 | - secure: Vb4syWCMM1zciqis3IRqKQEiDxSnf4J42YA5GTKQpy3kDJZy/oknPW+vvhWTD/LajHzgiks/Mv7625GlWQUvFerJceKXQOf3AHUmDQBeTMzqoRacCNF3nGGCveqvEuQPnLgcAh/3p26aDxYt3H5OaL6w3hRtnBic5AU48um+IydPt+0/zaYQFgdIulPnHTJLLiD9/NlttaaC3AdUeBfdaQbyaulTxPuBTncW8UnuXYYkzfsAq/8+6N/OHMrE7GLgY4d8JF5sfaTIVKsVcnTiFBs9ldknbc6ottVYJOdWGrKTb9ERUsKw1J3NrQ+7PeuZt/tdWbk2bWvSMjvqrV+eTTujzozl1Rz5jIXi1nLPkWBUooOMqo7VKowD0HflGp7hcoJSnVJ5lZtFd8vVYkf1oQPenrQe1KFhTAdz5K/kfG46OOzChuue+wY6wep4IyWIkbPq2zgP3Zkj2SxYt+tpALA3ccgfJ9oD5m3WnXpXqLlZhtgW0vJp2UJKgIM5Vz09lc0pBe/x1AAaMO72ykQOf2Pim8RQ+w3ZH7BmE9mVje5fDHSEpodtDNlTRcCpIR3VLLDmASKtZ9sK+SCxSblTxDwhu/RvhMx4F4FY8LjTVccnX+tPW5/til1kDkMdZ8gvrfkD/PXFO8P3PT8EYi4Sq8Ojgzqnifx2Q4MMi+/Mx7M= 13 | 14 | node_js: 15 | # support latest Nodejs LTS version 16 | - '10' 17 | 18 | addons: 19 | apt: 20 | sources: 21 | - ubuntu-toolchain-r-test 22 | packages: 23 | - 
gcc-4.8 24 | - g++-4.8 25 | 26 | sudo: false 27 | 28 | before_install: 29 | - if [ "$TRAVIS_NODE_VERSION" = "0.8" ]; then npm install -g npm@2.7.3; fi; 30 | - if [ $TRAVIS_OS_NAME == "linux" ]; then 31 | export CC="gcc-4.8"; 32 | export CXX="g++-4.8"; 33 | export LINK="gcc-4.8"; 34 | export LINKXX="g++-4.8"; 35 | fi 36 | - nvm --version 37 | - node --version 38 | - npm --version 39 | - gcc --version 40 | - g++ --version 41 | 42 | before_script: 43 | # figure out if we should publish 44 | - echo $TRAVIS_BRANCH 45 | - echo `git describe --tags --always HEAD` 46 | - PUBLISH_BINARY=false 47 | - COMMIT_MESSAGE=$(git show -s --format=%B $TRAVIS_COMMIT | tr -d '\n') 48 | - if [[ $TRAVIS_BRANCH == `git describe --tags --always HEAD` || ${COMMIT_MESSAGE} =~ "[publish binary]" ]]; then PUBLISH_BINARY=true; fi; 49 | - echo "Publishing binaries? ->" $PUBLISH_BINARY 50 | 51 | install: 52 | - npm install --build-from-source 53 | 54 | script: 55 | - npm test 56 | - if [[ $PUBLISH_BINARY == true ]]; then node-pre-gyp rebuild package && node-pre-gyp-github publish --release; fi; 57 | 58 | #cache: 59 | # directories: 60 | # - $HOME/.node-gyp 61 | # - $HOME/.npm 62 | # - node_modules 63 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | // "program": "${workspaceFolder}\\index.js" 12 | "program": "${file}" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /CI-EncryptTool.md: -------------------------------------------------------------------------------- 1 | AppVeyor CI - Encrypt configuration data tool 2 | ============================================= 3 | 4 | Ref: https://ci.appveyor.com/tools/encrypt 5 | 6 | Travis CI - Encrypt Travis CI settings 7 | ============================================= 8 | 9 | Ref: http://rkh.github.io/travis-encrypt/public/index.html -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Nhữ Bảo Vũ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # node-crfsuite 2 | 3 | A nodejs binding for crfsuite 4 | 5 | [![MIT License](https://img.shields.io/badge/license-MIT_License-green.svg?style=flat-square)](./LICENSE) 6 | [![npm version](https://img.shields.io/npm/v/crfsuite.svg?style=flat)](https://www.npmjs.com/package/crfsuite) 7 | [![downloads](https://img.shields.io/npm/dm/crfsuite.svg)](https://www.npmjs.com/package/crfsuite) 8 | [![Travis](https://travis-ci.org/vunb/node-crfsuite.svg?branch=master)](https://travis-ci.org/vunb/node-crfsuite) 9 | [![Appveyor](https://ci.appveyor.com/api/projects/status/9gd460vxd6jbel14/branch/master?svg=true)](https://ci.appveyor.com/project/vunb/node-crfsuite/branch/master) 10 | 11 | > This is a binding to the CRFSuite library written by Naoaki Okazaki. Conditional Random Fields (CRFs) are a class of statistical modeling methods often applied in pattern recognition and machine learning and used for structured prediction. 
12 | 13 | # Installation 14 | 15 | For most "standard" use cases (on Mac, Linux, or Windows on an x86 or x64 processor), `node-crfsuite` installs easily with: 16 | 17 | > npm install crfsuite 18 | 19 | # API Usage 20 | 21 | ## CRFSuite Tagger 22 | 23 | ```js 24 | const crfsuite = require('crfsuite') 25 | const tagger = new crfsuite.Tagger() 26 | 27 | let is_opened = tagger.open('./path/to/crf.model') 28 | console.log('File model is opened:', is_opened) 29 | 30 | let tags = tagger.tag(input) 31 | console.log('Tags: ', tags) 32 | ``` 33 | 34 | ## CRFSuite Trainer 35 | 36 | ```js 37 | const path = require('path') 38 | const crfsuite = require('crfsuite') 39 | const trainer = new crfsuite.Trainer({ 40 | debug: true 41 | }) 42 | 43 | let model_filename = path.resolve('./model.crfsuite') 44 | 45 | let xseq = [['walk'], ['walk', 'shop'], ['clean', 'shop']] 46 | let yseq = ['sunny', 'sunny', 'rainy'] 47 | 48 | // submit training data to the trainer 49 | trainer.append(xseq, yseq) 50 | trainer.train(model_filename) 51 | 52 | // output: ./model.crfsuite 53 | ``` 54 | 55 | # Installation Special Cases 56 | 57 | We use [node-pre-gyp](https://github.com/mapbox/node-pre-gyp) to compile and publish binaries of the library for most common use cases (Linux, Mac, Windows on standard processor platforms). If you have a special case, `node-crfsuite` will work, but it will compile the binary during the install. Compiling with nodejs is done via [node-gyp](https://github.com/nodejs/node-gyp) which requires Python 2.x, so please ensure you have it installed and in your path for all operating systems. Python 3.x will not work. 58 | 59 | * See [node-gyp installation prerequisites](https://github.com/nodejs/node-gyp#installation). 
60 | 61 | ## Build from source 62 | 63 | ```bash 64 | # clone the project 65 | git clone --recursive https://github.com/vunb/node-crfsuite.git 66 | 67 | # go to working folder 68 | cd node-crfsuite 69 | 70 | # install dependencies and build the binary 71 | npm install 72 | ``` 73 | 74 | For development: 75 | 76 | ```bash 77 | # rebuild 78 | npm run build 79 | 80 | # run unit-test 81 | npm test 82 | ``` 83 | 84 | # Change Log 85 | 86 | ### 2020-03-07: Release version 1.0.1 87 | 88 | * Add an options parameter to pass to a Trainer with a debug property to avoid writing logs to stdout. ([17](https://github.com/vunb/node-crfsuite/pull/17)) ([mathquis](https://github.com/mathquis)) 89 | * Update typescript declaration file 90 | 91 | ### 2019-07-18: Release version 1.0.0 92 | 93 | * Convert all sources to use N-API, remove `nan` 94 | * Add typescript declaration file 95 | * Clean up package dependencies 96 | * CI tools build and test only on Node.js 10 LTS 97 | * Add project convention: `.gitattributes`, `.editorconfig` 98 | * Enforce use of the `new` keyword to create new **Tagger** and **Trainer** 99 | 100 | ### 2019-06-09: uses N-API 101 | 102 | From `crfsuite@0.9.6` the library uses `N-API` so that the same binary works across multiple versions of Node. 103 | 104 | * [According to the Node.js Foundation](https://medium.com/the-node-js-collection/4f35b781f00e): With N-API, native module developers can compile their module once per platform and architecture, and make it available for any version of Node.js that implements N-API. This holds true even for versions of Node.js that are built with a different VM, e.g. [Node-ChakraCore](https://github.com/nodejs/node-chakracore/). 105 | 106 | # Contributing 107 | 108 | Pull requests and stars are highly welcome. 109 | 110 | For bugs and feature requests, please [create an issue](https://github.com/vunb/node-crfsuite/issues/new). 
111 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | os: unstable 2 | environment: 3 | NODE_PRE_GYP_GITHUB_TOKEN: 4 | secure: ex3daTUjJEaSvxAKyUiPOiLjEoYhmisgN9YjzbdYvMhdu3YyZo56JyEcTPjKIhQz 5 | 6 | matrix: 7 | # support latest Nodejs LTS version 8 | - nodejs_version: "10" 9 | 10 | platform: 11 | - x64 12 | - x86 13 | 14 | # Install scripts. (runs after repo cloning) 15 | install: 16 | # Get the latest stable version of Node.js or io.js 17 | - ps: Install-Product node $env:nodejs_version $env:platform 18 | - set PATH=%APPDATA%\npm;%PATH% 19 | # install submodules 20 | - git submodule update --init 21 | # fixes nodejs/node-gyp#972 22 | # - npm install -g npm@5.7.1 23 | # install modules 24 | - npm install -g node-gyp node-pre-gyp node-pre-gyp-github 25 | - npm install --build-from-source 26 | # Check if we're building the latest tag, if so 27 | # then we publish the binaries if tests pass. 28 | - ps: > 29 | if ($env:APPVEYOR_REPO_COMMIT_MESSAGE.ToLower().Contains('[publish binary]') -OR $(git describe --tags --always HEAD) -eq $env:APPVEYOR_REPO_BRANCH) { 30 | $env:publish_binary = "true"; 31 | } 32 | if ($env:publish_binary -eq "true") { 33 | "We're publishing a binary!" | Write-Host 34 | } else { 35 | "We're not publishing a binary" | Write-Host 36 | } 37 | true; 38 | 39 | # Post-install test scripts. 40 | test_script: 41 | # Output useful info for debugging. 42 | - node --version 43 | - npm --version 44 | # run tests 45 | - npm test 46 | # publish binary 47 | - ps: if ($env:publish_binary -eq "true") { node-pre-gyp configure clean build package; node-pre-gyp-github publish --release } 48 | 49 | # Don't actually build. 
50 | build: off 51 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "action_after_build", 5 | "type": "none", 6 | "dependencies": ["<(module_name)"], 7 | "copies": [{ 8 | "files": ["<(PRODUCT_DIR)/<(module_name).node"], 9 | "destination": "<(module_path)" 10 | }] 11 | }, 12 | { 13 | "target_name": "crfsuite", 14 | "cflags_cc": [ 15 | "-std=c++11", 16 | "-pthread", 17 | "-fexceptions", 18 | "-O3", 19 | "-Wall", 20 | "-Wno-sign-compare", 21 | "-pedantic", 22 | "-DUSE_SSE", 23 | "-DUSE_SSE2" 24 | ], 25 | "conditions": [ 26 | [ "OS=='linux'", { 27 | "cflags+": [ "-std=c++11", "-fexceptions" ], 28 | "cflags_c+": [ "-std=c++11", "-fexceptions" ], 29 | "cflags_cc+": [ "-std=c++11", "-fexceptions" ], 30 | }], 31 | [ "OS=='freebsd'", { 32 | "cflags+": [ "-std=c++11", "-fexceptions" ], 33 | "cflags_c+": [ "-std=c++11", "-fexceptions" ], 34 | "cflags_cc+": [ "-std=c++11", "-fexceptions" ], 35 | }], 36 | [ "OS=='mac'", { 37 | "cflags+": [ "-stdlib=libc++" ], 38 | "xcode_settings": { 39 | "OTHER_CPLUSPLUSFLAGS" : [ "-std=c++11", "-stdlib=libc++", "-pthread" ], 40 | "OTHER_LDFLAGS": [ "-stdlib=libc++" ], 41 | "GCC_ENABLE_CPP_EXCEPTIONS": "YES", 42 | "MACOSX_DEPLOYMENT_TARGET": "10.7", 43 | "CLANG_CXX_LANGUAGE_STANDARD":"c++11", 44 | "CLANG_CXX_LIBRARY": "libc++" 45 | }, 46 | }], 47 | [ 48 | "OS=='win'", { 49 | "cflags": [ 50 | "-Wall" 51 | ], 52 | "defines": [ 53 | "WIN" 54 | ], 55 | "msvs_settings": { 56 | "VCCLCompilerTool": { 57 | "ExceptionHandling": "2", 58 | "DisableSpecificWarnings": [ 59 | "4244" 60 | ], 61 | }, 62 | "VCLinkerTool": { 63 | "LinkTimeCodeGeneration": 1, 64 | "OptimizeReferences": 2, 65 | "EnableCOMDATFolding": 2, 66 | "LinkIncremental": 1, 67 | } 68 | } 69 | }] 70 | ], 71 | "sources": [ 72 | "liblbfgs/lib/lbfgs.c", 73 | 74 | "crfsuite/lib/crf/src/crf1d_context.c", 75 | 
"crfsuite/lib/crf/src/crf1d_encode.c", 76 | "crfsuite/lib/crf/src/crf1d_feature.c", 77 | "crfsuite/lib/crf/src/crf1d_model.c", 78 | "crfsuite/lib/crf/src/crf1d_tag.c", 79 | "crfsuite/lib/crf/src/crfsuite.c", 80 | "crfsuite/lib/crf/src/crfsuite_train.c", 81 | "crfsuite/lib/crf/src/dataset.c", 82 | "crfsuite/lib/crf/src/dictionary.c", 83 | "crfsuite/lib/crf/src/holdout.c", 84 | "crfsuite/lib/crf/src/logging.c", 85 | "crfsuite/lib/crf/src/params.c", 86 | "crfsuite/lib/crf/src/quark.c", 87 | "crfsuite/lib/crf/src/rumavl.c", 88 | "crfsuite/lib/crf/src/train_arow.c", 89 | "crfsuite/lib/crf/src/train_averaged_perceptron.c", 90 | "crfsuite/lib/crf/src/train_l2sgd.c", 91 | "crfsuite/lib/crf/src/train_lbfgs.c", 92 | "crfsuite/lib/crf/src/train_passive_aggressive.c", 93 | 94 | "crfsuite/lib/cqdb/src/cqdb.c", 95 | "crfsuite/lib/cqdb/src/lookup3.c", 96 | 97 | "src/main.cc", 98 | "src/trainer_class.cc", 99 | "src/tagger_class.cc" 100 | ], 101 | "defines": [ 102 | "NAPI_VERSION=<(napi_build_version)", 103 | ], 104 | "include_dirs": [ 105 | "liblbfgs/include", 106 | "crfsuite/include", 107 | "crfsuite/lib/cqdb/include", 108 | "): { probability: number; result: string[] }; 3 | open(model_filename: string): boolean; 4 | } 5 | 6 | export interface Options { 7 | [key: string]: string; 8 | } 9 | 10 | export interface TrainerOptions { 11 | [key: string]: any; 12 | debug?: boolean; 13 | } 14 | 15 | export interface TrainerCallback { 16 | (str: string): void; 17 | } 18 | 19 | export declare class Trainer { 20 | constructor(opts?: TrainerOptions); 21 | append(xseq: Array, yseq: string[]): void; 22 | train(model_filename: string): number; 23 | get_params(options: Options): any; 24 | set_params(options: Options): void; 25 | set_callback(callback: TrainerCallback): void; 26 | } 27 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var path = require('path'); 2 | var 
binary = require('node-pre-gyp'); 3 | 4 | var binaryPath = binary.find(path.resolve(path.join(__dirname, './package.json'))); 5 | var CRFSuite = require(binaryPath); 6 | 7 | module.exports = CRFSuite; 8 | -------------------------------------------------------------------------------- /lib/binding/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vunb/node-crfsuite/c5724c15fa50bf2321874ab3ef58bd6ef7471806/lib/binding/.gitkeep -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "crfsuite", 3 | "version": "1.0.1", 4 | "description": "NodeJS binding for CRFsuite", 5 | "main": "index.js", 6 | "types": "index.d.ts", 7 | "engines": { 8 | "node": ">=8.0.0" 9 | }, 10 | "scripts": { 11 | "config": "node-pre-gyp configure", 12 | "build": "node-pre-gyp rebuild", 13 | "install": "node-pre-gyp install --fallback-to-build", 14 | "test": "tape test/start.js | tap-spec", 15 | "publish-binary": "git commit --allow-empty -m \"[publish binary]\"" 16 | }, 17 | "repository": { 18 | "type": "git", 19 | "url": "git+https://github.com/vunb/node-crfsuite.git" 20 | }, 21 | "keywords": [ 22 | "crf", 23 | "crfsuite", 24 | "data-science" 25 | ], 26 | "author": "vunb", 27 | "license": "MIT", 28 | "bugs": { 29 | "url": "https://github.com/vunb/node-crfsuite/issues" 30 | }, 31 | "homepage": "https://github.com/vunb/node-crfsuite#readme", 32 | "binary": { 33 | "module_name": "crfsuite", 34 | "module_path": "./lib/binding/{node_napi_label}", 35 | "package_name": "{module_name}-{platform}-{arch}-{node_napi_label}.tar.gz", 36 | "host": "https://github.com/vunb/node-crfsuite/releases/download/", 37 | "remote_path": "{version}", 38 | "napi_versions": [ 39 | 1, 40 | 3 41 | ] 42 | }, 43 | "files": [ 44 | "src", 45 | "crfsuite/lib", 46 | "crfsuite/include", 47 | "liblbfgs/lib", 48 | 
"liblbfgs/include", 49 | "binding.gyp", 50 | "index.js", 51 | "index.d.ts" 52 | ], 53 | "dependencies": { 54 | "node-addon-api": "^1.6.3", 55 | "node-pre-gyp": "^0.13.0" 56 | }, 57 | "devDependencies": { 58 | "node-gyp": "^4.0.0", 59 | "node-pre-gyp-github": "^1.4.3", 60 | "tap-spec": "^5.0.0", 61 | "tape": "^4.11.0" 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "trainer_class.h" 4 | #include "tagger_class.h" 5 | 6 | Napi::Object Initialize(Napi::Env env, Napi::Object exports) 7 | { 8 | TrainerClass::Init(env, exports); 9 | TaggerClass::Init(env, exports); 10 | return exports; 11 | } 12 | 13 | NODE_API_MODULE(crfsuite, Initialize) 14 | -------------------------------------------------------------------------------- /src/tagger_class.cc: -------------------------------------------------------------------------------- 1 | #include "tagger_class.h" 2 | 3 | Napi::FunctionReference TaggerClass::constructor; 4 | 5 | TaggerClass::TaggerClass(const Napi::CallbackInfo &info) 6 | : Napi::ObjectWrap(info) 7 | { 8 | Napi::Env env = info.Env(); 9 | Napi::HandleScope scope(env); 10 | tagger = new CRFSuite::Tagger(); 11 | } 12 | 13 | Napi::Object TaggerClass::Init(Napi::Env env, Napi::Object exports) 14 | { 15 | Napi::HandleScope scope(env); 16 | Napi::Function func = DefineClass(env, "TaggerClass", 17 | {InstanceMethod("open", &TaggerClass::Open), 18 | InstanceMethod("close", &TaggerClass::Close), 19 | InstanceMethod("tag", &TaggerClass::Tag), 20 | InstanceMethod("get_labels", &TaggerClass::GetLabels) 21 | 22 | }); 23 | 24 | constructor = Napi::Persistent(func); 25 | constructor.SuppressDestruct(); 26 | 27 | exports.Set("Tagger", func); 28 | return exports; 29 | } 30 | 31 | Napi::Value TaggerClass::Open(const Napi::CallbackInfo &info) 32 | { 33 | if (info.Length() < 1 || !info[0].IsString()) 34 
| { 35 | Napi::TypeError::New(info.Env(), "Path to model file is missing or invalid").ThrowAsJavaScriptException(); 36 | } 37 | 38 | Napi::String path = info[0].As(); 39 | return Napi::Boolean::New(info.Env(), this->tagger->open(path.Utf8Value())); 40 | } 41 | 42 | Napi::Value TaggerClass::Close(const Napi::CallbackInfo &info) 43 | { 44 | this->tagger->close(); 45 | return Napi::Boolean::New(info.Env(), true); 46 | } 47 | 48 | Napi::Value TaggerClass::Tag(const Napi::CallbackInfo &info) 49 | { 50 | if (info.Length() < 1) 51 | { 52 | Napi::TypeError::New(info.Env(), "xseq is missing").ThrowAsJavaScriptException(); 53 | } 54 | else if (!info[0].IsArray()) 55 | { 56 | Napi::TypeError::New(info.Env(), "xseq must be an array of arrays").ThrowAsJavaScriptException(); 57 | } 58 | 59 | Napi::Array xseq = info[0].As(); 60 | 61 | CRFSuite::ItemSequence items; 62 | CRFSuite::StringList labels; 63 | 64 | for (size_t i = 0; i < xseq.Length(); ++i) 65 | { 66 | Napi::Value val = xseq.Get(i); 67 | if (!val.IsArray()) 68 | { 69 | Napi::TypeError::New(info.Env(), "xseq must be an array of arrays").ThrowAsJavaScriptException(); 70 | } 71 | 72 | Napi::Array xxseq = val.As(); 73 | 74 | CRFSuite::Item item; 75 | item.empty(); 76 | 77 | for (size_t j = 0; j < xxseq.Length(); ++j) 78 | { 79 | Napi::String observable = xxseq.Get(j).ToString(); 80 | item.push_back(CRFSuite::Attribute(observable.Utf8Value())); 81 | } 82 | items.push_back(item); 83 | } 84 | 85 | labels = this->tagger->tag(items); 86 | 87 | // Create a new empty array. 88 | Napi::Array result = Napi::Array::New(info.Env(), labels.size()); 89 | 90 | for (size_t i = 0; i < labels.size(); i++) 91 | { 92 | Napi::String value = Napi::String::New(info.Env(), labels[i]); 93 | result.Set(i, value); 94 | } 95 | 96 | return result; 97 | } 98 | 99 | Napi::Value TaggerClass::GetLabels(const Napi::CallbackInfo &info) 100 | { 101 | CRFSuite::StringList list = this->tagger->labels(); 102 | 103 | // Create a new empty array. 
104 | Napi::Array result = Napi::Array::New(info.Env(), list.size()); 105 | 106 | for (size_t i = 0; i < list.size(); i++) 107 | { 108 | Napi::String value = Napi::String::New(info.Env(), list[i]); 109 | result.Set(i, value); 110 | } 111 | 112 | return result; 113 | } 114 | -------------------------------------------------------------------------------- /src/tagger_class.h: -------------------------------------------------------------------------------- 1 | #ifndef _TAGGER_CLASS_H_ 2 | #define _TAGGER_CLASS_H_ 3 | 4 | #include 5 | #include 6 | 7 | class TaggerClass : public Napi::ObjectWrap 8 | { 9 | public: 10 | static Napi::Object Init(Napi::Env env, Napi::Object exports); 11 | explicit TaggerClass(const Napi::CallbackInfo &info); 12 | 13 | /** 14 | * Destructor 15 | */ 16 | ~TaggerClass() 17 | { 18 | delete tagger; 19 | } 20 | 21 | private: 22 | static Napi::FunctionReference constructor; 23 | 24 | Napi::Value Open(const Napi::CallbackInfo &info); 25 | Napi::Value Close(const Napi::CallbackInfo &info); 26 | Napi::Value Tag(const Napi::CallbackInfo &info); 27 | Napi::Value GetLabels(const Napi::CallbackInfo &info); 28 | 29 | CRFSuite::Tagger *tagger; 30 | }; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/trainer_class.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "trainer_class.h" 3 | 4 | Napi::FunctionReference TrainerClass::constructor; 5 | 6 | NodeTrainer::NodeTrainer(bool debug) : CRFSuite::Trainer() 7 | { 8 | this->debug = debug; 9 | } 10 | 11 | void NodeTrainer::message(const std::string &msg) 12 | { 13 | if ( this->debug == true ) std::cout << msg; 14 | } 15 | 16 | // TrainerClass::TrainerClass(const Napi::CallbackInfo &info) 17 | // : Napi::ObjectWrap(info), trainer(NULL) 18 | // { 19 | // Napi::HandleScope scope(info.Env()); 20 | // } 21 | 22 | Napi::Object TrainerClass::Init(Napi::Env env, Napi::Object exports) 23 | { 24 | 
Napi::HandleScope scope(env); 25 | Napi::Function func = DefineClass(env, "TrainerClass", 26 | {InstanceMethod("init", &TrainerClass::InitTrainer), 27 | InstanceMethod("get_params", &TrainerClass::GetParams), 28 | InstanceMethod("set_params", &TrainerClass::SetParams), 29 | InstanceMethod("append", &TrainerClass::Append), 30 | InstanceMethod("train", &TrainerClass::Train) 31 | 32 | }); 33 | 34 | constructor = Napi::Persistent(func); 35 | constructor.SuppressDestruct(); 36 | 37 | exports.Set("Trainer", func); 38 | return exports; 39 | } 40 | 41 | TrainerClass::TrainerClass(const Napi::CallbackInfo &info) 42 | : Napi::ObjectWrap(info) 43 | { 44 | Napi::Env env = info.Env(); 45 | Napi::HandleScope scope(env); 46 | 47 | bool debug = false; 48 | if ( info.Length() == 1 ){ 49 | Napi::Object options = info[0].As(); 50 | if ( options.Has("debug") ){ 51 | debug = options.Get("debug").ToBoolean().Value(); 52 | } 53 | } 54 | 55 | this->trainer = new NodeTrainer(debug); 56 | this->trainer->select("lbfgs", "crf1d"); 57 | } 58 | 59 | Napi::Value TrainerClass::InitTrainer(const Napi::CallbackInfo &info) 60 | { 61 | // Isolate* isolate = args.GetIsolate(); 62 | // TrainerClass* obj = ObjectWrap::Unwrap(args.Holder()); 63 | // obj->trainer = new NodeTrainer(); 64 | // args.GetReturnValue().Set(v8::Boolean::New(isolate, obj->trainer->select("lbfgs", "crf1d"))); 65 | return Napi::Boolean::New(info.Env(), true); 66 | } 67 | 68 | Napi::Value TrainerClass::GetParams(const Napi::CallbackInfo &info) 69 | { 70 | CRFSuite::StringList params = this->trainer->params(); 71 | 72 | // Create a new empty object. 
73 | Napi::Object obj = Napi::Object::New(info.Env()); 74 | 75 | for (size_t i = 0; i < params.size(); i++) 76 | { 77 | obj.Set(params[i], this->trainer->get(params[i])); 78 | } 79 | 80 | return obj; 81 | } 82 | 83 | void TrainerClass::SetParams(const Napi::CallbackInfo &info) 84 | { 85 | if (info.Length() == 0 || !info[0].IsObject()) 86 | { 87 | Napi::TypeError::New(info.Env(), "Options argument is missing or invalid").ThrowAsJavaScriptException(); 88 | } 89 | 90 | Napi::Object params = info[0].As(); 91 | Napi::Array property_names = params.GetPropertyNames(); 92 | 93 | for (size_t i = 0; i < property_names.Length(); ++i) 94 | { 95 | Napi::Value key = property_names.Get(i); 96 | Napi::Value value = params.Get(key); 97 | 98 | if (key.IsString()) 99 | { 100 | this->trainer->set(key.As().Utf8Value(), value.ToString().Utf8Value()); 101 | } 102 | else 103 | { 104 | Napi::TypeError::New(info.Env(), "Invalid parameter name").ThrowAsJavaScriptException(); 105 | } 106 | } 107 | } 108 | 109 | void TrainerClass::Append(const Napi::CallbackInfo &info) 110 | { 111 | if (info.Length() < 2) 112 | { 113 | Napi::TypeError::New(info.Env(), "Invalid number of arguments").ThrowAsJavaScriptException(); 114 | } 115 | 116 | if (!info[0].IsArray()) 117 | { 118 | Napi::TypeError::New(info.Env(), "xseq (training data) argument must be an array of arrays").ThrowAsJavaScriptException(); 119 | } 120 | 121 | if (!info[1].IsArray()) 122 | { 123 | Napi::TypeError::New(info.Env(), "yseq (labels) argument must be an array").ThrowAsJavaScriptException(); 124 | } 125 | 126 | Napi::Array xseq = info[0].As(); // Local::Cast(args[0]); 127 | Napi::Array yseq = info[1].As(); // Local::Cast(args[1]); 128 | 129 | if (xseq.Length() != yseq.Length()) 130 | { 131 | Napi::TypeError::New(info.Env(), "xseq and yseq must be of same size").ThrowAsJavaScriptException(); 132 | } 133 | 134 | CRFSuite::ItemSequence items; 135 | CRFSuite::StringList labels; 136 | 137 | for (size_t i = 0; i < xseq.Length(); ++i) 138 | { 
139 | Napi::Value val = xseq.Get(i); 140 | if (!val.IsArray()) 141 | { 142 | Napi::TypeError::New(info.Env(), "xseq (training data) argument must be an array of arrays").ThrowAsJavaScriptException(); 143 | } 144 | 145 | Napi::Array xxseq = val.As(); 146 | 147 | CRFSuite::Item item; 148 | item.empty(); 149 | 150 | for (size_t j = 0; j < xxseq.Length(); ++j) 151 | { 152 | Napi::String attrName = xxseq.Get(j).ToString(); 153 | item.push_back(CRFSuite::Attribute(attrName.Utf8Value())); 154 | } 155 | items.push_back(item); 156 | } 157 | 158 | for (size_t i = 0; i < yseq.Length(); ++i) 159 | { 160 | Napi::String attrName = yseq.Get(i).ToString(); 161 | labels.push_back(attrName.Utf8Value()); 162 | } 163 | 164 | try 165 | { 166 | this->trainer->append(items, labels, 0); 167 | } 168 | catch (std::invalid_argument &e) 169 | { 170 | Napi::TypeError::New(info.Env(), "Invalid arguments").ThrowAsJavaScriptException(); 171 | } 172 | catch (std::runtime_error &e) 173 | { 174 | Napi::TypeError::New(info.Env(), "Out of memory").ThrowAsJavaScriptException(); 175 | } 176 | } 177 | 178 | Napi::Value TrainerClass::Train(const Napi::CallbackInfo &info) 179 | { 180 | if (info.Length() < 1 || !info[0].IsString()) 181 | { 182 | Napi::TypeError::New(info.Env(), "Path to model file is missing or invalid").ThrowAsJavaScriptException(); 183 | } 184 | 185 | Napi::String path = info[0].As(); 186 | int32_t status = this->trainer->train(path.Utf8Value(), -1); 187 | 188 | return Napi::Number::New(info.Env(), status); 189 | } 190 | -------------------------------------------------------------------------------- /src/trainer_class.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRAINER_CLASS_H_ 2 | #define _TRAINER_CLASS_H_ 3 | 4 | #include 5 | #include 6 | 7 | class NodeTrainer : public CRFSuite::Trainer 8 | { 9 | public: 10 | NodeTrainer(bool debug); 11 | virtual void message(const std::string &msg); 12 | private: 13 | bool debug = false; 14 | }; 15 
| 16 | class TrainerClass : public Napi::ObjectWrap 17 | { 18 | public: 19 | static Napi::Object Init(Napi::Env env, Napi::Object exports); 20 | explicit TrainerClass(const Napi::CallbackInfo &info); 21 | 22 | /** 23 | * Destructor 24 | */ 25 | ~TrainerClass() 26 | { 27 | if (trainer) 28 | delete trainer; 29 | } 30 | 31 | private: 32 | static Napi::FunctionReference constructor; 33 | NodeTrainer *trainer; 34 | 35 | Napi::Value InitTrainer(const Napi::CallbackInfo &info); 36 | Napi::Value GetParams(const Napi::CallbackInfo &info); 37 | void SetParams(const Napi::CallbackInfo &info); 38 | void Append(const Napi::CallbackInfo &info); 39 | Napi::Value Train(const Napi::CallbackInfo &info); 40 | }; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /tea.yaml: -------------------------------------------------------------------------------- 1 | # https://tea.xyz/what-is-this-file 2 | --- 3 | version: 1.0.0 4 | codeOwners: 5 | - '0x452244cFD2293a8a9270bCc725eFc6924663B1B6' 6 | quorum: 1 7 | -------------------------------------------------------------------------------- /test/models/model_9.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vunb/node-crfsuite/c5724c15fa50bf2321874ab3ef58bd6ef7471806/test/models/model_9.bin -------------------------------------------------------------------------------- /test/models/wordsent_crf_v1.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vunb/node-crfsuite/c5724c15fa50bf2321874ab3ef58bd6ef7471806/test/models/wordsent_crf_v1.model -------------------------------------------------------------------------------- /test/specs/tagger.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const test = require('tape'); 3 | const path = require('path'); 4 | const crfsuite = require('../..'); 5 | 
const tagger = new crfsuite.Tagger(); 6 | 7 | const input = [['Chào', 'bias', 'word.lower=chào', 'word.isupper=False', 'word.istitle=True', 'word.isdigit=False', 'BOS', '2_word.in_dictionary=1', '-2_word:BOS', '-3_word:BOS', '3_word.in_dictionary=0', 'BOS', '+1:word.lower=mừng', '+2:word.lower=các'], ['mừng', 'bias', 'word.lower=mừng', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=chào', '-1:word.istitle=True', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=1', '-3_word:BOS', '3_word.in_dictionary=0', 'BOS', '+1:word.lower=các', '+2:word.lower=bạn'], ['các', 'bias', 'word.lower=các', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=mừng', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=chào', '+1:word.lower=bạn', '+2:word.lower=trẻ'], ['bạn', 'bias', 'word.lower=bạn', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=các', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=mừng', '+1:word.lower=trẻ', '+2:word.lower=tới'], ['trẻ', 'bias', 'word.lower=trẻ', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=bạn', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=các', '+1:word.lower=tới', '+2:word.lower=thành'], ['tới', 'bias', 'word.lower=tới', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=trẻ', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=bạn', '+1:word.lower=thành', '+2:word.lower=phố'], ['thành', 
'bias', 'word.lower=thành', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=tới', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=1', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=trẻ', '+1:word.lower=phố', '+2:word.lower=hà'], ['phố', 'bias', 'word.lower=phố', 'word.isupper=False', 'word.istitle=False', 'word.isdigit=False', '-1:word.lower=thành', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=1', '-3_word.in_dictionary=0', '3_word.in_dictionary=0', '-2:word.lower=tới', '+1:word.lower=hà', '+2:word.lower=nội'], ['Hà', 'bias', 'word.lower=hà', 'word.isupper=False', 'word.istitle=True', 'word.isdigit=False', '-1:word.lower=phố', '-1:word.istitle=False', '-1:word.isupper=False', '2_word.in_dictionary=0', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word:EOS', '-2:word.lower=thành', '+1:word.lower=nội', 'EOS'], ['Nội', 'bias', 'word.lower=nội', 'word.isupper=False', 'word.istitle=True', 'word.isdigit=False', '-1:word.lower=hà', '-1:word.istitle=True', '-1:word.isupper=False', '2_word:EOS', '-2_word.in_dictionary=0', '-3_word.in_dictionary=0', '3_word:EOS', '-2:word.lower=phố', 'EOS', 'EOS']] 8 | 9 | test('crfsuite tagger', function (t) { 10 | t.plan(2) 11 | 12 | let model_path = path.resolve(path.join(__dirname, '../models/wordsent_crf_v1.model')) 13 | console.log('File model path: ' + model_path) 14 | let is_opened = tagger.open(model_path) 15 | 16 | t.ok(is_opened, 'File model is opened') 17 | 18 | let tags = tagger.tag(input) 19 | console.log('Tags: ', tags) 20 | 21 | t.deepEqual(tags, ['B_W', 'I_W', 'B_W', 'B_W', 'B_W', 'B_W', 'B_W', 'I_W', 'B_W', 'I_W'], `['Chào mừng', 'các', 'bạn', 'trẻ', 'tới', 'thành phố', 'Hà Nội']`) 22 | }) 23 | -------------------------------------------------------------------------------- /test/specs/trainer.js: 
-------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | const test = require('tape'); 5 | const crfsuite = require('../..'); 6 | 7 | const trainer = new crfsuite.Trainer({ 8 | debug: false, 9 | }); 10 | 11 | 12 | test('crfsuite trainer', function (t) { 13 | t.plan(2) 14 | 15 | let model_filename = path.resolve('./model.crfsuite') 16 | 17 | t.notOk(fs.existsSync(model_filename), 'model file must not exist before training: ' + model_filename) 18 | 19 | let xseq = [['walk'], ['walk', 'shop'], ['clean', 'shop']] 20 | let yseq = ['sunny', 'sunny', 'rainy'] 21 | 22 | // submit training data to the trainer 23 | trainer.append(xseq, yseq) 24 | trainer.train(model_filename) 25 | 26 | t.ok(fs.existsSync(model_filename), 'model file must exist after training') 27 | }) 28 | -------------------------------------------------------------------------------- /test/start.js: -------------------------------------------------------------------------------- 1 | var path = require('path'); 2 | var dir = '../test/specs/'; 3 | 4 | [ 5 | 'trainer', 6 | 'tagger' 7 | ].forEach((script) => { 8 | require(path.join(dir, script)); 9 | }); 10 | --------------------------------------------------------------------------------