├── dist └── .gitkeep ├── lib └── .gitkeep ├── .travis.yml ├── CHANGELOG.md ├── .npmignore ├── .gitignore ├── test ├── browser.spec.js └── node.spec.js ├── .eslintrc ├── LICENSE ├── README.md ├── package.json ├── src └── index.js └── .babelrc /dist/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 6 4 | 5 | script: 6 | - npm run build 7 | 8 | after_script: 9 | - ./node_modules/.bin/jest --coverage && cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./coverage -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## [1.1.0](https://github.com/hexenq/kuroshiro-analyzer-kuromoji/compare/1.0.0...1.1.0) (2018-08-05) 3 | 4 | ### Build 5 | 6 | * modify the name of umd file 7 | 8 | ### Miscellaneous 9 | 10 | * Update README.md 11 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Dependency directory 12 | node_modules 13 | bower_components 14 | components 15 | 16 | # Workspace 17 | *.sublime-workspace 18 | .idea 19 | .vscode 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | 24 | # Others 25 | .DS_Store 26 | package-lock.json 27 | *.map 28 | docs 29 | tmp 30 | demo 31 | **/.gitkeep -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Dependency directory 12 | bower_components 13 | node_modules 14 | 15 | # Workspace 16 | *.sublime-workspace 17 | .idea/ 18 | .vscode 19 | /demo/bower_components 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | 24 | # Generated files 25 | dist/* 26 | lib/* 27 | 28 | # Others 29 | .DS_Store 30 | package-lock.json 31 | *.map 32 | docs 33 | tmp 34 | !**/.gitkeep -------------------------------------------------------------------------------- /test/browser.spec.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @jest-environment jsdom 3 | */ 4 | 5 | import Analyzer from "../src"; 6 | 7 | describe("kuroshiro-analyzer-kuromoji Browser Test", () => { 8 | const EXAMPLE_TEXT = "すもももももも"; 9 | 10 | let analyzer; 11 | 12 | beforeAll(async () => { 13 | analyzer = new Analyzer(); 14 | await analyzer.init(); 15 | }); 16 | it("Parse Sentence", (done) => { 17 | const ori = EXAMPLE_TEXT; 18 | analyzer.parse(ori) 19 | .then((result) => { 20 | // console.debug(result); 21 | expect(result).toBeInstanceOf(Array); 22 | expect(result).toHaveLength(4); 23 | done(); 24 | }) 25 | .catch((err) => { 26 | done(err); 27 | }); 28 | }); 29 | }); 30 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "babel-eslint", 3 | "rules": { 4 | "max-len": 0, 5 | "no-plusplus": 0, 6 | "comma-dangle": ["error", "never"], 7 | "brace-style": ["error", "stroustrup"], 8 | "no-console": 0, 9 | "padded-blocks": 0, 10 | "indent": ["error", 4, {"SwitchCase": 1}], 11 | "spaced-comment": 1, 12 | "quotes": ["error", "double", { "allowTemplateLiterals": true }], 13 | "import/prefer-default-export": "off", 14 | "consistent-return": 0, 15 | "no-useless-escape": 0, 16 | "no-underscore-dangle": 0, 17 | "no-unused-vars": 0, 18 | "no-param-reassign": 0, 19 | "no-case-declarations": 0, 20 | "prefer-destructuring": 0, 21 | "func-names": 0, 22 | "camelcase": 0, 23 | "no-cond-assign": 0 24 | }, 25 | "extends": "airbnb-base", 26 | "env": { 27 | "es6": true, 28 | "node": true, 29 | "browser": true, 30 | "jest": true 31 | } 32 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2018 Hexen Qi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kuroshiro-analyzer-kuromoji 2 | 3 | [![Build Status](https://travis-ci.com/hexenq/kuroshiro-analyzer-kuromoji.svg?branch=master)](https://travis-ci.org/hexenq/kuroshiro-analyzer-kuromoji) 4 | [![Coverage Status](https://coveralls.io/repos/github/hexenq/kuroshiro-analyzer-kuromoji/badge.svg?branch=master)](https://coveralls.io/github/hexenq/kuroshiro-analyzer-kuromoji?branch=master) 5 | [![npm version](https://badge.fury.io/js/kuroshiro-analyzer-kuromoji.svg)](http://badge.fury.io/js/kuroshiro-analyzer-kuromoji) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
Packagekuroshiro-analyzer-kuromoji
DescriptionKuromoji morphological analyzer for kuroshiro.
CompatibilityNode✓ (>=6)
Browser
26 | 27 | ## Install 28 | ```sh 29 | $ npm install kuroshiro-analyzer-kuromoji 30 | ``` 31 | *For legacy frontend workflows, you could include `dist/kuroshiro-analyzer-kuromoji.min.js` in your page and the exported name is `KuromojiAnalyzer`. (you may first build it from source with `npm run build` after `npm install`)* 32 | 33 | ## Usage with kuroshiro 34 | ### Configure analyzer 35 | This analyzer utilizes [kuromoji.js](https://github.com/takuyaa/kuromoji.js). 36 | 37 | You could specify the path of your dictionary files with `dictPath` param. 38 | 39 | ```js 40 | import KuromojiAnalyzer from "kuroshiro-analyzer-kuromoji"; 41 | 42 | const analyzer = new KuromojiAnalyzer(); 43 | 44 | await kuroshiro.init(analyzer); 45 | ``` 46 | 47 | ### Initialization Parameters 48 | __Example:__ 49 | ```js 50 | const analyzer = new KuromojiAnalyzer({ 51 | dictPath: "url/to/dictionary_files" 52 | }); 53 | ``` 54 | - `dictPath`: *Optional* Path of the dictionary files -------------------------------------------------------------------------------- /test/node.spec.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @jest-environment node 3 | */ 4 | 5 | import Analyzer from "../src"; 6 | 7 | describe("kuroshiro-analyzer-kuromoji Node Test", () => { 8 | const EXAMPLE_TEXT = "すもももももも"; 9 | 10 | let analyzer; 11 | 12 | it("Initialization", async (done) => { 13 | analyzer = new Analyzer({ 14 | dictPath: "node_modules/kuromoji/dict/" 15 | }); 16 | await analyzer.init(); 17 | done(); 18 | }); 19 | 20 | it("Repeated Initialization", async (done) => { 21 | analyzer = new Analyzer({ 22 | dictPath: "node_modules/kuromoji/dict/" 23 | }); 24 | try { 25 | await analyzer.init(); 26 | await analyzer.init(); 27 | done("SHOULD NOT BE HERE"); 28 | } 29 | catch (err) { 30 | done(); 31 | } 32 | }); 33 | 34 | it("Kuromoji Build Failed", async (done) => { 35 | analyzer = new Analyzer({ 36 | dictPath: "node_modules/foo/bar" 37 | }); 38 | try { 39 | await analyzer.init(); 40 | done("SHOULD NOT BE HERE"); 41 | } 42 | catch (err) { 43 | done(); 44 | } 45 | }); 46 | 47 | it("Parse Sentence", async (done) => { 48 | analyzer = new Analyzer(); 49 | await analyzer.init(); 50 | 51 | const ori = EXAMPLE_TEXT; 52 | analyzer.parse(ori) 53 | .then((result) => { 54 | // console.debug(result); 55 | expect(result).toBeInstanceOf(Array); 56 | expect(result).toHaveLength(4); 57 | done(); 58 | }) 59 | .catch((err) => { 60 | done(err); 61 | }); 62 | }); 63 | 64 | it("Parse Null", async (done) => { 65 | analyzer = new Analyzer(); 66 | await analyzer.init(); 67 | 68 | analyzer.parse() 69 | .then((result) => { 70 | // console.debug(result); 71 | expect(result).toBeInstanceOf(Array); 72 | expect(result).toHaveLength(0); 73 | done(); 74 | }) 75 | .catch((err) => { 76 | done(err); 77 | }); 78 | }); 79 | 80 | it("Parse Blank Sentence", async (done) => { 81 | analyzer = new Analyzer(); 82 | await analyzer.init(); 83 | 84 | const ori = ""; 85 | analyzer.parse(ori) 86 | .then((result) => { 87 | // console.debug(result); 88 | expect(result).toBeInstanceOf(Array); 89 | expect(result).toHaveLength(0); 90 | done(); 91 | }) 92 | .catch((err) => { 93 | done(err); 94 | }); 95 | }); 96 | }); 97 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kuroshiro-analyzer-kuromoji", 3 | "version": "1.1.0", 4 | "description": "kuromoji morphological analyzer for kuroshiro", 5 | "main": "lib/index.js", 6 | "module": "src/index.js", 7 | "scripts": { 8 | "lint": "eslint src test", 9 | "lint:fix": "eslint --fix src test --ext .js", 10 | "jest": "jest --verbose", 11 | "jest:watch": "npm run jest -- --watch", 12 | "jest:coverage": "jest --verbose --coverage", 13 | "test": "npm run lint && npm run jest", 14 | "clean": "rimraf lib dist", 15 | "build": "npm run build:cjs && npm run build:umd && npm run build:umd:min", 16 | "build:cjs": "cross-env BABEL_ENV=cjs babel src --out-dir lib", 17 | "build:umd": "cross-env BABEL_ENV=umd NODE_ENV=development browserify src/index.js -s KuromojiAnalyzer -o dist/kuroshiro-analyzer-kuromoji.js -t [ babelify ]", 18 | "build:umd:min": "cross-env BABEL_ENV=umd NODE_ENV=production browserify src/index.js -s KuromojiAnalyzer -g uglifyify -o dist/kuroshiro-analyzer-kuromoji.min.js -t [ babelify ]" 19 | }, 20 | "keywords": [ 21 | "kuroshiro", 22 | "morphology", 23 | "analyzer", 24 | "kuromoji", 25 | "japanese", 26 | "language", 27 | "kanji", 28 | "hiragana", 29 | "katakana", 30 | "romaji", 31 | "kana" 32 | ], 33 | "repository": { 34 | "type": "git", 35 | "url": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji.git" 36 | }, 37 | "author": "Hexen Qi", 38 | "license": "MIT", 39 | "bugs": { 40 | "url": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji/issues" 41 | }, 42 | "homepage": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji", 43 | "dependencies": { 44 | "kuromoji": "^0.1.1" 45 | }, 46 | "devDependencies": { 47 | "babel-cli": "^6.26.0", 48 | "babel-core": "^6.26.0", 49 | "babel-eslint": "^8.2.3", 50 | "babel-jest": "^22.4.4", 51 | "babel-plugin-add-module-exports": "^0.2.1", 52 | "babel-plugin-transform-runtime": "^6.23.0", 53 | "babel-preset-env": "^1.6.1", 54 | "babelify": "^8.0.0", 55 | "browserify": "^16.2.2", 56 | "coveralls": "^3.0.1", 57 | "cross-env": "^5.1.5", 58 | "eslint": "^4.19.1", 59 | "eslint-config-airbnb-base": "^12.1.0", 60 | "eslint-plugin-import": "^2.12.0", 61 | "jest": "^22.4.4", 62 | "uglifyify": "^5.0.0" 63 | }, 64 | "browserify": { 65 | "transform": [ 66 | [ 67 | "babelify" 68 | ] 69 | ] 70 | }, 71 | "jest": { 72 | "transform": { 73 | "^.+\\.jsx?$": "babel-jest" 74 | }, 75 | "testRegex": "(/test/.*|(\\.|/)(test|spec))\\.jsx?$", 76 | "testURL": "http://localhost" 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import kuromoji from "kuromoji"; 2 | 3 | // Check where we are 4 | let isNode = false; 5 | const isBrowser = (typeof window !== "undefined"); 6 | if (!isBrowser && typeof module !== "undefined" && module.exports) { 7 | isNode = true; 8 | } 9 | 10 | /** 11 | * Kuromoji based morphological analyzer for kuroshiro 12 | */ 13 | class Analyzer { 14 | /** 15 | * Constructor 16 | * @param {Object} [options] JSON object which have key-value pairs settings 17 | * @param {string} [options.dictPath] Path of the dictionary files 18 | */ 19 | constructor({ dictPath } = {}) { 20 | this._analyzer = null; 21 | 22 | if (!dictPath) { 23 | if (isNode) this._dictPath = require.resolve("kuromoji").replace(/src(?!.*src).*/, "dict/"); 24 | else this._dictPath = "node_modules/kuromoji/dict/"; 25 | } 26 | else { 27 | this._dictPath = dictPath; 28 | } 29 | } 30 | 31 | /** 32 | * Initialize the analyzer 33 | * @returns {Promise} Promise object represents the result of initialization 34 | */ 35 | init() { 36 | return new Promise((resolve, reject) => { 37 | const self = this; 38 | if (this._analyzer == null) { 39 | kuromoji.builder({ dicPath: this._dictPath }).build((err, newAnalyzer) => { 40 | if (err) { 41 | return reject(err); 42 | } 43 | self._analyzer = newAnalyzer; 44 | resolve(); 45 | }); 46 | } 47 | else { 48 | reject(new Error("This analyzer has already been initialized.")); 49 | } 50 | }); 51 | } 52 | 53 | /** 54 | * Parse the given string 55 | * @param {string} str input string 56 | * @returns {Promise} Promise object represents the result of parsing 57 | * @example The result of parsing 58 | * [{ 59 | * "surface_form": "黒白", // 表層形 60 | * "pos": "名詞", // 品詞 (part of speech) 61 | * "pos_detail_1": "一般", // 品詞細分類1 62 | * "pos_detail_2": "*", // 品詞細分類2 63 | * "pos_detail_3": "*", // 品詞細分類3 64 | * "conjugated_type": "*", // 活用型 65 | * "conjugated_form": "*", // 活用形 66 | * "basic_form": "黒白", // 基本形 67 | * "reading": "クロシロ", // 読み 68 | * "pronunciation": "クロシロ", // 発音 69 | * "verbose": { // Other properties 70 | * "word_id": 413560, 71 | * "word_type": "KNOWN", 72 | * "word_position": 1 73 | * } 74 | * }] 75 | */ 76 | parse(str = "") { 77 | return new Promise((resolve, reject) => { 78 | if (str.trim() === "") return resolve([]); 79 | const result = this._analyzer.tokenize(str); 80 | for (let i = 0; i < result.length; i++) { 81 | result[i].verbose = {}; 82 | result[i].verbose.word_id = result[i].word_id; 83 | result[i].verbose.word_type = result[i].word_type; 84 | result[i].verbose.word_position = result[i].word_position; 85 | delete result[i].word_id; 86 | delete result[i].word_type; 87 | delete result[i].word_position; 88 | } 89 | resolve(result); 90 | }); 91 | } 92 | } 93 | 94 | export default Analyzer; 95 | -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "env" 4 | ], 5 | "plugins": [ 6 | "add-module-exports", 7 | [ 8 | "transform-runtime", 9 | { 10 | "helpers": false, 11 | "polyfill": false, 12 | "regenerator": true, 13 | "moduleName": "babel-runtime" 14 | } 15 | ] 16 | ], 17 | "env": { 18 | "cjs": { 19 | "presets": [ 20 | [ 21 | "env", 22 | { 23 | "targets": { 24 | "node": "6" 25 | } 26 | } 27 | ] 28 | ], 29 | "plugins": [ 30 | "add-module-exports", 31 | [ 32 | "transform-runtime", 33 | { 34 | "helpers": false, 35 | "polyfill": false, 36 | "regenerator": true, 37 | "moduleName": "babel-runtime" 38 | } 39 | ] 40 | ] 41 | }, 42 | "es": { 43 | "presets": [ 44 | [ 45 | "env", 46 | { 47 | "targets": { 48 | "browsers": [ 49 | ">0.25%", 50 | "not ie 11", 51 | "not op_mini all" 52 | ] 53 | }, 54 | "modules": false 55 | } 56 | ] 57 | ], 58 | "plugins": [ 59 | [ 60 | "transform-runtime", 61 | { 62 | "helpers": false, 63 | "polyfill": false, 64 | "regenerator": true, 65 | "moduleName": "babel-runtime" 66 | } 67 | ] 68 | ] 69 | }, 70 | "umd": { 71 | "presets": [ 72 | [ 73 | "env", 74 | { 75 | "targets": { 76 | "browsers": [ 77 | ">0.25%", 78 | "not ie 11", 79 | "not op_mini all" 80 | ] 81 | } 82 | } 83 | ] 84 | ], 85 | "plugins": [ 86 | "add-module-exports", 87 | [ 88 | "transform-runtime", 89 | { 90 | "helpers": false, 91 | "polyfill": false, 92 | "regenerator": true, 93 | "moduleName": "babel-runtime" 94 | } 95 | ] 96 | ] 97 | }, 98 | "test": { 99 | "presets": [ 100 | [ 101 | "env", 102 | { 103 | "targets": { 104 | "node": "6" 105 | } 106 | } 107 | ] 108 | ], 109 | "plugins": [ 110 | "add-module-exports", 111 | [ 112 | "transform-runtime", 113 | { 114 | "helpers": false, 115 | "polyfill": false, 116 | "regenerator": true, 117 | "moduleName": "babel-runtime" 118 | } 119 | ] 120 | ] 121 | } 122 | } 123 | } --------------------------------------------------------------------------------