├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── ---bug-report.md │ └── --feature-request.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ ├── lint.yml │ └── test.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.ja.md ├── README.md ├── example.ts ├── mod.ts ├── src ├── MeCab.ts └── types.ts └── test ├── MeCab.test.ts └── dummyMeCab.ts /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | indent_size = 2 9 | trim_trailing_whitespace = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug report" 3 | about: Create a report to help us improve / バグを報告して改善に役立てる 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 💡 Summary 11 | 12 | 13 | 14 | ## 🙂 Expected Behavior 15 | 16 | 17 | 18 | ## ☹️ Actual Behavior 19 | 20 | 21 | 22 | ## 📸 Screenshots 23 | 24 | 25 | 26 | ## 📝 Steps to Reproduce 27 | 28 | 29 | 30 | 1. 31 | 2. 32 | 3. 33 | 34 | ## 📌 Environment 35 | 36 | 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "✨ Feature request" 3 | about: Suggest an idea for this project / このプロジェクトのための新機能などを提案する 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 💡 Summary 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## 💡 Summary 2 | 3 | 4 | 5 | ## ✅ Linked Issues 6 | 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Setup repo 15 | uses: actions/checkout@v2 16 | 17 | - name: Setup Deno 18 | uses: denoland/setup-deno@v1 19 | with: 20 | deno-version: v1.x 21 | 22 | - name: Verify formatting 23 | run: deno fmt --check 24 | 25 | - name: Run linter 26 | run: deno lint 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | matrix: 15 | os: [windows-latest, macos-latest, ubuntu-latest] 16 | 17 | steps: 18 | - name: Setup repo 19 | uses: actions/checkout@v2 20 | 21 | - name: Setup Deno 22 | uses: denoland/setup-deno@v1 23 | with: 24 | deno-version: v1.x 25 | 26 | - name: Run test 27 | run: deno test --allow-run --coverage=coverage 28 | 29 | - name: Create lcov format coverage 30 | if: matrix.os != 'windows-latest' 31 | run: deno coverage ./coverage --lcov > coverage.lcov 32 | 33 | - name: Upload coverage 34 | if: matrix.os != 'windows-latest' 35 | uses: paambaati/codeclimate-action@v2.7.5 36 | env: 37 | CC_TEST_REPORTER_ID: ${{secrets.CC_TEST_REPORTER_ID}} 38 | with: 39 | coverageCommand: cat coverage.lcov 40 | coverageLocations: | 41 | ${{github.workspace}}/coverage.lcov:lcov 42 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | deno-mecab uses 2 | [Rust's Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct). In 3 | the forums, every community member must follow the rules and values expressed 4 | there. Please email [sleeping.ifrea@gmail.com](mailto:sleeping.ifrea@gmail.com) 5 | to report any instance of misconduct. 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Seraimu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.ja.md: -------------------------------------------------------------------------------- 1 | # deno_mecab 2 | 3 | [![Test](https://img.shields.io/github/actions/workflow/status/sera1mu/deno_mecab/test.yml?branch=main&label=Test&logo=github&logoColor=silver)](https://github.com/sera1mu/deno-mecab/actions/workflows/test.yml) 4 | [![Lint](https://img.shields.io/github/actions/workflow/status/sera1mu/deno_mecab/lint.yml?branch=main&label=lint&logo=github&logoColor=silver)](https://github.com/sera1mu/deno-mecab/actions/workflows/check-code.yml) 5 | [![Test Coverage](https://img.shields.io/codeclimate/coverage/sera1mu/deno-mecab?logo=Code%20Climate)](https://codeclimate.com/github/sera1mu/deno-mecab/test_coverage) 6 | [![Maintainability](https://img.shields.io/codeclimate/maintainability/sera1mu/deno-mecab?logo=Code%20Climate)](https://codeclimate.com/github/sera1mu/deno-mecab/maintainability) 7 | [![license](https://img.shields.io/github/license/sera1mu/deno-mecab)](https://github.com/sera1mu/deno-mecab/blob/main/LICENSE) 8 | 9 | ### [English](https://github.com/sera1mu/deno-mecab/blob/main/README.md) | 日本語 10 | 11 | deno_mecabは、MeCabを用いた非同期の日本語形態素解析モジュールです。 12 | 13 | ## Getting Started 14 | 15 | ### Requirements 16 | 17 | - [Deno](https://deno.land) 18 | - [MeCab](https://taku910.github.io/mecab/) 19 | - MeCab 辞書 20 | - [mecab-ipadic](https://github.com/taku910/mecab/tree/master/mecab-ipadic) 21 | - [mecab-jumandic](https://github.com/taku910/mecab/tree/master/mecab-jumandic) 22 | - [mecab-ipadic-neologd](https://github.com/neologd/mecab-ipadic-neologd) 23 | 24 | ### Example 25 | 26 | 簡単な例として、これを実行してみてください: 27 | 28 | ``` 29 | deno run --allow-run https://deno.land/x/deno_mecab/example.ts 30 | ``` 31 | 32 | 実行されるスクリプト: 33 | 34 | ```ts 35 | import MeCab from "https://deno.land/x/deno_mecab@v1.2.2/mod.ts"; 36 | 37 | const mecab = new MeCab(["mecab"]); 38 | 39 | const text = "JavaScriptはとても楽しいです。"; 40 | 41 | // Parse (形態素解析) 42 | console.log(await mecab.parse(text)); 43 | // [{surface: "JavaScript", feature: "名詞", featureDetails: [ "固有名詞", "組織", "*" ], ... 44 | 45 | // Dump (ダンプ出力) 46 | console.log(await mecab.dump(text)); 47 | // [{nodeId: 0, surface: "BOS", feature: "BOS/EOS", featureDetails: [ "*", "*", "*" ], ... 48 | 49 | // Wakati (わかち書き) 50 | console.log(await mecab.wakati("JavaScriptはとても楽しいです。")); 51 | // [ "JavaScript", "は", "とても", "楽しい", "です", "。" ] 52 | 53 | // Yomi (読み付与) 54 | console.log(await mecab.yomi("日本語")); 55 | // ニホンゴ 56 | ``` 57 | 58 | ## Maintainer 59 | 60 | [@sera1mu](https://github.com/sera1mu) 61 | 62 | ## License 63 | 64 | MIT © 2021 Seraimu 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deno_mecab 2 | 3 | [![Test](https://img.shields.io/github/actions/workflow/status/sera1mu/deno_mecab/test.yml?branch=main&label=Test&logo=github&logoColor=silver)](https://github.com/sera1mu/deno-mecab/actions/workflows/test.yml) 4 | [![Lint](https://img.shields.io/github/actions/workflow/status/sera1mu/deno_mecab/lint.yml?branch=main&label=lint&logo=github&logoColor=silver)](https://github.com/sera1mu/deno-mecab/actions/workflows/check-code.yml) 5 | [![Test Coverage](https://img.shields.io/codeclimate/coverage/sera1mu/deno-mecab?logo=Code%20Climate)](https://codeclimate.com/github/sera1mu/deno-mecab/test_coverage) 6 | [![Maintainability](https://img.shields.io/codeclimate/maintainability/sera1mu/deno-mecab?logo=Code%20Climate)](https://codeclimate.com/github/sera1mu/deno-mecab/maintainability) 7 | [![license](https://img.shields.io/github/license/sera1mu/deno-mecab)](https://github.com/sera1mu/deno-mecab/blob/main/LICENSE) 8 | 9 | ### English | [日本語](https://github.com/sera1mu/deno-mecab/blob/main/README.ja.md) 10 | 11 | deno_mecab is an asynchronous Japanese morphological analysis module using 12 | MeCab. 13 | 14 | ## Getting Started 15 | 16 | ### Requirements 17 | 18 | - [Deno](https://deno.land) 19 | - [MeCab](https://taku910.github.io/mecab/) 20 | - MeCab Dictionary 21 | - [mecab-ipadic](https://github.com/taku910/mecab/tree/master/mecab-ipadic) 22 | - [mecab-jumandic](https://github.com/taku910/mecab/tree/master/mecab-jumandic) 23 | - [mecab-ipadic-neologd](https://github.com/neologd/mecab-ipadic-neologd) 24 | 25 | ### Example 26 | 27 | For a quick example, run this: 28 | 29 | ``` 30 | deno run --allow-run https://deno.land/x/deno_mecab/example.ts 31 | ``` 32 | 33 | The script to be execute: 34 | 35 | ```ts 36 | import MeCab from "https://deno.land/x/deno_mecab@v1.2.2/mod.ts"; 37 | 38 | const mecab = new MeCab(["mecab"]); 39 | 40 | const text = "JavaScriptはとても楽しいです。"; 41 | 42 | // Parse (形態素解析) 43 | console.log(await mecab.parse(text)); 44 | // [{surface: "JavaScript", feature: "名詞", featureDetails: [ "固有名詞", "組織", "*" ], ... 45 | 46 | // Dump (ダンプ出力) 47 | console.log(await mecab.dump(text)); 48 | // [{nodeId: 0, surface: "BOS", feature: "BOS/EOS", featureDetails: [ "*", "*", "*" ], ... 49 | 50 | // Wakati (わかち書き) 51 | console.log(await mecab.wakati("JavaScriptはとても楽しいです。")); 52 | // [ "JavaScript", "は", "とても", "楽しい", "です", "。" ] 53 | 54 | // Yomi (読み付与) 55 | console.log(await mecab.yomi("日本語")); 56 | // ニホンゴ 57 | ``` 58 | 59 | ## Maintainer 60 | 61 | [@sera1mu](https://github.com/sera1mu) 62 | 63 | ## License 64 | 65 | MIT © 2021 Seraimu 66 | -------------------------------------------------------------------------------- /example.ts: -------------------------------------------------------------------------------- 1 | import MeCab from "https://deno.land/x/deno_mecab@v1.2.2/mod.ts"; 2 | 3 | const mecab = new MeCab(["mecab"]); 4 | 5 | const text = "JavaScriptはとても楽しいです。"; 6 | 7 | // Parse (形態素解析) 8 | console.log(await mecab.parse(text)); 9 | 10 | // Dump (ダンプ出力) 11 | console.log(await mecab.dump(text)); 12 | 13 | // Wakati (わかち書き) 14 | console.log(await mecab.wakati("JavaScriptはとても楽しいです。")); 15 | 16 | // Yomi (読み付与) 17 | console.log(await mecab.yomi("日本語")); 18 | -------------------------------------------------------------------------------- /mod.ts: -------------------------------------------------------------------------------- 1 | export { default } from "./src/MeCab.ts"; 2 | export * from "./src/types.ts"; 3 | -------------------------------------------------------------------------------- /src/MeCab.ts: -------------------------------------------------------------------------------- 1 | import { MeCabOptions, ParsedDumpWord, ParsedWord } from "./types.ts"; 2 | 3 | /** 4 | * The wrapper of MeCab. 5 | * 6 | * Requires `allow-run` permission. 7 | */ 8 | export default class MeCab { 9 | private readonly cmd: string[]; 10 | private readonly options?: MeCabOptions; 11 | 12 | constructor(cmd: string[], options?: MeCabOptions) { 13 | this.cmd = cmd; 14 | this.options = options; 15 | } 16 | 17 | /** 18 | * Run MeCab and return stdout. 19 | * 20 | * Requires `allow-run` permission. 21 | */ 22 | private async runMeCab(text: string, cmdArgs?: string[]): Promise { 23 | const options: Deno.RunOptions = { 24 | cmd: cmdArgs ? this.cmd.concat(cmdArgs) : this.cmd, 25 | cwd: this.options?.cwd, 26 | env: this.options?.env, 27 | stdout: "piped", 28 | stdin: "piped", 29 | }; 30 | 31 | const process = Deno.run(options); 32 | 33 | // Write text to stdin 34 | await process.stdin?.write(new TextEncoder().encode(text)); 35 | process.stdin?.close(); 36 | 37 | const [{ code }, stdout] = await Promise.all([ 38 | process.status(), 39 | process.output(), 40 | ]); 41 | process.close(); 42 | 43 | if (code !== 0) { 44 | throw new Error( 45 | `MeCab exited with code ${code}. MeCab stdout: ${stdout}`, 46 | ); 47 | } 48 | 49 | return new TextDecoder().decode(stdout); 50 | } 51 | 52 | /** 53 | * Parse text. 54 | * 55 | * Requires `allow-run` permission. 56 | */ 57 | async parse(text: string): Promise { 58 | const result = await this.runMeCab(text); 59 | 60 | // Remove not needed symbol 61 | const splitedResult = result 62 | .replace(/\nEOS\n/, "") 63 | .replace(/\t/g, ",") 64 | .split("\n"); 65 | 66 | const parsedWords: ParsedWord[] = []; 67 | 68 | for (const line of splitedResult) { 69 | const splitedLine = line.split(","); 70 | const word: ParsedWord = { 71 | surface: splitedLine[0], 72 | feature: splitedLine[1], 73 | featureDetails: [splitedLine[2], splitedLine[3], splitedLine[4]], 74 | conjugationForms: [splitedLine[5], splitedLine[6]], 75 | originalForm: splitedLine[7], 76 | reading: splitedLine[8], 77 | pronunciation: splitedLine[9], 78 | }; 79 | parsedWords.push(word); 80 | } 81 | return parsedWords; 82 | } 83 | 84 | /** 85 | * Get a dump of text. 86 | * 87 | * Requires `allow-run` permission. 88 | */ 89 | async dump(text: string): Promise { 90 | const result = await this.runMeCab(text, ["-Odump"]); 91 | 92 | // Remove not needed symbol 93 | const splitedLines = result.replace(/\n$/, "").split("\n"); 94 | 95 | const parsedWords: ParsedDumpWord[] = []; 96 | 97 | for (const line of splitedLines) { 98 | const splitedLine = line.split(" "); 99 | const splitedLineFeature = splitedLine[2].split(","); 100 | const word: ParsedDumpWord = { 101 | nodeId: Number(splitedLine[0]), 102 | surface: splitedLine[1], 103 | feature: splitedLineFeature[0], 104 | featureDetails: [ 105 | splitedLineFeature[1], 106 | splitedLineFeature[2], 107 | splitedLineFeature[3], 108 | ], 109 | conjugationForms: [splitedLineFeature[4], splitedLineFeature[5]], 110 | originalForm: splitedLineFeature[6], 111 | reading: splitedLineFeature[7], 112 | pronunciation: splitedLineFeature[8], 113 | characterStartByte: Number(splitedLine[3]), 114 | characterEndByte: Number(splitedLine[4]), 115 | rcAttr: Number(splitedLine[5]), 116 | lcAttr: Number(splitedLine[6]), 117 | posId: Number(splitedLine[7]), 118 | characterType: Number(splitedLine[8]), 119 | status: Number(splitedLine[9]), 120 | isBest: Number(splitedLine[10]), 121 | alpha: Number(splitedLine[11]), 122 | beta: Number(splitedLine[12]), 123 | prob: Number(splitedLine[13]), 124 | cost: Number(splitedLine[14]), 125 | }; 126 | parsedWords.push(word); 127 | } 128 | 129 | return parsedWords; 130 | } 131 | 132 | /** 133 | * Split text into words. 134 | * 135 | * Requires `allow-run` permission. 136 | */ 137 | async wakati(text: string): Promise { 138 | const result = await this.runMeCab(text, ["-Owakati"]); 139 | 140 | const editedResult = result.split(" "); 141 | editedResult.pop(); 142 | 143 | return editedResult; 144 | } 145 | 146 | /** 147 | * Add reading to text. 148 | * 149 | * Requires `allow-run` permission. 150 | */ 151 | async yomi(text: string): Promise { 152 | const result = await this.runMeCab(text, ["-Oyomi"]); 153 | 154 | const editedResult = result.replace(/ \n/g, ""); 155 | return editedResult; 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** Advanced execution options of MeCab */ 2 | export interface MeCabOptions { 3 | cwd?: string; 4 | env?: { [key: string]: string }; 5 | } 6 | 7 | export interface ParsedWord { 8 | // 0 9 | surface: string; 10 | // 1 11 | feature: string; 12 | // 2..4 13 | featureDetails: string[]; 14 | // 5..6 15 | conjugationForms: string[]; 16 | // 7 17 | originalForm: string; 18 | // 8 19 | reading?: string; 20 | // 9 21 | pronunciation?: string; 22 | } 23 | 24 | export interface ParsedDumpWord extends ParsedWord { 25 | // 0 26 | nodeId: number; 27 | // surface: 1 28 | // feature: 2 29 | // 3 30 | characterStartByte: number; 31 | // 4 32 | characterEndByte: number; 33 | // 5 34 | rcAttr: number; 35 | // 6 36 | lcAttr: number; 37 | // 7 38 | posId: number; 39 | // 8 40 | characterType: number; 41 | // 9 42 | status: number; 43 | // 10 44 | isBest: number; 45 | // 11 46 | alpha: number; 47 | // 12 48 | beta: number; 49 | // 13 50 | prob: number; 51 | // 14 52 | cost: number; 53 | } 54 | -------------------------------------------------------------------------------- /test/MeCab.test.ts: -------------------------------------------------------------------------------- 1 | import MeCab from "../src/MeCab.ts"; 2 | import { 3 | assertEquals, 4 | assertStrictEquals, 5 | } from "https://deno.land/std@0.104.0/testing/asserts.ts"; 6 | import { 7 | dirname, 8 | fromFileUrl, 9 | join, 10 | } from "https://deno.land/std@0.104.0/path/mod.ts"; 11 | 12 | const testDir = dirname(fromFileUrl(import.meta.url)); 13 | const dummyMeCabPath = join(testDir, "dummyMeCab.ts"); 14 | const cmd = ["deno", "run", dummyMeCabPath]; 15 | 16 | Deno.test("MeCab: parse", async () => { 17 | const text = "あいうえお"; 18 | const mecab = new MeCab(cmd); 19 | const actual = await mecab.parse(text); 20 | const excepted = [{ 21 | surface: text, 22 | feature: "dummy", 23 | featureDetails: ["*", "*", "*"], 24 | conjugationForms: ["*", "*"], 25 | originalForm: "*", 26 | reading: "*", 27 | pronunciation: "*", 28 | }]; 29 | 30 | assertEquals(actual, excepted); 31 | }); 32 | 33 | Deno.test("MeCab: dump", async () => { 34 | const text = "あいうえお"; 35 | const mecab = new MeCab(cmd); 36 | const actual = await mecab.dump(text); 37 | const excepted = [{ 38 | nodeId: 0, 39 | surface: text, 40 | feature: "dummy", 41 | featureDetails: ["*", "*", "*"], 42 | conjugationForms: ["*", "*"], 43 | originalForm: "*", 44 | reading: "*", 45 | pronunciation: "*", 46 | characterStartByte: 0, 47 | characterEndByte: 0, 48 | rcAttr: 0, 49 | lcAttr: 0, 50 | posId: 0, 51 | characterType: 0, 52 | status: 0, 53 | isBest: 0, 54 | alpha: 0, 55 | beta: 0, 56 | prob: 0, 57 | cost: 1, 58 | }]; 59 | 60 | assertEquals(actual, excepted); 61 | }); 62 | 63 | Deno.test("MeCab: wakati", async () => { 64 | const text = "あ いうえ お"; 65 | const mecab = new MeCab(cmd); 66 | const actual = await mecab.wakati(text); 67 | const excepted = ["あ", "いうえ", "お"]; 68 | 69 | assertEquals(actual, excepted); 70 | }); 71 | 72 | Deno.test("MeCab: yomi", async () => { 73 | const text = "あいうえお"; 74 | const mecab = new MeCab(cmd); 75 | const actual = await mecab.yomi(text); 76 | const excepted = "あいうえお"; 77 | 78 | assertStrictEquals(actual, excepted); 79 | }); 80 | -------------------------------------------------------------------------------- /test/dummyMeCab.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S deno run 2 | 3 | const buf = new Uint8Array(1024); 4 | const n = await Deno.stdin.read(buf); 5 | const text = new TextDecoder().decode(buf.subarray(0, n)).trim(); 6 | 7 | if (Deno.args[0] === "-Owakati" || Deno.args[0] === "-Oyomi") { 8 | await Deno.stdout.write(new TextEncoder().encode(`${text} \n`)); 9 | } else if (Deno.args[0] === "-Odump") { 10 | const result = 11 | `0 ${text} dummy,*,*,*,*,*,*,*,* 0 0 0 0 0 0 0 0 0.000000 0.000000 0.000000 1`; 12 | await Deno.stdout.write(new TextEncoder().encode(result)); 13 | } else if (!(Deno.args[0])) { 14 | const result = `${text}\tdummy,*,*,*,*,*,*,*,*\nEOS\n`; 15 | await Deno.stdout.write(new TextEncoder().encode(result)); 16 | } else { 17 | console.log(`${Deno.args[0]} option is not exist.`); 18 | } 19 | --------------------------------------------------------------------------------