├── CHANGELOG.md ├── .github ├── workflows │ └── test.yml └── release.yml ├── .gitignore ├── LICENSE ├── package.json ├── README.md ├── test └── no-doubled-conjunctive-particle-ga-test.js └── src └── no-doubled-conjunctive-particle-ga.js /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.1.0] 4 | 5 | ### Added 6 | 7 | - add `separatorChars` option (@odanado) 8 | 9 | ### Changed 10 | 11 | - upgrade babel 7 12 | - remove espower-babel 13 | 14 | ## [1.0.0] 15 | 16 | - initial release 17 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push, pull_request] 3 | env: 4 | CI: true 5 | jobs: 6 | test: 7 | name: "Test on Node.js ${{ matrix.node-version }}" 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | node-version: [ 16, 18 ] 12 | steps: 13 | - name: checkout 14 | uses: actions/checkout@v2 15 | - name: setup Node.js ${{ matrix.node-version }} 16 | uses: actions/setup-node@v1 17 | with: 18 | node-version: ${{ matrix.node-version }} 19 | - name: Install 20 | run: npm install 21 | - name: Test 22 | run: npm test 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # from https://github.com/github/gitignore/blob/master/Node.gitignore 2 | 3 | # Logs 4 | logs 5 | *.log 6 | npm-debug.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | 13 | # Directory for instrumented libs generated by jscoverage/JSCover 14 | lib-cov 15 | 16 | # Coverage directory used by tools like istanbul 17 | coverage 18 | 19 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 20 | .grunt 21 | 22 | # node-waf configuration 23 | .lock-wscript 24 | 25 | # Compiled binary addons (http://nodejs.org/api/addons.html) 26 | build/Release 27 | 28 | # Dependency directories 29 | node_modules 30 | jspm_packages 31 | 32 | # Optional npm cache directory 33 | .npm 34 | 35 | # Optional REPL history 36 | .node_repl_history 37 | 38 | # Others 39 | lib 40 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - 'Type: Meta' 5 | - 'Type: Question' 6 | - 'Type: Release' 7 | 8 | categories: 9 | - title: Security Fixes 10 | labels: ['Type: Security'] 11 | - title: Breaking Changes 12 | labels: ['Type: Breaking Change'] 13 | - title: Features 14 | labels: ['Type: Feature'] 15 | - title: Bug Fixes 16 | labels: ['Type: Bug'] 17 | - title: Documentation 18 | labels: ['Type: Documentation'] 19 | - title: Refactoring 20 | labels: ['Type: Refactoring'] 21 | - title: Testing 22 | labels: ['Type: Testing'] 23 | - title: Maintenance 24 | labels: ['Type: Maintenance'] 25 | - title: CI 26 | labels: ['Type: CI'] 27 | - title: Dependency Updates 28 | labels: ['Type: Dependencies', "dependencies"] 29 | - title: Other Changes 30 | labels: ['*'] 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 takahashim 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "textlint-rule-no-doubled-conjunctive-particle-ga", 3 | "version": "3.0.0", 4 | "description": "textlint rule plugin to check duplicated conjunctive particle `ga` in a sentence.", 5 | "main": "lib/no-doubled-conjunctive-particle-ga.js", 6 | "files": [ 7 | "lib/", 8 | "src/" 9 | ], 10 | "dependencies": { 11 | "kuromojin": "^3.0.0", 12 | "sentence-splitter": "^5.0.0", 13 | "textlint-rule-helper": "^2.3.1", 14 | "textlint-util-to-string": "^3.3.4" 15 | }, 16 | "devDependencies": { 17 | "@textlint/types": "^13.4.1", 18 | "textlint-scripts": "^13.4.1" 19 | }, 20 | "scripts": { 21 | "build": "textlint-scripts build", 22 | "watch": "textlint-scripts build --watch", 23 | "prepublish": "npm run --if-present build", 24 | "test": "textlint-scripts test", 25 | "textlint": "textlint" 26 | }, 27 | "keywords": [ 28 | "textlint", 29 | "rule" 30 | ], 31 | "author": "takahashim ", 32 | "license": "MIT", 33 | "repository": { 34 | "type": "git", 35 | "url": "https://github.com/textlint-ja/textlint-rule-no-doubled-conjunctive-particle-ga.git" 36 | }, 37 | "bugs": { 38 | "url": "https://github.com/textlint-ja/textlint-rule-no-doubled-conjunctive-particle-ga/issues" 39 | }, 40 | "homepage": "https://github.com/textlint-ja/textlint-rule-no-doubled-conjunctive-particle-ga", 41 | "packageManager": "npm@9.2.0" 42 | } 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # textlint-rule-no-doubled-conjunctive-particle-ga 2 | 3 | This module is a textlint plugin to check duplicated conjunctive particle `ga(が)` in a sentence. 4 | 5 | 逆接の接続助詞「が」は、特に否定の意味ではなくても安易に使われてしまいがちです。これが同一文中に複数回出現していないかどうかをチェックするための[textlint](https://github.com/textlint/textlint "textlint")ルールです。 6 | 7 | 例 8 | 9 | > 今日は早朝から出発した**が**、定刻には間に合わなかった**が**、無事会場に到着した。 10 | 11 | "出発した**が**" と "間に合わなかった**が**" が 同じ接続助詞の「が」が連続しています。 12 | 13 | > キーワードが多く抽出された**が**、クラスタの数が10ということもある**が**、逆に欠点となるようなキーワードが表示されなかった。 14 | 15 | "抽出された**が**" と "こともある**が**" が 同じ接続助詞の「が」が連続しています。 16 | 17 | Note: In generally, using same conjunctive particles is not an issue. `ga` is special case. 18 | 19 | ## Installation 20 | 21 | npm install textlint-rule-no-doubled-conjunctive-particle-ga 22 | 23 | ### Dependencies 24 | 25 | - [azu/kuromojin](https://github.com/azu/kuromojin): a wrapper of [kuromoji.js](https://github.com/takuyaa/kuromoji.js "kuromoji.js") 26 | - [azu/sentence-splitter](https://github.com/azu/sentence-splitter) 27 | 28 | ## Usage 29 | 30 | Via `.textlintrc`(Recommended) 31 | 32 | ```json 33 | { 34 | "rules": { 35 | "no-doubled-conjunctive-particle-ga": true 36 | } 37 | } 38 | ``` 39 | 40 | Via CLI 41 | 42 | textlint --rule no-doubled-conjunctive-particle-ga sample.md 43 | 44 | ### Options 45 | 46 | There's no options for this plugin. 47 | 48 | ## Tests 49 | 50 | npm test 51 | 52 | ## Reference 53 | 54 | - [textlint](https://github.com/textlint/textlint) 55 | - [textlint-rule-no-doubled-joshi](https://github.com/azu/textlint-rule-no-doubled-joshi): this plugin is based on it 56 | - [中野智彦, 丸山広, 高嶋章雄, 中村太一「文章中の重複表現の指摘方法の提案」, 第73回全国大会講演論文集](https://ipsj.ixsq.nii.ac.jp/ej/?action=pages_view_main&active_action=repository_view_main_item_detail&item_id=108359&item_no=1&page_id=13&block_id=8) 57 | 58 | ## License 59 | 60 | MIT 61 | -------------------------------------------------------------------------------- /test/no-doubled-conjunctive-particle-ga-test.js: -------------------------------------------------------------------------------- 1 | import TextLintTester from "textlint-tester"; 2 | import rule from '../src/no-doubled-conjunctive-particle-ga'; 3 | 4 | const tester = new TextLintTester(); 5 | tester.run("no-doubled-conjunctive-particle-ga", rule, { 6 | valid: [ 7 | "この関数がエラーになるのは、関数名が正しくないためです。", 8 | "この文章が問題となっています。", 9 | "今日は早朝から出発したが、定刻には間に合わなかった。が、無事会場に到着した。", 10 | { 11 | text: "今日は早朝から出発したが,定刻には間に合わなかった.間に合わなかったが,無事会場に到着した.", 12 | options: { 13 | separatorChars: ["。", "?", "!", "?", "!", "."] 14 | } 15 | }, 16 | // 括弧の中の区切り文字判定 17 | // https://github.com/textlint-ja/textlint-rule-no-doubled-conjunctive-particle-ga/issues/19 18 | "今日は早朝から出発したが、定刻には間に合わなかった。定刻には間に合わなかったが、無事会場に到着した", 19 | "「今日は早朝から出発したが、定刻には間に合わなかった。定刻には間に合わなかったが、無事会場に到着した」" 20 | ], 21 | invalid: [ 22 | { 23 | text: "今日は早朝から出発したが、定刻には間に合わなかったが、無事会場に到着した。", 24 | errors: [ 25 | { 26 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 27 | // last match 28 | line: 1, 29 | column: 12 30 | } 31 | ] 32 | }, 33 | { 34 | text: "規模は小さいが、収益は多いが、実益は小さい。", 35 | errors: [ 36 | { 37 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 38 | index: 6 39 | } 40 | ] 41 | }, 42 | { 43 | // https://ipsj.ixsq.nii.ac.jp/ej/index.php?action=pages_view_main&active_action=repository_action_common_download&item_id=108359&item_no=1&attribute_id=1&file_no=1&page_id=13&block_id=8 から引用 44 | text: "キーワードが多く抽出されたが、クラスタの数が10ということもあるが、逆に欠点となるようなキーワードが表示されなかった。", 45 | errors: [ 46 | { 47 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 48 | index: 13 49 | } 50 | ] 51 | }, 52 | // option test 53 | { 54 | text: "今日は早朝から出発したが,定刻には間に合わなかった.間に合わなかったが,無事会場に到着した.", 55 | options: { 56 | separatorChars: ["。"] // . を除外 57 | }, 58 | errors: [ 59 | { 60 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 61 | line: 1, 62 | column: 12 63 | } 64 | ] 65 | }, 66 | { 67 | text: "こんにちは。\n今日は早朝から出発したが、定刻には間に合わなかったが、無事会場に到着した。", 68 | errors: [ 69 | { 70 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 71 | line: 2, 72 | column: 12 73 | } 74 | ] 75 | }, 76 | { 77 | text: "\n今日は早朝から出発したが、定刻には間に合わなかったが、無事会場に到着した。", 78 | errors: [ 79 | { 80 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 81 | line: 2, 82 | column: 12 83 | } 84 | ] 85 | }, 86 | // range 87 | { 88 | text: "\n今日は早朝から出発したが、定刻には間に合わなかったが、無事会場に到着した。", 89 | errors: [ 90 | { 91 | message: `文中に逆接の接続助詞 "が" が二回以上使われています。`, 92 | range: [12, 13] 93 | } 94 | ] 95 | }, 96 | ] 97 | }); 98 | -------------------------------------------------------------------------------- /src/no-doubled-conjunctive-particle-ga.js: -------------------------------------------------------------------------------- 1 | // LICENSE : MIT 2 | "use strict"; 3 | import { RuleHelper } from "textlint-rule-helper"; 4 | import { getTokenizer } from "kuromojin"; 5 | import { splitAST, SentenceSplitterSyntax as SentenceSyntax } from "sentence-splitter"; 6 | import { StringSource } from "textlint-util-to-string"; 7 | 8 | 9 | /** 10 | * tokensから、区切り文字で分割したtokensの配列を返す 11 | * 結果は [[token, token], [token, token]] のような配列になる 12 | * @param {*[]} tokens 13 | * @param {string[]} separatorChars 14 | * @returns {*[][]} 15 | */ 16 | const splitTokensBySeparatorChars = (tokens, separatorChars) => { 17 | const results = []; 18 | let current = []; 19 | tokens.forEach(token => { 20 | if (separatorChars.includes(token.surface_form)) { 21 | results.push(current); 22 | current = []; 23 | } else { 24 | current.push(token); 25 | } 26 | }); 27 | if (current.length > 0) { 28 | results.push(current); 29 | } 30 | return results; 31 | } 32 | 33 | const defaultOptions = { 34 | separatorChars: [ 35 | ".", // period 36 | ".", // (ja) zenkaku-period 37 | "。", // (ja) 句点 38 | "?", // question mark 39 | "!", // exclamation mark 40 | "?", // (ja) zenkaku question mark 41 | "!" // (ja) zenkaku exclamation mark 42 | ] 43 | }; 44 | 45 | /* 46 | 1. Paragraph Node -> text 47 | 2. text -> sentences 48 | 3. tokenize sentence 49 | 4. report error if found word that match the rule. 50 | 51 | TODO: need abstraction 52 | */ 53 | /** 54 | * @param {import("@textlint/types").TextlintRuleContext} context 55 | * @param {*} options 56 | * @returns {import("@textlint/types").TextlintRuleReportHandler} 57 | */ 58 | export default function (context, options = {}) { 59 | const separatorChars = options.separatorChars ?? defaultOptions.separatorChars; 60 | const helper = new RuleHelper(context); 61 | const { Syntax, report, getSource, RuleError, locator } = context; 62 | return { 63 | [Syntax.Paragraph](node) { 64 | if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote, Syntax.Emphasis])) { 65 | return; 66 | } 67 | const isSentenceNode = node => { 68 | return node.type === SentenceSyntax.Sentence; 69 | }; 70 | const sentences = splitAST(node, { 71 | SeparatorParser: { 72 | separatorCharacters: separatorChars 73 | } 74 | }).children.filter(isSentenceNode); 75 | const source = new StringSource(node); 76 | return getTokenizer().then(tokenizer => { 77 | const checkSentence = (sentence) => { 78 | const sentenceText = getSource(sentence); 79 | const tokens = tokenizer.tokenizeForSentence(sentenceText); 80 | const isConjunctiveParticleGaToken = token => { 81 | return token.pos_detail_1 === "接続助詞" && token.surface_form === "が"; 82 | }; 83 | // カッコの中はセンテンスとして分解されないため、 84 | // 区切り文字で分割したtokensの配列を取得 85 | const tokensBySentence = splitTokensBySeparatorChars(tokens, separatorChars); 86 | tokensBySentence.forEach(tokens => { 87 | const conjunctiveParticleGaTokens = tokens.filter(isConjunctiveParticleGaToken); 88 | if (conjunctiveParticleGaTokens.length <= 1) { 89 | return; 90 | } 91 | const current = conjunctiveParticleGaTokens[0]; 92 | const sentenceIndex = source.originalIndexFromPosition(sentence.loc.start) || 0; 93 | const currentIndex = sentenceIndex + (current.word_position - 1); 94 | report(node, new RuleError(`文中に逆接の接続助詞 "が" が二回以上使われています。`, { 95 | padding: locator.range([currentIndex, currentIndex + 1]) 96 | })); 97 | }); 98 | } 99 | sentences.forEach(checkSentence); 100 | }); 101 | } 102 | } 103 | }; 104 | --------------------------------------------------------------------------------