├── .prettierignore ├── .npmrc ├── index.js ├── .gitignore ├── .editorconfig ├── .github └── workflows │ ├── bb.yml │ └── main.yml ├── tsconfig.json ├── license ├── package.json ├── test.js ├── lib └── index.js └── readme.md /.prettierignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | *.md 3 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | ignore-scripts=true 2 | package-lock=false 3 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | export {default} from './lib/index.js' 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | node_modules/ 3 | .DS_Store 4 | *.d.ts 5 | *.log 6 | yarn.lock 7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /.github/workflows/bb.yml: -------------------------------------------------------------------------------- 1 | name: bb 2 | on: 3 | issues: 4 | types: [opened, reopened, edited, closed, labeled, unlabeled] 5 | pull_request_target: 6 | types: [opened, reopened, edited, closed, labeled, unlabeled] 7 | jobs: 8 | main: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: unifiedjs/beep-boop-beta@main 12 | with: 13 | repo-token: ${{secrets.GITHUB_TOKEN}} 14 | 
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "checkJs": true, 4 | "customConditions": ["development"], 5 | "declaration": true, 6 | "emitDeclarationOnly": true, 7 | "exactOptionalPropertyTypes": true, 8 | "lib": ["es2022"], 9 | "module": "node16", 10 | "strict": true, 11 | "target": "es2022" 12 | }, 13 | "exclude": ["coverage/", "node_modules/"], 14 | "include": ["**/*.js"] 15 | } 16 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | on: 3 | - pull_request 4 | - push 5 | jobs: 6 | main: 7 | name: ${{matrix.node}} 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-node@v3 12 | with: 13 | node-version: ${{matrix.node}} 14 | - run: npm install 15 | - run: npm test 16 | - uses: codecov/codecov-action@v3 17 | strategy: 18 | matrix: 19 | node: 20 | - lts/gallium 21 | - node 22 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2016 Titus Wormer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the 
Software. 15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "retext-repeated-words", 3 | "version": "5.0.0", 4 | "description": "retext plugin to check for for repeated words", 5 | "license": "MIT", 6 | "keywords": [ 7 | "unified", 8 | "retext", 9 | "retext-plugin", 10 | "plugin", 11 | "repeat", 12 | "repeated", 13 | "repetition", 14 | "words" 15 | ], 16 | "repository": "retextjs/retext-repeated-words", 17 | "bugs": "https://github.com/retextjs/retext-repeated-words/issues", 18 | "funding": { 19 | "type": "opencollective", 20 | "url": "https://opencollective.com/unified" 21 | }, 22 | "author": "Titus Wormer (https://wooorm.com)", 23 | "contributors": [ 24 | "Titus Wormer (https://wooorm.com)", 25 | "Sylvan Swierkosz " 26 | ], 27 | "sideEffects": false, 28 | "type": "module", 29 | "exports": "./index.js", 30 | "files": [ 31 | "lib/", 32 | "index.d.ts", 33 | "index.js" 34 | ], 35 | "dependencies": { 36 | "@types/nlcst": "^2.0.0", 37 | "nlcst-to-string": "^4.0.0", 38 | "unist-util-position": "^5.0.0", 39 | "unist-util-visit": "^5.0.0", 40 | "vfile": "^6.0.0" 41 | }, 42 | "devDependencies": { 43 | "@types/node": "^20.0.0", 44 | "c8": "^8.0.0", 45 | "prettier": "^3.0.0", 46 | "remark-cli": "^11.0.0", 47 | "remark-preset-wooorm": "^9.0.0", 48 | "retext": "^9.0.0", 49 | "type-coverage": "^2.0.0", 50 | "typescript": "^5.0.0", 51 | 
"xo": "^0.56.0" 52 | }, 53 | "scripts": { 54 | "build": "tsc --build --clean && tsc --build && type-coverage", 55 | "format": "remark . --frail --output --quiet && prettier . --log-level warn --write && xo --fix", 56 | "prepack": "npm run build && npm run format", 57 | "test": "npm run build && npm run format && npm run test-coverage", 58 | "test-api": "node --conditions development test.js", 59 | "test-coverage": "c8 --100 --check-coverage --reporter lcov npm run test-api" 60 | }, 61 | "prettier": { 62 | "bracketSpacing": false, 63 | "singleQuote": true, 64 | "semi": false, 65 | "tabWidth": 2, 66 | "trailingComma": "none", 67 | "useTabs": false 68 | }, 69 | "remarkConfig": { 70 | "plugins": [ 71 | "remark-preset-wooorm" 72 | ] 73 | }, 74 | "typeCoverage": { 75 | "atLeast": 100, 76 | "detail": true, 77 | "ignoreCatch": true, 78 | "strict": true 79 | }, 80 | "xo": { 81 | "prettier": true, 82 | "rules": { 83 | "unicorn/prefer-string-replace-all": "off" 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict' 2 | import test from 'node:test' 3 | import {retext} from 'retext' 4 | import retextRepeatedWords from 'retext-repeated-words' 5 | 6 | test('retextRepeatedWords', async function (t) { 7 | await t.test('should expose the public api', async function () { 8 | assert.deepEqual( 9 | Object.keys(await import('retext-repeated-words')).sort(), 10 | ['default'] 11 | ) 12 | }) 13 | 14 | await t.test('should emit a message w/ metadata', async function () { 15 | const file = await retext() 16 | .use(retextRepeatedWords) 17 | .process('Well, it it doesn’t have to be.') 18 | 19 | assert.deepEqual( 20 | JSON.parse(JSON.stringify({...file.messages[0], ancestors: []})), 21 | { 22 | ancestors: [], 23 | column: 7, 24 | fatal: false, 25 | message: 'Unexpected repeated `it`, remove one occurrence', 26 
| line: 1, 27 | name: '1:7-1:12', 28 | place: { 29 | start: {line: 1, column: 7, offset: 6}, 30 | end: {line: 1, column: 12, offset: 11} 31 | }, 32 | reason: 'Unexpected repeated `it`, remove one occurrence', 33 | ruleId: 'it', 34 | source: 'retext-repeated-words', 35 | actual: 'it it', 36 | expected: ['it'], 37 | url: 'https://github.com/retextjs/retext-repeated-words#readme' 38 | } 39 | ) 40 | }) 41 | 42 | await t.test('should catch repeated words', async function () { 43 | const file = await retext() 44 | .use(retextRepeatedWords) 45 | .process( 46 | 'Well, it it doesn’t have to to be. Like a fish in the\nthe sea.' 47 | ) 48 | 49 | assert.deepEqual(file.messages.map(String), [ 50 | '1:7-1:12: Unexpected repeated `it`, remove one occurrence', 51 | '1:26-1:31: Unexpected repeated `to`, remove one occurrence', 52 | '1:51-2:4: Unexpected repeated `the`, remove one occurrence' 53 | ]) 54 | }) 55 | 56 | await t.test('should catch repeated words when uppercase', async function () { 57 | const file = await retext() 58 | .use(retextRepeatedWords) 59 | .process('LIKE A FISH IN THE\nTHE SEA.') 60 | 61 | assert.deepEqual(file.messages.map(String), [ 62 | '1:16-2:4: Unexpected repeated `THE`, remove one occurrence' 63 | ]) 64 | }) 65 | 66 | await t.test('should ignore sentence cased words', async function () { 67 | const file = await retext() 68 | .use(retextRepeatedWords) 69 | .process('Duran Duran is awesome.') 70 | 71 | assert.deepEqual(file.messages, []) 72 | }) 73 | 74 | await t.test('should ignore initialisms', async function () { 75 | const file = await retext() 76 | .use(retextRepeatedWords) 77 | .process('D. D. 
will pop up with.') 78 | 79 | assert.deepEqual(file.messages, []) 80 | }) 81 | 82 | await t.test('should ignore differently cases words', async function () { 83 | const file = await retext().use(retextRepeatedWords).process('DURAN Duran') 84 | 85 | assert.deepEqual(file.messages, []) 86 | }) 87 | 88 | await t.test('should ignore some valid repetitions', async function () { 89 | const file = await retext() 90 | .use(retextRepeatedWords) 91 | .process('the most heartening exhibition they had had since') 92 | 93 | assert.deepEqual(file.messages, []) 94 | }) 95 | 96 | await t.test('should ignore some valid repetitions (mau)', async function () { 97 | const file = await retext() 98 | .use(retextRepeatedWords) 99 | .process( 100 | 'The Mau Mau Uprising, also known as the Mau Mau Rebellion, Mau Mau Revolt, or Kenya Emergency, was a military conflict that took place in British Kenya' 101 | ) 102 | 103 | assert.deepEqual(file.messages, []) 104 | }) 105 | }) 106 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('nlcst').Root} Root 3 | * @typedef {import('vfile').VFile} VFile 4 | */ 5 | 6 | /** 7 | * @typedef Info 8 | * Info on a word. 9 | * @property {number} index 10 | * Index. 11 | * @property {string} value 12 | * Value (lowercase). 13 | */ 14 | 15 | import {toString} from 'nlcst-to-string' 16 | import {pointEnd, pointStart} from 'unist-util-position' 17 | import {SKIP, visit} from 'unist-util-visit' 18 | 19 | // List of words that can legally occur twice. 20 | const list = new Set([ 21 | 'had', 22 | 'that', 23 | 'can', 24 | 'blah', 25 | 'beep', 26 | 'yadda', 27 | 'sapiens', 28 | 'tse', 29 | 'mau' 30 | ]) 31 | 32 | /** 33 | * Check for for repeated words. 
/**
 * Check for repeated words.
 *
 * ###### Notes
 *
 * * Doesn’t warn for certain words which *do* occur twice (`the best exhibition
 *   they had had since`)
 * * Doesn’t warn for initialisms (`D. D. will pop up with…`)
 * * Doesn’t warn for capitalised words (`Duran Duran…`)
 *
 * @returns
 *   Transform.
 */
export default function retextRepeatedWords() {
  /**
   * Transform.
   *
   * Walks the direct children of every sentence and emits a message on
   * `file` for each word that equals (case-insensitively) the word before
   * it, when only white space sits between the two.
   *
   * @param {Root} tree
   *   Tree.
   * @param {VFile} file
   *   File.
   * @returns {undefined}
   *   Nothing.
   */
  return function (tree, file) {
    visit(tree, 'SentenceNode', function (parent) {
      let index = -1
      /**
       * Last word seen before the most recent white space, if any.
       *
       * @type {Info | undefined}
       */
      let previous
      /**
       * Word seen since the most recent white space, if any.
       *
       * @type {Info | undefined}
       */
      let current

      while (++index < parent.children.length) {
        const child = parent.children[index]

        if (child.type === 'WordNode') {
          const value = toString(child)

          current = {index, value: value.toLowerCase()}

          // Words are compared in lowercase, but `ignore` gets the original
          // casing of the second word so it can recognise names and
          // initialisms.
          if (previous && previous.value === current.value && !ignore(value)) {
            const start = pointStart(parent.children[previous.index])
            const end = pointEnd(child)
            const message = file.message(
              'Unexpected repeated `' + value + '`, remove one occurrence',
              {
                ancestors: [parent, child],
                /* c8 ignore next -- verbose to test */
                place: start && end ? {start, end} : undefined,
                ruleId: current.value.replace(/\W+/g, '-'),
                source: 'retext-repeated-words'
              }
            )

            // Both words plus everything between them.
            message.actual = toString(
              parent.children.slice(previous.index, index + 1)
            )
            message.expected = [value]
            message.url =
              'https://github.com/retextjs/retext-repeated-words#readme'
          }
        } else if (child.type === 'WhiteSpaceNode') {
          // White space ends the current word: it becomes the one the next
          // word is compared against.
          previous = current
          current = undefined
        } else {
          // Any other node (such as punctuation) between two words means
          // they are not a repetition.
          previous = undefined
          current = undefined
        }
      }

      // The sentence’s children were handled above; no need to go deeper.
      return SKIP
    })
  }
}

/**
 * Check if `value`, a word which occurs twice, should be ignored.
 *
 * A word is ignored when it is empty, when it is on the list of words that
 * can legally occur twice, when it is an initialism (an uppercase letter
 * followed by a dot), or when it is capitalised (an uppercase letter followed
 * by a tail without uppercase letters).
 *
 * @param {string} value
 *   Word to check.
 * @returns {boolean}
 *   Whether to ignore `value`.
 */
function ignore(value) {
  // Nothing sensible to report for an empty word.
  if (value.length === 0) {
    return true
  }

  // …the most heartening exhibition they had had since…
  if (list.has(value.toLowerCase())) {
    return true
  }

  const head = value.charAt(0)

  // Only a character that differs from its lowercase form is an uppercase
  // letter.
  // Digits, symbols, and letters of caseless scripts equal their own
  // uppercase form too, and must not be mistaken for capitalised words:
  // otherwise repetitions such as `2016 2016` are never reported.
  const capitalised =
    head === head.toUpperCase() && head !== head.toLowerCase()

  if (!capitalised) {
    return false
  }

  // D. D. will pop up with…
  if (value.length === 2 && value.charAt(1) === '.') {
    return true
  }

  const tail = value.slice(1)

  // Duran Duran… Bella Bella…
  return tail === tail.toLowerCase()
}
12 | 13 | ## Contents 14 | 15 | * [What is this?](#what-is-this) 16 | * [When should I use this?](#when-should-i-use-this) 17 | * [Install](#install) 18 | * [Use](#use) 19 | * [API](#api) 20 | * [`unified().use(retextRepeatedWords)`](#unifieduseretextrepeatedwords) 21 | * [Messages](#messages) 22 | * [Types](#types) 23 | * [Compatibility](#compatibility) 24 | * [Related](#related) 25 | * [Contribute](#contribute) 26 | * [License](#license) 27 | 28 | ## What is this? 29 | 30 | This package is a [unified][] ([retext][]) plugin to check for repeated words. 31 | For example, `like like` this. 32 | 33 | ## When should I use this? 34 | 35 | You can opt-into this plugin when you’re dealing with content that might contain 36 | grammar mistakes, and have authors that can fix that content. 37 | 38 | ## Install 39 | 40 | This package is [ESM only][esm]. 41 | In Node.js (version 16+), install with [npm][]: 42 | 43 | ```sh 44 | npm install retext-repeated-words 45 | ``` 46 | 47 | In Deno with [`esm.sh`][esmsh]: 48 | 49 | ```js 50 | import retextRepeatedWords from 'https://esm.sh/retext-repeated-words@5' 51 | ``` 52 | 53 | In browsers with [`esm.sh`][esmsh]: 54 | 55 | ```html 56 | 59 | ``` 60 | 61 | ## Use 62 | 63 | Say our document `example.txt` contains: 64 | 65 | ```txt 66 | Well, it it doesn’t have to to be. Like a fish in the 67 | the sea. 
68 | ``` 69 | 70 | …and our module `example.js` contains: 71 | 72 | ```js 73 | import {read} from 'to-vfile' 74 | import {reporter} from 'vfile-reporter' 75 | import {unified} from 'unified' 76 | import retextEnglish from 'retext-english' 77 | import retextStringify from 'retext-stringify' 78 | import retextRepeatedWords from 'retext-repeated-words' 79 | 80 | const file = await unified() 81 | .use(retextEnglish) 82 | .use(retextRepeatedWords) 83 | .use(retextStringify) 84 | .process(await read('example.txt')) 85 | 86 | console.error(reporter(file)) 87 | ``` 88 | 89 | …then running `node example.js` yields: 90 | 91 | ```txt 92 | example.txt 93 | 1:7-1:12 warning Unexpected repeated `it`, remove one occurrence it retext-repeated-words 94 | 1:26-1:31 warning Unexpected repeated `to`, remove one occurrence to retext-repeated-words 95 | 1:51-2:4 warning Unexpected repeated `the`, remove one occurrence the retext-repeated-words 96 | 97 | ⚠ 3 warnings 98 | ``` 99 | 100 | ## API 101 | 102 | This package exports no identifiers. 103 | The default export is [`retextRepeatedWords`][api-retext-repeated-words]. 104 | 105 | ### `unified().use(retextRepeatedWords)` 106 | 107 | Check for repeated words. 108 | 109 | ###### Parameters 110 | 111 | There are no parameters. 112 | 113 | ###### Returns 114 | 115 | Transform ([`Transformer`][unified-transformer]). 116 | 117 | ###### Notes 118 | 119 | * Doesn’t warn for certain words which *do* occur twice (`the best exhibition 120 | they had had since`) 121 | * Doesn’t warn for initialisms (`D. D. will pop up with…`) 122 | * Doesn’t warn for capitalised words (`Duran Duran…`) 123 | 124 | ## Messages 125 | 126 | Each message is emitted as a [`VFileMessage`][vfile-message] on `file`, with 127 | `source` set to `'retext-repeated-words'`, `ruleId` to the normalized word, 128 | `actual` to both words, and `expected` to suggestions. 129 | 130 | ## Types 131 | 132 | This package is fully typed with [TypeScript][]. 
133 | It exports no additional types. 134 | 135 | ## Compatibility 136 | 137 | Projects maintained by the unified collective are compatible with maintained 138 | versions of Node.js. 139 | 140 | When we cut a new major release, we drop support for unmaintained versions of 141 | Node. 142 | This means we try to keep the current release line, `retext-repeated-words@^5`, 143 | compatible with Node.js 16. 144 | 145 | ## Related 146 | 147 | * [`retext-indefinite-article`](https://github.com/retextjs/retext-indefinite-article) 148 | — check if indefinite articles are used correctly 149 | * [`retext-redundant-acronyms`](https://github.com/retextjs/retext-redundant-acronyms) 150 | — check for redundant acronyms 151 | 152 | ## Contribute 153 | 154 | See [`contributing.md`][contributing] in [`retextjs/.github`][health] for ways 155 | to get started. 156 | See [`support.md`][support] for ways to get help. 157 | 158 | This project has a [code of conduct][coc]. 159 | By interacting with this repository, organization, or community you agree to 160 | abide by its terms. 
161 | 162 | ## License 163 | 164 | [MIT][license] © [Titus Wormer][author] 165 | 166 | 167 | 168 | [build-badge]: https://github.com/retextjs/retext-repeated-words/workflows/main/badge.svg 169 | 170 | [build]: https://github.com/retextjs/retext-repeated-words/actions 171 | 172 | [coverage-badge]: https://img.shields.io/codecov/c/github/retextjs/retext-repeated-words.svg 173 | 174 | [coverage]: https://codecov.io/github/retextjs/retext-repeated-words 175 | 176 | [downloads-badge]: https://img.shields.io/npm/dm/retext-repeated-words.svg 177 | 178 | [downloads]: https://www.npmjs.com/package/retext-repeated-words 179 | 180 | [size-badge]: https://img.shields.io/bundlejs/size/retext-repeated-words 181 | 182 | [size]: https://bundlejs.com/?q=retext-repeated-words 183 | 184 | [sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg 185 | 186 | [backers-badge]: https://opencollective.com/unified/backers/badge.svg 187 | 188 | [collective]: https://opencollective.com/unified 189 | 190 | [chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg 191 | 192 | [chat]: https://github.com/retextjs/retext/discussions 193 | 194 | [npm]: https://docs.npmjs.com/cli/install 195 | 196 | [esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c 197 | 198 | [esmsh]: https://esm.sh 199 | 200 | [typescript]: https://www.typescriptlang.org 201 | 202 | [health]: https://github.com/retextjs/.github 203 | 204 | [contributing]: https://github.com/retextjs/.github/blob/main/contributing.md 205 | 206 | [support]: https://github.com/retextjs/.github/blob/main/support.md 207 | 208 | [coc]: https://github.com/retextjs/.github/blob/main/code-of-conduct.md 209 | 210 | [license]: license 211 | 212 | [author]: https://wooorm.com 213 | 214 | [retext]: https://github.com/retextjs/retext 215 | 216 | [unified]: https://github.com/unifiedjs/unified 217 | 218 | [unified-transformer]: https://github.com/unifiedjs/unified#transformer 219 | 220 | [vfile-message]: 
https://github.com/vfile/vfile-message 221 | 222 | [api-retext-repeated-words]: #unifieduseretextrepeatedwords 223 | --------------------------------------------------------------------------------