├── .eslintrc.json ├── .github └── workflows │ ├── ci.yml │ ├── docs.yml │ └── npm-publish.yml ├── .gitignore ├── .prettierignore ├── .prettierrc.json ├── LICENSE ├── README.md ├── README.zh-CN.md ├── bin └── index.js ├── deploy.sh ├── docs ├── .vitepress │ └── config.ts ├── PlaygroundCore.vue ├── contribute.md ├── design.md ├── index.md ├── inspector │ ├── block.vue │ ├── group.vue │ ├── inspector.vue │ ├── labels.css │ ├── legends.vue │ ├── non-block.vue │ ├── single.vue │ └── status.vue ├── playground.md ├── public │ ├── CNAME │ └── zhlint.svg ├── screenshot-browser.png ├── screenshot-cli.png ├── zhlint.es.js ├── zhlint.es.js.map └── zhlint.svg ├── package.json ├── pnpm-lock.yaml ├── scripts └── gen-readme.mjs ├── src ├── hypers │ ├── hexo.ts │ ├── ignore.ts │ ├── md.ts │ ├── types.ts │ └── vuepress.ts ├── ignore.ts ├── index.ts ├── join.ts ├── options.ts ├── parser │ ├── char.ts │ ├── index.ts │ ├── messages.ts │ ├── parse.ts │ ├── travel.ts │ ├── types.ts │ └── util.ts ├── rc │ └── index.ts ├── replace-block.ts ├── report.ts ├── rules │ ├── case-abbrs.ts │ ├── case-html-entity.ts │ ├── case-linebreak.ts │ ├── case-pure-western.ts │ ├── case-zh-units.ts │ ├── index.ts │ ├── messages.ts │ ├── punctuation-unification.ts │ ├── punctuation-width.ts │ ├── space-bracket.ts │ ├── space-code.ts │ ├── space-hyper-mark.ts │ ├── space-letter.ts │ ├── space-punctuation.ts │ ├── space-quotation.ts │ ├── space-trim.ts │ └── util.ts └── run.ts ├── test ├── basic.test.ts ├── debug.test.ts ├── example-article.md ├── example-debug.md ├── example-disabled.md ├── example-ignore.md ├── example-units-fixed.md ├── example-units.md ├── example-vuepress-fixed.md ├── example-vuepress.md ├── examples.test.ts ├── hexo.test.ts ├── lint.test.ts ├── md.test.ts ├── prepare.ts ├── rules.test.ts ├── uncategorized.test.ts └── vuepress.test.ts ├── tsconfig-build.json ├── tsconfig.json └── vite.config.ts /.eslintrc.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"], 4 | "parser": "@typescript-eslint/parser", 5 | "parserOptions": { "project": ["./tsconfig.json", "./vite.config.ts"] }, 6 | "plugins": ["@typescript-eslint"], 7 | "rules": {}, 8 | "ignorePatterns": ["bin/*", "coverage/*", "dist/*", "docs/*", "lib/*"] 9 | } 10 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | 3 | on: 4 | push: 5 | branches-ignore: ['gh-pages'] 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: pnpm/action-setup@v3 15 | with: 16 | version: 9 17 | - name: Use Node.js 20.x 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: 20.x 21 | cache: 'pnpm' 22 | - name: Install dependencies 23 | run: pnpm install --frozen-lockfile 24 | - run: pnpm test 25 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs deployment 2 | 3 | on: 4 | push: 5 | branches: [release] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: pnpm/action-setup@v3 14 | with: 15 | version: 9 16 | - uses: actions/setup-node@v4 17 | with: 18 | node-version: 20 19 | cache: 'pnpm' 20 | - run: | 21 | pnpm install --frozen-lockfile 22 | pnpm run docs:build 23 | cd docs/.vitepress/dist 24 | git init 25 | git config user.name "${GITHUB_ACTOR}" 26 | git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" 27 | git add -A 28 | git status 29 | git commit -m 'deploy' 30 | git push -f 
https://jinjiang:${{secrets.ACCESS_TOKEN}}@github.com/zhlint-project/zhlint.git master:gh-pages 31 | cd - 32 | -------------------------------------------------------------------------------- /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | name: Node.js Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | permissions: 9 | id-token: write 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: pnpm/action-setup@v3 18 | with: 19 | version: 9 20 | - uses: actions/setup-node@v4 21 | with: 22 | node-version: 20.x 23 | cache: 'pnpm' 24 | - name: Install dependencies 25 | run: pnpm install 26 | - run: pnpm test 27 | - run: pnpm prepublish 28 | 29 | publish-npm: 30 | needs: build 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/checkout@v4 34 | - uses: pnpm/action-setup@v3 35 | with: 36 | version: 9 37 | - uses: actions/setup-node@v4 38 | with: 39 | node-version: 20.x 40 | registry-url: https://registry.npmjs.org/ 41 | - run: pnpm install --frozen-lockfile 42 | - run: pnpm publish --no-git-checks --provenance 43 | env: 44 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | report.*.json 8 | 9 | # Runtime data 10 | pids 11 | *.pid 12 | *.seed 13 | *.pid.lock 14 | 15 | # Directory for instrumented libs generated by jscoverage/JSCover 16 | lib-cov 17 | 18 | # Coverage directory used by tools like istanbul 19 | coverage 20 | 21 | # nyc test coverage 22 | .nyc_output 23 | 24 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 25 | .grunt 26 | 27 | # Bower dependency directory (https://bower.io/) 28 | bower_components 29 | 30 | # 
node-waf configuration 31 | .lock-wscript 32 | 33 | # Compiled binary addons (https://nodejs.org/api/addons.html) 34 | build/Release 35 | 36 | # Dependency directories 37 | node_modules/ 38 | jspm_packages/ 39 | 40 | # TypeScript v1 declaration files 41 | typings/ 42 | 43 | # Optional npm cache directory 44 | .npm 45 | 46 | # Optional eslint cache 47 | .eslintcache 48 | 49 | # Optional REPL history 50 | .node_repl_history 51 | 52 | # Output of 'npm pack' 53 | *.tgz 54 | 55 | # Yarn Integrity file 56 | .yarn-integrity 57 | 58 | # dotenv environment variables file 59 | .env 60 | 61 | # next.js build output 62 | .next 63 | 64 | # personal plan 65 | todo 66 | 67 | # temp files 68 | .DS_Store 69 | temp 70 | 71 | # generated files 72 | dist 73 | lib 74 | 75 | # vitepress cache 76 | docs/.vitepress/cache/ 77 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | coverage 2 | dist 3 | docs/.vitepress/dist 4 | docs/zhlint.es.js 5 | lib 6 | node_modules 7 | temp 8 | test/*.md 9 | pnpm-lock.yaml 10 | README.md 11 | README.zh-CN.md 12 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "tabWidth": 2, 4 | "singleQuote": true, 5 | "printWidth": 80, 6 | "trailingComma": "none" 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jinjiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import fs from 'fs' 4 | import minimist from 'minimist' 5 | import * as glob from 'glob' 6 | import gitignore from 'ignore' 7 | import { readRc, runWithConfig, report } from '../lib/index.js' 8 | 9 | const helpMessage = ` 10 | This is zhlint! 11 | 12 | Usage: 13 | zhlint [, ...] 14 | zhlint [, ...] 
/**
 * CLI entry point.
 *
 * Parses `process.argv` and then either prints the version, prints the help
 * message, or lints every file matched by the positional glob patterns.
 *
 * Flags read here: `-v/--version`, `-h/--help`, `-f/--fix`, `-o/--output`,
 * `--config`, `--ignore` / `--file-ignore`, `--case-ignore` and `--dir`.
 *
 * Without `--fix` or `--output`, the process exits with the code returned by
 * `report()` when that code is non-zero.
 */
const main = () => {
  const argv = minimist(process.argv.slice(2))

  const help = () => console.log(helpMessage)

  if (argv.v || argv.version) {
    // This file is an ES module (it uses `import`), so `require` is not
    // defined here. Read and parse package.json relative to this file instead.
    const pkgUrl = new URL('../package.json', import.meta.url)
    const pkg = JSON.parse(fs.readFileSync(pkgUrl, { encoding: 'utf8' }))
    console.log(pkg.version)
    return
  }

  if (argv.h || argv.help) {
    help()
    return
  }

  // To support other CLI conventions like `lint-staged`.
  if (typeof argv.fix === 'string') {
    argv._.push(argv.fix)
    argv.fix = true
  }

  if (argv._ && argv._.length) {
    // Use every positional argument as a pattern (the help text documents
    // `zhlint foo.md bar.md`). minimist may parse numeric-looking arguments
    // as numbers, so normalize them to strings for glob.
    const filePatterns = argv._.map(String)
    const configDir = argv.dir
    const configPath = argv.config
    const fileIgnorePath = argv.ignore || argv['file-ignore']
    const caseIgnorePath = argv['case-ignore']
    const outputPath = argv.o || argv.output
    const config = readRc(configDir, configPath, fileIgnorePath, caseIgnorePath)
    const fileIgnore = gitignore().add(config.fileIgnores)
    const fileIgnoreFilter = fileIgnore.createFilter()
    try {
      const files = glob.sync(filePatterns)
      const resultList = files.filter(fileIgnoreFilter).map((file) => {
        console.log(`[start] ${file}`)
        const origin = fs.readFileSync(file, { encoding: 'utf8' })
        const { result, validations } = runWithConfig(origin, config)
        return {
          file,
          origin,
          result,
          validations
        }
      })
      const exitCode = report(resultList)
      if (outputPath) {
        // Check the filtered list, not the raw glob result: an ignored file
        // leaves `resultList` empty and `resultList[0]` would be undefined.
        if (resultList.length === 1) {
          const { file, result } = resultList[0]
          fs.writeFileSync(outputPath, result)
          console.log(`[output] ${file} -> ${outputPath}`)
        } else {
          console.error(
            `Sorry. If you use argument '--output' or '-o', you could only pass one file as the input.`
          )
        }
      } else if (argv.f || argv.fix) {
        // Compare against `origin` (the content read from disk). The entries
        // have no `value` field, so the previous check was always true and
        // every file was rewritten even when nothing changed.
        resultList.forEach(({ file, origin, result }) => {
          if (origin !== result) {
            fs.writeFileSync(file, result)
            console.log(`[fixed] ${file}`)
          }
        })
      } else {
        if (exitCode) {
          process.exit(exitCode)
        }
      }
    } catch (e) {
      console.error(e)
    }
    return
  }

  help()
}
25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /docs/PlaygroundCore.vue: -------------------------------------------------------------------------------- 1 | 5 | 6 | 52 | 53 | 62 | 63 | 97 | -------------------------------------------------------------------------------- /docs/contribute.md: -------------------------------------------------------------------------------- 1 | # Contribution guide 2 | 3 | ## How to start 4 | 5 | (wip) 6 | 7 | ## Workflow 8 | 9 | (wip) 10 | 11 | ## Project setup 12 | 13 | ### Commands 14 | 15 | Dev and debug 16 | 17 | - `dev` 18 | - `dev:ui` 19 | 20 | Build 21 | 22 | - `build` 23 | - `build:js` 24 | - `build:node` 25 | - `build:browser` 26 | - `build:type` 27 | 28 | Lint and test 29 | 30 | - `format` 31 | - `lint` 32 | - `test` 33 | - `coverage` 34 | 35 | Docs 36 | 37 | - `docs:dev` 38 | - `docs:build` 39 | - `docs:serve` 40 | - `docs:deploy` 41 | 42 | ### Output 43 | 44 | - Bin: `./bin/index.js` (CJS) 45 | - Node.js: `./lib/index.js` (CJS) 46 | - Unpkg/Jsdelivr: `./dist/zhlint.umd.js` (UMD) 47 | - Type defs: `./dist/zhlint.d.ts` 48 | - Published files: 49 | - `bin/`, `lib/`, `dist/` 50 | - `tsconfig.json` 51 | - `docs/*.{svg,png}`, `README.md` 52 | 53 | ### File structure 54 | 55 | Temp 56 | 57 | - `temp/` 58 | - `TODO` 59 | 60 | Git/GitHub related 61 | 62 | - `.github/` 63 | - `.gitignore` 64 | 65 | Code 66 | 67 | - `bin/` 68 | - `src/` 69 | - `lib/` (ignored) 70 | - `dist/` (ignored) 71 | 72 | Types 73 | 74 | - `types/`? 
75 | - `api-extractor.json` 76 | 77 | Docs 78 | 79 | - `docs/` 80 | - `docs/.vitepress/` 81 | - `deploy.sh` 82 | 83 | Tests 84 | 85 | - `test/` 86 | - `./stdout.log` 87 | - `./stderr.log` 88 | 89 | Package info 90 | 91 | - `package.json` 92 | - `pnpm-lock.yaml` 93 | - `node_modules/` (ignored) 94 | 95 | Build config 96 | 97 | - `tsconfig-build.json` 98 | - `tsconfig.json` 99 | - `vite.config.ts` 100 | 101 | Linting 102 | 103 | - `.eslintrc.json` 104 | - `.prettierignore` 105 | - `.prettierrc.json` 106 | 107 | Basic info 108 | 109 | - `LICENSE` 110 | - `README.md` 111 | - `README.zh-CN.md` 112 | -------------------------------------------------------------------------------- /docs/inspector/block.vue: -------------------------------------------------------------------------------- 1 | 7 | 8 | 22 | -------------------------------------------------------------------------------- /docs/inspector/group.vue: -------------------------------------------------------------------------------- 1 | 23 | 24 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/inspector/inspector.vue: -------------------------------------------------------------------------------- 1 | 26 | 27 | 48 | 49 | 60 | -------------------------------------------------------------------------------- /docs/inspector/labels.css: -------------------------------------------------------------------------------- 1 | .western-letter { 2 | color: #ff0000; 3 | } 4 | 5 | .cjk-char { 6 | color: #0000ff; 7 | } 8 | 9 | .halfwidth-pause-or-stop, 10 | .fullwidth-pause-or-stop, 11 | .halfwidth-other-punctuation, 12 | .fullwidth-other-punctuation, 13 | .quotation-start-value, 14 | .quotation-end-value, 15 | .bracket-mark { 16 | color: #660066; 17 | } 18 | 19 | .non-block, 20 | .hyper-mark, 21 | .hyper-content, 22 | .code-content { 23 | color: #666666; 24 | } 25 | 26 | .changed { 27 | background-color: #ffff00; 28 | } 29 | 30 | .ignored { 31 | background-color: #f0f0f0; 32 | 
} 33 | 34 | .current { 35 | background-color: #f0f0ff; 36 | } 37 | -------------------------------------------------------------------------------- /docs/inspector/legends.vue: -------------------------------------------------------------------------------- 1 | 14 | 15 | 26 | -------------------------------------------------------------------------------- /docs/inspector/non-block.vue: -------------------------------------------------------------------------------- 1 | 29 | 30 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /docs/inspector/single.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /docs/inspector/status.vue: -------------------------------------------------------------------------------- 1 | 114 | 115 | 121 | 122 | 127 | -------------------------------------------------------------------------------- /docs/playground.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Playground | zhlint' 3 | aside: false 4 | editLink: false 5 | --- 6 | 7 | 10 | 11 | # Playground 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/public/CNAME: -------------------------------------------------------------------------------- 1 | zhlint.jinjiang.dev 2 | -------------------------------------------------------------------------------- /docs/public/zhlint.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | Logo 9 | Drawing exported from Concepts: Smarter Sketching 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/screenshot-browser.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhlint-project/zhlint/c2f791db4f35f1ee00b870902e7cbe121c951329/docs/screenshot-browser.png -------------------------------------------------------------------------------- /docs/screenshot-cli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhlint-project/zhlint/c2f791db4f35f1ee00b870902e7cbe121c951329/docs/screenshot-cli.png -------------------------------------------------------------------------------- /docs/zhlint.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | Logo 9 | Drawing exported from Concepts: Smarter Sketching 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zhlint", 3 | "version": "0.8.2", 4 | "description": "A linting tool for Chinese language.", 5 | "bin": { 6 | "zhlint": "./bin/index.js" 7 | }, 8 | "type": "module", 9 | "main": "./lib/index.js", 10 | "unpkg": "./dist/zhlint.umd.js", 11 | "jsdelivr": "./dist/zhlint.umd.js", 12 | "types": "./lib/index.d.ts", 13 | "files": [ 14 | "tsconfig.json", 15 | "dist/*", 16 | "bin/*", 17 | "lib/*", 18 | "docs/*.png", 19 | "docs/public/zhlint.svg", 20 | "README.md" 21 | ], 22 | "scripts": { 23 | "prepare": "pnpm run build", 24 | "dev": "vitest", 25 | "debug": "vitest debug.test.ts", 26 | "dev:ui": "vitest --ui", 27 | "build": "pnpm run build:node && pnpm run build:browser", 28 | "build:node": "tsc --project tsconfig-build.json", 29 | "build:browser": "vite build", 30 | "lint": "prettier --cache --check .", 31 | "lint:fix": "prettier --cache --write .", 32 | "lint:eslint": "eslint {src,test}/**.ts --cache", 33 | "lint:eslint:fix": "eslint {src,test}/**.ts --cache --fix", 34 | "test": "pnpm run test:tsc && pnpm run test:vitest", 35 | "test:tsc": "tsc --noEmit", 36 | "test:vitest": 
"vitest run", 37 | "coverage": "vitest run --coverage", 38 | "docs:dev": "vitepress dev docs", 39 | "docs:runtime": "cp dist/zhlint.es.js* docs/", 40 | "docs:readme": "node scripts/gen-readme.mjs", 41 | "docs:build": "vitepress build docs", 42 | "docs:serve": "vitepress serve docs", 43 | "docs:deploy": "./deploy.sh", 44 | "prepublish": "pnpm run build && pnpm run docs:runtime && pnpm run docs:readme" 45 | }, 46 | "repository": { 47 | "type": "git", 48 | "url": "git+https://github.com/zhlint-project/zhlint.git" 49 | }, 50 | "keywords": [ 51 | "lint", 52 | "zh", 53 | "Chinese" 54 | ], 55 | "author": "Jinjiang ", 56 | "license": "MIT", 57 | "bugs": { 58 | "url": "https://github.com/zhlint-project/zhlint/issues" 59 | }, 60 | "homepage": "https://github.com/zhlint-project/zhlint#readme", 61 | "devDependencies": { 62 | "@types/mdast": "^4.0.3", 63 | "@types/node": "^20.11.17", 64 | "@types/unist": "^3.0.2", 65 | "@typescript-eslint/eslint-plugin": "^6.21.0", 66 | "@typescript-eslint/parser": "^6.21.0", 67 | "@vitest/ui": "^1.2.2", 68 | "eslint": "^8.56.0", 69 | "mdast-util-to-markdown": "^2.1.0", 70 | "node-stdlib-browser": "^1.2.0", 71 | "prettier": "^3.2.5", 72 | "typescript": "^5.3.3", 73 | "vite": "^5.1.1", 74 | "vite-plugin-node-polyfills": "^0.19.0", 75 | "vitepress": "^1.0.0", 76 | "vitest": "^1.2.2" 77 | }, 78 | "dependencies": { 79 | "chalk": "^3.0.0", 80 | "glob": "^10.3.10", 81 | "ignore": "^5.3.1", 82 | "minimist": "^1.2.8", 83 | "remark-custom-container": "^1.3.1", 84 | "remark-frontmatter": "^4.0.1", 85 | "remark-gfm": "^3.0.1", 86 | "remark-parse": "^10.0.0", 87 | "unified": "^10.0.0" 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /scripts/gen-readme.mjs: -------------------------------------------------------------------------------- 1 | import { readFileSync, writeFileSync } from 'fs' 2 | import { isAbsolute, relative, resolve } from 'path' 3 | import { unified } from 'unified' 4 | import markdown from 
// Matches URLs that must not be treated as file paths: anything that starts
// with a scheme (`https:`, `data:`, ...) or is protocol-relative (`//host`).
const externalUrlPattern = /^(?:[a-z][a-z\d+.-]*:|\/\/)/i

/**
 * Rewrites the URL of an image node so that it is relative to the repository
 * root instead of the `docs/` directory. Nodes of any other type are left
 * untouched.
 *
 * Absolute file paths and external URLs are kept as they are:
 * `path.isAbsolute()` alone returns false for `https://...`, which would
 * otherwise be turned into a bogus `docs/https:/...` path.
 *
 * @param {{ type: string, url?: string }} node a leaf node of the tree
 */
const travelInlines = (node) => {
  if (node.type !== 'image') {
    return
  }
  if (isAbsolute(node.url) || externalUrlPattern.test(node.url)) {
    return
  }
  node.url = relative('.', resolve('docs', node.url))
}

/**
 * Recursively walks the tree in place. For every node that has children, the
 * `html` and `yaml` children are dropped and the remaining children are
 * visited; nodes without children are handed to `travelInlines`.
 *
 * @param {{ type: string, children?: object[] }} node the node to visit
 */
const travelBlocks = (node) => {
  if (node.children) {
    node.children = node.children.filter(
      (child) => child.type !== 'html' && child.type !== 'yaml'
    )
    node.children.forEach((child) => {
      travelBlocks(child)
    })
  } else {
    travelInlines(node)
  }
}
// Shape of an ignored-case description:
//   [prefix-,]textStart[,textEnd][,-suffix]
// The capture groups are named after the `IgnoredCase` fields that
// `parseIngoredCase` destructures from `match.groups`.
const ignoredCaseMatcher =
  /^(?:(?<prefix>.+?)-,)?(?<textStart>.+?)(?:,(?<textEnd>.+?))?(?:,-(?<suffix>.+?))?$/

/**
 * Parses the textual description of an ignored case.
 *
 * @param text the description, e.g. `prefix-,textStart,textEnd,-suffix`
 * @returns the parsed `prefix`, `textStart`, `textEnd` and `suffix` (parts
 * that are absent from `text` are `undefined`), or `undefined` when `text`
 * does not match the expected shape (e.g. an empty string).
 */
export const parseIngoredCase = (text: string): IgnoredCase | undefined => {
  const matchResult = text.match(ignoredCaseMatcher)
  if (matchResult) {
    const { prefix, textStart, textEnd, suffix } =
      matchResult.groups as IgnoredCase
    return {
      prefix,
      textStart,
      textEnd,
      suffix
    }
  }
}
// Position related

type NormalizedPosition = {
  start: number
  end: number
}

/**
 * Reduces a unist position to plain start/end offsets. A missing position or
 * a missing offset falls back to 0.
 */
const parsePosition = (position?: Position): NormalizedPosition => ({
  start: position?.start?.offset || 0,
  end: position?.end?.offset || 0
})

// AST related

/** Whether the node carries a `children` property. */
const isParent = (node: Node): node is Ast.Parent => {
  return (node as Ast.Parent).children !== undefined
}

type BlockType = Ast.Paragraph | Ast.Heading | Ast.TableCell
// mdast node types are camelCase: a table cell is `tableCell`. The previous
// `'table-cell'` entry never matched any node.
const blockTypes: string[] = ['paragraph', 'heading', 'tableCell']
/** Whether the node is one of the text-carrying block nodes listed above. */
const isBlock = (node: Node): node is BlockType => {
  return blockTypes.includes(node.type)
}

type InlineContentType =
  | Ast.Emphasis
  | Ast.Strong
  | Ast.Delete
  | Ast.Link
  | Ast.LinkReference
const inlineContentTypes: string[] = [
  'emphasis',
  'strong',
  'delete',
  'link',
  'linkReference'
]
/** Whether the node is an inline wrapper whose children are walked further. */
const isInlineContent = (node: Node): node is InlineContentType => {
  return inlineContentTypes.includes(node.type)
}

// NOTE(review): the parser comments in this file mention "footnote ref" as a
// raw inline, but the list below contains `footnoteDefinition` and not
// `footnoteReference` - confirm which one is intended.
type InlineRawType =
  | Ast.InlineCode
  | Ast.Break
  | Ast.Image
  | Ast.ImageReference
  | Ast.FootnoteDefinition
  | Ast.Html
const inlineRawTypes: string[] = [
  'inlineCode',
  'break',
  'image',
  'imageReference',
  'footnoteDefinition',
  'html'
]
/** Whether the node is an inline that is recorded as a raw mark. */
const isInlineRaw = (node: Node): node is InlineRawType => {
  return inlineRawTypes.includes(node.type)
}

// Marks related

// A block node together with the inline nodes found inside it, the marks
// generated from them, and the block's source text.
type BlockMark = {
  block: BlockType
  inlineMarks: InlineMark[]
  hyperMarks: Mark[]
  value: string
}

// An inline node found inside a block; `raw` is true for raw inlines.
type InlineMark = {
  inline: InlineContentType | InlineRawType
  raw: boolean
}
/**
 * Walks the tree and records every block node (as decided by `isBlock`) into
 * `blocks`, collecting the inline nodes of each block on the way. `yaml`
 * nodes are skipped; any other non-block node is searched recursively.
 */
const travelBlocks = (node: Node, blocks: BlockMark[]): void => {
  if (isParent(node)) {
    node.children.forEach((child) => {
      if (child.type === 'yaml') {
        return
      }
      if (isBlock(child)) {
        const blockMark: BlockMark = {
          block: child,
          inlineMarks: [],
          hyperMarks: [],
          value: '' // to be initialized
        }
        blocks.push(blockMark)
        travelInlines(child, blockMark)
      } else {
        travelBlocks(child, blocks)
      }
    })
  }
}

/**
 * Collects the inline nodes below `node` into `blockMark.inlineMarks`.
 * Inline content nodes are recorded with `raw: false` and walked further;
 * raw inline nodes are recorded with `raw: true` and not walked.
 */
const travelInlines = (node: Node, blockMark: BlockMark): void => {
  if (isParent(node)) {
    node.children.forEach((child) => {
      if (isInlineContent(child)) {
        blockMark.inlineMarks.push({ inline: child, raw: false })
        travelInlines(child, blockMark)
      }
      if (isInlineRaw(child)) {
        blockMark.inlineMarks.push({ inline: child, raw: true })
      }
    })
  }
}

/**
 * Fills `blockMark.value` and `blockMark.hyperMarks` from the inline nodes
 * collected for the block.
 *
 * @param blockMark the block to process; left untouched when its node has no
 * position
 * @param str the source text that the node offsets refer to
 */
const processBlockMark = (blockMark: BlockMark, str: string): void => {
  const { block, inlineMarks } = blockMark
  if (!block.position) {
    return
  }
  // All mark indexes are stored relative to the start of the block.
  const offset = block.position.start.offset || 0

  const marks: Mark[] = []
  // Opening `<code>` marks that still wait for their closing tag.
  const unresolvedCodeMarks: RawMark[] = []

  // Generate all the marks includes hyper (inline) and raw.
  inlineMarks.forEach((inlineMark) => {
    const { inline } = inlineMark
    if (!inline.position) {
      return
    }
    const startOffset = inline.position.start.offset || 0
    const endOffset = inline.position.end.offset || 0

    if (isInlineRaw(inline)) {
      const mark: Mark = {
        type: MarkType.RAW,
        // TODO: typeof RawMark.meta
        meta: inline.type,
        startIndex: startOffset - offset,
        endIndex: endOffset - offset,
        startValue: str.substring(startOffset, endOffset),
        endValue: ''
      }
      // TODO: Ast.InlineCode?
      // Opening tag: pattern mirrors the closing-tag check below.
      if (mark.startValue.match(/<code.*>/)) {
        const rawMark: RawMark = { ...mark, code: MarkSideType.LEFT }
        unresolvedCodeMarks.push(rawMark)
        marks.push(rawMark)
        return
      } else if (mark.startValue.match(/<\/code.*>/)) {
        const rawMark: RawMark = { ...mark, code: MarkSideType.RIGHT }
        // Pair the closing tag with the most recent unresolved opening tag.
        const leftCode = unresolvedCodeMarks.pop()
        if (leftCode) {
          leftCode.rightPair = rawMark
        }
        marks.push(rawMark)
        return
      }
      marks.push(mark)
    } else {
      const firstChild = inline.children[0]
      const lastChild = inline.children[inline.children.length - 1]
      // An inline without children (e.g. an empty link) has no inner range:
      // both children are undefined, so guard before reading `.position`.
      if (!firstChild?.position || !lastChild?.position) {
        return
      }
      const innerStartOffset = firstChild.position.start.offset || 0
      const innerEndOffset = lastChild.position.end.offset || 0
      const mark: Mark = {
        type: MarkType.HYPER,
        // TODO: typeof RawMark.meta
        meta: inline.type,
        startIndex: startOffset - offset,
        startValue: str.substring(startOffset, innerStartOffset),
        endIndex: innerEndOffset - offset,
        endValue: str.substring(innerEndOffset, endOffset)
      }
      marks.push(mark)
    }
  })

  blockMark.value = str.substring(
    block.position.start.offset || 0,
    block.position.end.offset || 0
  )

  blockMark.hyperMarks = marks
    .map((mark) => {
      if (isRawMark(mark)) {
        // Closing `</code>` marks are dropped; the opening mark is extended
        // to the end index of its closing partner instead.
        if (mark.code === MarkSideType.RIGHT) {
          return
        }
        if (mark.code === MarkSideType.LEFT) {
          const { rightPair } = mark
          mark.startValue = str.substring(
            mark.startIndex + offset,
            mark.endIndex + offset
          )
          mark.endIndex = rightPair?.endIndex || 0
          mark.endValue = ''
          delete mark.rightPair
        }
      }
      return mark
    })
    .filter(Boolean) as Mark[]
}
break/code/html 223 | - for all phrasings: 224 | - no text: inline code/break/image/image ref/footnote ref/html 225 | - marks: emphasis/strong/delete/footnote/link/link ref 226 | */ 227 | const parser = (data: ParsedStatus): ParsedStatus => { 228 | const value = data.value 229 | const modifiedValue = data.modifiedValue 230 | const ignoredByParsers = data.ignoredByParsers 231 | 232 | const blockMarks: BlockMark[] = [] 233 | 234 | const tree: Ast.Root = unified() 235 | .use(markdown) 236 | .use(gfm) 237 | .use(frontmatter) 238 | .parse(modifiedValue) as Ast.Root 239 | 240 | // - travel and record all paragraphs/headings/table-cells into blocks 241 | // - for each block, travel and record all 242 | // - - 'hyper' marks: emphasis/strong/delete/footnote/link/linkRef and continue 243 | // - - 'raw' marks: inlineCode/break/image/imageRef/footnoteRef/html and stop 244 | travelBlocks(tree, blockMarks) 245 | 246 | // for each block marks 247 | // - get block.start.offset 248 | // - for each marks 249 | // - - startIndex: mark.start.offset - offset 250 | // - - startValue: [mark.start.offset - offset, mark.firstChild.start.offset - offset] 251 | // - - endIndex: mark.lastChild.end.offset - offset 252 | // - - endValue: [mark.lastChild.end.offset - offset, mark.end.offset] 253 | blockMarks.forEach((blockMark) => processBlockMark(blockMark, value)) 254 | data.blocks = blockMarks.map((b): Block => { 255 | const position = parsePosition(b.block.position) 256 | ignoredByParsers.forEach(({ index, length, originValue: raw, meta }) => { 257 | if (position.start <= index && position.end >= index + length) { 258 | if (b.hyperMarks) { 259 | b.hyperMarks.push({ 260 | type: MarkType.RAW, 261 | meta, 262 | startIndex: index - position.start, 263 | startValue: raw, 264 | endIndex: index - position.start + length, 265 | endValue: '' 266 | }) 267 | } 268 | } 269 | }) 270 | return { 271 | value: b.value || '', 272 | marks: b.hyperMarks || [], 273 | ...position 274 | } 275 | }) 276 | 
data.ignoredByParsers = [] 277 | return data 278 | } 279 | 280 | export default parser 281 | -------------------------------------------------------------------------------- /src/hypers/types.ts: -------------------------------------------------------------------------------- 1 | import type { IgnoredCase } from '../ignore.js' 2 | import type { Mark, MutableParseResult } from '../parser/index.js' 3 | 4 | export type ParserIgnoredCase = { 5 | name: string 6 | meta: string 7 | index: number 8 | length: number 9 | originValue: string 10 | } 11 | 12 | export type Block = { 13 | value: string 14 | marks: Mark[] 15 | start: number 16 | end: number 17 | } 18 | 19 | export type ParsedStatus = { 20 | value: string 21 | modifiedValue: string 22 | ignoredByRules: IgnoredCase[] 23 | ignoredByParsers: ParserIgnoredCase[] 24 | blocks: Block[] 25 | } 26 | 27 | export type ParsedBlock = Block & 28 | MutableParseResult & { 29 | originValue: string 30 | } 31 | -------------------------------------------------------------------------------- /src/hypers/vuepress.ts: -------------------------------------------------------------------------------- 1 | import { ParsedStatus } from './types.js' 2 | 3 | // TODO: ::: tips 提示... 
4 | // 5 | // ::: xxx\nyyy\nzzz\n:::\n 6 | // - `(?<=^|\n)` + `(:::.*)` 7 | // - `\n` 8 | // - `(.+)` 9 | // - `\n` 10 | // - `(:::)` + `(?=\n|$)` 11 | let matcher: RegExp 12 | 13 | try { 14 | matcher = new RegExp('(?<=^|\\n)(:::.*)\\n([\\s\\S]+?)\\n(:::)(?=\\n|$)', 'g') 15 | } catch { 16 | matcher = /(:::.*)\n([\s\S]+?)\n(:::)/g 17 | } 18 | 19 | const parser = (data: ParsedStatus): ParsedStatus => { 20 | data.modifiedValue = data.modifiedValue.replace( 21 | matcher, 22 | (raw: string, start: string, value: string, end: string, index: number) => { 23 | const { length } = raw 24 | const name = start.substring(3).trim().split(' ')[0] || 'default' 25 | data.ignoredByParsers.push({ 26 | name, 27 | index, 28 | length: start.length, 29 | originValue: start, 30 | meta: `vuepress-${name}-start` 31 | }) 32 | data.ignoredByParsers.push({ 33 | name, 34 | index: index + length - 3, 35 | length: 3, 36 | originValue: end, 37 | meta: `vuepress-${name}-end` 38 | }) 39 | return '@'.repeat(start.length) + '\n' + value + '\n' + '@'.repeat(3) 40 | } 41 | ) 42 | return data 43 | } 44 | 45 | export default parser 46 | -------------------------------------------------------------------------------- /src/ignore.ts: -------------------------------------------------------------------------------- 1 | import { env } from './report.js' 2 | 3 | // Ref: https://github.com/WICG/ScrollToTextFragment 4 | export type IgnoredCase = { 5 | prefix?: string 6 | textStart: string 7 | textEnd?: string 8 | suffix?: string 9 | } 10 | 11 | export type IgnoredMark = { 12 | start: number 13 | end: number 14 | } 15 | 16 | /** 17 | * @param {string} str 18 | * @param {IgnoredCase[]} ignoredCases string which should be skipped 19 | * @return {IgnoredMark[]} 20 | */ 21 | const findIgnoredMarks = ( 22 | str: string, 23 | ignoredCases: IgnoredCase[] = [], 24 | logger: Console = env.defaultLogger 25 | ): IgnoredMark[] => { 26 | const marks: IgnoredMark[] = [] 27 | ignoredCases.forEach(({ prefix, textStart, textEnd, 
suffix }): void => { 28 | const start = (prefix || '') + textStart 29 | const end = (textEnd || '') + (suffix || '') 30 | const startOffset = prefix ? prefix.length : 0 31 | const endOffset = suffix ? suffix.length : 0 32 | 33 | const findNextMatch = (currentIndex: number): void => { 34 | const startIndex = str.substring(currentIndex).indexOf(start) 35 | if (startIndex === -1) { 36 | return 37 | } 38 | 39 | const possibleStart = currentIndex + startIndex + startOffset 40 | const nextPossibleCurrentIndex = possibleStart + textStart.length 41 | 42 | if (!end) { 43 | if (globalThis.__DEV__) { 44 | logger.log( 45 | `ignore: ${str.substring(possibleStart, nextPossibleCurrentIndex)}` 46 | ) 47 | } 48 | marks.push({ 49 | start: possibleStart, 50 | end: nextPossibleCurrentIndex 51 | }) 52 | findNextMatch(nextPossibleCurrentIndex) 53 | } else { 54 | const endIndex = str.substring(nextPossibleCurrentIndex).indexOf(end) 55 | const possibleEnd = 56 | nextPossibleCurrentIndex + endIndex + (textEnd || '').length 57 | 58 | if (endIndex === -1) { 59 | return 60 | } else { 61 | if (globalThis.__DEV__) { 62 | logger.log(`ignore: ${str.substring(possibleStart, possibleEnd)}`) 63 | } 64 | marks.push({ 65 | start: possibleStart, 66 | end: possibleEnd 67 | }) 68 | findNextMatch(possibleEnd + endOffset) 69 | } 70 | } 71 | } 72 | 73 | if (start) findNextMatch(0) // skip empty patterns: indexOf('') matches at every index, so the search would never advance 74 | }) 75 | return marks.sort((a, b) => a.start - b.start) 76 | } 77 | 78 | export default findIgnoredMarks 79 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { run, runWithConfig } from './run.js' 2 | export { report } from './report.js' 3 | export { readRc } from './rc/index.js' 4 | 5 | export type { Result } from './run.js' 6 | export type { Options } from './options.js' 7 | export type { Config } from './rc/index.js' 8 | export type { Validation, ValidationTarget } from './report.js' 9 | export 
type { IgnoredCase } from './ignore.js' 10 | export type { ParsedStatus, Block, ParserIgnoredCase } from './hypers/types.js' 11 | export type { 12 | Handler, 13 | Token, 14 | GroupToken, 15 | GroupTokenType, 16 | SingleToken, 17 | SingleTokenType, 18 | Mark, 19 | MarkType 20 | } from './parser/index.js' 21 | -------------------------------------------------------------------------------- /src/join.ts: -------------------------------------------------------------------------------- 1 | import { IgnoredMark } from './ignore.js' 2 | import { Validation, ValidationTarget } from './report.js' 3 | import { 4 | MutableGroupToken as GroupToken, 5 | MutableToken as Token 6 | } from './parser/index.js' 7 | 8 | const isInRange = (start: number, end: number, mark: IgnoredMark) => { 9 | return start <= mark.end && end >= mark.start 10 | } 11 | 12 | type IgnoredFlags = Record<ValidationTarget, boolean> & { ignored: boolean } 13 | 14 | const isIgnored = (token: Token, marks: IgnoredMark[] = []): IgnoredFlags => { 15 | const result: IgnoredFlags = { 16 | ignored: false, 17 | [ValidationTarget.VALUE]: false, 18 | [ValidationTarget.SPACE_AFTER]: false, 19 | [ValidationTarget.START_VALUE]: false, 20 | [ValidationTarget.END_VALUE]: false, 21 | [ValidationTarget.INNER_SPACE_BEFORE]: false 22 | } 23 | 24 | // - group: startValue, innerSpaceBefore, endValue, spaceAfter 25 | // - single: raw, spaceAfter 26 | marks.forEach((mark) => { 27 | if (Array.isArray(token)) { 28 | const { 29 | index, 30 | startValue, 31 | innerSpaceBefore, 32 | endIndex = 0, 33 | endValue, 34 | spaceAfter 35 | } = token 36 | if (isInRange(index, index + (startValue || '').length, mark)) { 37 | result[ValidationTarget.START_VALUE] = result.ignored = true // the mark overlaps the group's own start value 38 | } 39 | if ( 40 | isInRange( 41 | index + (startValue || '').length, 42 | index + (startValue || '').length + (innerSpaceBefore || '').length, 43 | mark 44 | ) 45 | ) { 46 | result[ValidationTarget.INNER_SPACE_BEFORE] = result.ignored = true 47 | } 48 | if (isInRange(endIndex, endIndex + 
(endValue || '').length, mark)) { 49 | result[ValidationTarget.END_VALUE] = result.ignored = true 50 | } 51 | if ( 52 | isInRange( 53 | endIndex + (endValue || '').length, 54 | endIndex + (endValue || '').length + (spaceAfter || '').length, 55 | mark 56 | ) 57 | ) { 58 | result[ValidationTarget.SPACE_AFTER] = result.ignored = true 59 | } 60 | } else { 61 | const { index, value: value, spaceAfter } = token 62 | if (isInRange(index, index + (value || '').length, mark)) { 63 | result[ValidationTarget.VALUE] = result.ignored = true 64 | } 65 | if ( 66 | isInRange( 67 | index + (value || '').length, 68 | index + (value || '').length + (spaceAfter || '').length, 69 | mark 70 | ) 71 | ) { 72 | result[ValidationTarget.SPACE_AFTER] = result.ignored = true 73 | } 74 | } 75 | }) 76 | return result 77 | } 78 | 79 | const recordValidations = ( 80 | token: Token, 81 | offset = 0, 82 | ignoredFlags: IgnoredFlags, 83 | validations: Validation[] = [], 84 | ignoredValidations: Validation[] = [] 85 | ): void => { 86 | token.validations.forEach((v) => { 87 | const validationWithOffset = { ...v, index: v.index + offset } 88 | if (!ignoredFlags[v.target]) { 89 | validations.push(validationWithOffset) 90 | } else { 91 | ignoredValidations.push(validationWithOffset) 92 | } 93 | }) 94 | } 95 | 96 | /** 97 | * Join tokens back into string 98 | * @param tokens the target group token, the index is relative to the block it belongs to 99 | * @param offset the index of the block, relative to the file it belongs to 100 | * @param ignoredMarks the ignored marks, the index is relative to the block it belongs to 101 | * @param validations the validation list result 102 | * @param isChild whether the group token is a child token of the block 103 | */ 104 | const join = ( 105 | tokens: GroupToken, 106 | offset = 0, 107 | ignoredMarks: IgnoredMark[] = [], 108 | ignoredTokens: Token[] = [], 109 | validations: Validation[] = [], 110 | ignoredValidations: Validation[] = [], 111 | isChild?: boolean 112 | 
): string => { 113 | const ignoredFlags = isIgnored(tokens, ignoredMarks) 114 | if (!isChild && ignoredFlags.ignored) { 115 | ignoredTokens.push(tokens) 116 | } 117 | if (!isChild) { 118 | recordValidations( 119 | tokens, 120 | offset, 121 | ignoredFlags, 122 | validations, 123 | ignoredValidations 124 | ) 125 | } 126 | 127 | if (ignoredFlags[ValidationTarget.START_VALUE]) { 128 | tokens.ignoredStartValue = tokens.modifiedStartValue 129 | tokens.modifiedStartValue = tokens.startValue 130 | } 131 | if (ignoredFlags[ValidationTarget.INNER_SPACE_BEFORE]) { 132 | tokens.ignoredInnerSpaceBefore = tokens.modifiedInnerSpaceBefore 133 | tokens.modifiedInnerSpaceBefore = tokens.innerSpaceBefore 134 | } 135 | if (ignoredFlags[ValidationTarget.END_VALUE]) { 136 | tokens.ignoredEndValue = tokens.modifiedEndValue 137 | tokens.modifiedEndValue = tokens.endValue 138 | } 139 | if (ignoredFlags[ValidationTarget.SPACE_AFTER]) { 140 | tokens.ignoredSpaceAfter = tokens.modifiedSpaceAfter 141 | tokens.modifiedSpaceAfter = tokens.spaceAfter 142 | } 143 | 144 | return [ 145 | tokens.modifiedStartValue, 146 | tokens.modifiedInnerSpaceBefore, 147 | ...tokens.map((token) => { 148 | const subIgnoredFlags = isIgnored(token, ignoredMarks) 149 | if (subIgnoredFlags.ignored) { 150 | ignoredTokens.push(token) 151 | } 152 | recordValidations( 153 | token, 154 | offset, 155 | subIgnoredFlags, 156 | validations, 157 | ignoredValidations 158 | ) 159 | if (!Array.isArray(token)) { 160 | if (subIgnoredFlags[ValidationTarget.VALUE]) { 161 | token.ignoredValue = token.modifiedValue 162 | token.modifiedValue = token.value 163 | } 164 | if (subIgnoredFlags[ValidationTarget.SPACE_AFTER]) { 165 | token.ignoredSpaceAfter = token.modifiedSpaceAfter 166 | token.modifiedSpaceAfter = token.spaceAfter 167 | } 168 | 169 | return [token.modifiedValue, token.modifiedSpaceAfter] 170 | .filter(Boolean) 171 | .join('') 172 | } 173 | return join( 174 | token, 175 | offset, 176 | ignoredMarks, 177 | ignoredTokens, 178 | 
validations, 179 | ignoredValidations, 180 | true 181 | ) 182 | }), 183 | tokens.modifiedEndValue, 184 | tokens.modifiedSpaceAfter 185 | ] 186 | .filter(Boolean) 187 | .join('') 188 | } 189 | 190 | export default join 191 | -------------------------------------------------------------------------------- /src/options.ts: -------------------------------------------------------------------------------- 1 | import { ParsedStatus } from './hypers/types.js' 2 | import { IgnoredCase } from './ignore.js' 3 | import { Options as RuleOptions } from './rules/util.js' 4 | 5 | export type Options = { 6 | logger?: Console 7 | rules?: RuleOptions 8 | hyperParse?: 9 | | (string | ((status: ParsedStatus) => ParsedStatus))[] 10 | | ((status: ParsedStatus) => ParsedStatus) 11 | ignoredCases?: IgnoredCase[] 12 | } 13 | 14 | export type NormalizedOptions = { 15 | logger: Console 16 | rules: RuleOptions 17 | hyperParse: Array<(status: ParsedStatus) => ParsedStatus> 18 | ignoredCases: IgnoredCase[] 19 | } 20 | 21 | import ignore, { parseIngoredCase } from './hypers/ignore.js' 22 | import hexo from './hypers/hexo.js' 23 | import vuepress from './hypers/vuepress.js' 24 | import md from './hypers/md.js' 25 | 26 | import { defaultConfig as defaultRules } from './rules/index.js' 27 | 28 | import { env } from './report.js' 29 | import { Config } from './rc/index.js' 30 | 31 | const hyperParseInfo = [ 32 | { name: 'ignore', value: ignore }, 33 | { name: 'hexo', value: hexo }, 34 | { name: 'vuepress', value: vuepress }, 35 | { name: 'markdown', value: md } 36 | ] 37 | 38 | const arrToMap = <T>( 39 | arr: { name: string; value: T }[] 40 | ): { [name: string]: T } => 41 | arr.reduce((current, { name, value }) => { 42 | current[name] = value // index each entry's value by its name 43 | return current 44 | }, {}) 45 | 46 | const hyperParseMap = 47 | arrToMap<(status: ParsedStatus) => ParsedStatus>(hyperParseInfo) 48 | 49 | const matchCallArray = <T>(calls: unknown[], map: { [name: string]: T }): T[] => 50 | calls 51 | .map((call) => { 52 | 
switch (typeof call) { 53 | case 'function': 54 | return call 55 | case 'string': 56 | return map[call] 57 | default: 58 | return null 59 | } 60 | }) 61 | .filter(Boolean) as T[] 62 | 63 | const DEPRECATED_OPTIONS = { 64 | halfWidthPunctuation: 'halfwidthPunctuation', 65 | fullWidthPunctuation: 'fullwidthPunctuation', 66 | adjustedFullWidthPunctuation: 'adjustedFullwidthPunctuation', 67 | spaceBetweenHalfWidthLetters: 'spaceBetweenHalfwidthContent', 68 | spaceBetweenHalfWidthContent: 'spaceBetweenHalfwidthContent', 69 | noSpaceBetweenFullWidthLetters: 'noSpaceBetweenFullwidthContent', 70 | noSpaceBetweenFullWidthContent: 'noSpaceBetweenFullwidthContent', 71 | spaceBetweenMixedWidthLetters: 'spaceBetweenMixedwidthContent', 72 | spaceBetweenMixedWidthContent: 'spaceBetweenMixedwidthContent', 73 | noSpaceBeforePunctuation: 'noSpaceBeforePauseOrStop', 74 | spaceAfterHalfWidthPunctuation: 'spaceAfterHalfwidthPauseOrStop', 75 | noSpaceAfterFullWidthPunctuation: 'noSpaceAfterFullwidthPauseOrStop', 76 | spaceOutsideHalfQuote: 'spaceOutsideHalfwidthQuotation', 77 | noSpaceOutsideFullQuote: 'noSpaceOutsideFullwidthQuotation', 78 | noSpaceInsideQuote: 'noSpaceInsideQuotation', 79 | spaceOutsideHalfBracket: 'spaceOutsideHalfwidthBracket', 80 | noSpaceOutsideFullBracket: 'noSpaceOutsideFullwidthBracket', 81 | noSpaceInsideWrapper: 'noSpaceInsideHyperMark', 82 | noSpaceInsideMark: 'noSpaceInsideHyperMark' 83 | } 84 | 85 | const deprecateOptions = (ruleOption: RuleOptions, logger: Console): void => { 86 | for (const oldKey in DEPRECATED_OPTIONS) { 87 | const newKey = DEPRECATED_OPTIONS[oldKey] 88 | if (ruleOption[oldKey] !== undefined) { // also migrate explicit falsy values such as false or '' 89 | logger.warn(`[deprecate] ${oldKey} is deprecated, use ${newKey} instead`) 90 | ruleOption[newKey] = ruleOption[newKey] ?? ruleOption[oldKey] 91 | delete ruleOption[oldKey] 92 | } 93 | } 94 | } 95 | 96 | export const normalizeOptions = (options: Options): NormalizedOptions => { 97 | const logger = options.logger ?? 
env.defaultLogger 98 | 99 | const rules = options.rules ?? {} 100 | const preset = rules.preset === 'default' ? defaultRules : {} 101 | deprecateOptions(rules, logger) 102 | 103 | let hyperParse: Array<string | ((status: ParsedStatus) => ParsedStatus)> 104 | 105 | if (typeof options.hyperParse === 'function') { 106 | hyperParse = [options.hyperParse] 107 | } else { 108 | hyperParse = options.hyperParse || hyperParseInfo.map((item) => item.name) 109 | } 110 | 111 | const normalizedOptions: NormalizedOptions = { 112 | logger, 113 | ignoredCases: options.ignoredCases || [], 114 | rules: { ...preset, ...rules }, 115 | hyperParse: matchCallArray<(status: ParsedStatus) => ParsedStatus>( 116 | hyperParse, 117 | hyperParseMap 118 | ) 119 | } 120 | 121 | return normalizedOptions 122 | } 123 | 124 | export const normalizeConfig = ( 125 | config: Config, 126 | logger: Console = env.defaultLogger 127 | ): NormalizedOptions => { 128 | const options: NormalizedOptions = { 129 | logger, 130 | rules: {}, 131 | hyperParse: [], 132 | ignoredCases: [] 133 | } 134 | let hyperParse: string[] = [] 135 | 136 | // preset 137 | if (config.preset === 'default') { 138 | options.rules = { ...defaultRules } 139 | hyperParse = hyperParseInfo.map((item) => item.name) 140 | } 141 | 142 | // rules 143 | if (config.rules) { 144 | options.rules = { ...options.rules, ...config.rules } 145 | } 146 | 147 | // hyper parsers 148 | if (Array.isArray(config.hyperParsers)) { 149 | hyperParse = config.hyperParsers 150 | } 151 | hyperParse.forEach((x) => { 152 | if (!hyperParseMap[x]) { 153 | logger.log(`The hyper parser ${x} is invalid.`) 154 | return 155 | } 156 | options.hyperParse.push(hyperParseMap[x]) 157 | }) 158 | 159 | // ignored cases 160 | if (config.caseIgnores) { 161 | config.caseIgnores.forEach((x) => { 162 | const ignoredCase = parseIngoredCase(x) 163 | if (ignoredCase) { 164 | options.ignoredCases.push(ignoredCase) 165 | } else { 166 | logger.log(`The format of ignore case: "${x}" is invalid.`) 167 | } 168 | }) 169 | } 170 | 
171 | return options 172 | } 173 | -------------------------------------------------------------------------------- /src/parser/char.ts: -------------------------------------------------------------------------------- 1 | import { CharType } from './types.js' 2 | 3 | /** 4 | * NOTE: 5 | * - U+FE41 PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET -> U+300C LEFT CORNER BRACKET, etc. 6 | * - U+2E3A TWO-EM DASH, U+2014 EM DASH x2 7 | * - U+2026 HORIZONTAL ELLIPSIS, U+22EF MIDLINE HORIZONTAL ELLIPSIS 8 | * - U+25CF BLACK CIRCLE (emphasis dots), U+2022 BULLET (emphasis dots), U+00B7 MIDDLE DOT (interpuncts), 9 | * U+2027 HYPHENATION POINT, U+2022 BULLET, U+30FB KATAKANA MIDDLE DOT 10 | * 11 | * Decoration marks: 12 | * - emphasis dots: U+25CF BLACK CIRCLE, U+2022 BULLET 13 | * - book title marks: U+FE4F WAVY LOW LINE 14 | * - proper noun marks: U+FF3F FULLWIDTH LOW LINE 15 | */ 16 | const newCharTypeSet: { [key in CharType]?: string } = { 17 | [CharType.HALFWIDTH_PAUSE_OR_STOP]: ',.;:?!', 18 | [CharType.FULLWIDTH_PAUSE_OR_STOP]: [ 19 | // normal punctuation marks 20 | ',。、;:?!', 21 | // special punctuation marks 22 | '⁈⁇‼⁉' 23 | ].join(''), 24 | [CharType.HALFWIDTH_QUOTATION]: '\'"', 25 | [CharType.FULLWIDTH_QUOTATION]: '‘’“”《》〈〉『』「」【】〖〗', 26 | [CharType.HALFWIDTH_BRACKET]: '()[]{}', 27 | [CharType.FULLWIDTH_BRACKET]: '()〔〕[]{}', 28 | [CharType.HALFWIDTH_OTHER_PUNCTUATION]: [ 29 | // on-keyboard symbols 30 | '~-+*/\\%=&|`<>@#$^', 31 | // symbol of death 32 | '†‡' 33 | ].join(''), 34 | [CharType.FULLWIDTH_OTHER_PUNCTUATION]: [ 35 | // U+2E3A TWO-EM DASH, U+2014 EM DASH 36 | '—⸺', 37 | // U+2026 HORIZONTAL ELLIPSIS, U+22EF MIDLINE HORIZONTAL ELLIPSIS 38 | '…⋯', 39 | // U+FF5E FULLWIDTH TILDE 40 | '~', 41 | // U+25CF BLACK CIRCLE, U+2022 BULLET, U+00B7 MIDDLE DOT, 42 | // U+2027 HYPHENATION POINT, U+30FB KATAKANA MIDDLE DOT 43 | '●•·‧・' 44 | ].join('') 45 | } 46 | 47 | /** 48 | * Check whether the character is full-width or half-width, 49 | * content or punctuation, or 
empty, or space, or emoji etc. 50 | * Refs: 51 | * - https://unicode.org/charts/ 52 | * - https://jrgraphix.net/research/unicode.php 53 | * - https://mathiasbynens.be/notes/javascript-unicode 54 | * - https://stackoverflow.com/a/21113538 55 | * - https://www.w3.org/International/clreq/#categories_and_usage_of_punctuation_marks 56 | */ 57 | export const checkCharType = (char: string): CharType => { 58 | if (char === '') { 59 | return CharType.EMPTY 60 | } 61 | 62 | // space 63 | if (char.match(/\s/) != null) { 64 | return CharType.SPACE 65 | } 66 | 67 | // punctuation marks 68 | for (const [charType, charSet] of Object.entries(newCharTypeSet)) { 69 | if (charSet?.indexOf(char) >= 0) { 70 | return charType as CharType 71 | } 72 | } 73 | 74 | // 0-9 75 | if (char.match(/[0-9]/) != null) { 76 | return CharType.WESTERN_LETTER 77 | } 78 | 79 | // Basic Latin 80 | if (char.match(/[\u0020-\u007F]/) != null) { 81 | return CharType.WESTERN_LETTER 82 | } 83 | // Latin-1 Supplement 84 | if (char.match(/[\u00A0-\u00FF]/) != null) { 85 | return CharType.WESTERN_LETTER 86 | } 87 | // Latin Extended-A 88 | if (char.match(/[\u0100-\u017F]/) != null) { 89 | return CharType.WESTERN_LETTER 90 | } 91 | // Latin Extended-B 92 | if (char.match(/[\u0180-\u024F]/) != null) { 93 | return CharType.WESTERN_LETTER 94 | } 95 | // Greek and Coptic 96 | if (char.match(/[\u0370-\u03FF]/) != null) { 97 | return CharType.WESTERN_LETTER 98 | } 99 | 100 | // CJK Unified Ideographs 101 | if (char.match(/[\u4E00-\u9FFF]/) != null) { 102 | return CharType.CJK_CHAR 103 | } 104 | // CJK Unified Ideographs Extension A 105 | if (char.match(/[\u3400-\u4DBF]/) != null) { 106 | return CharType.CJK_CHAR 107 | } 108 | // CJK Unified Ideographs Extension B 109 | if ( 110 | char.match(/[\ud840-\ud868][\udc00-\udfff]|\ud869[\udc00-\uded6]/) != null 111 | ) { 112 | return CharType.CJK_CHAR 113 | } 114 | // CJK Unified Ideographs Extension C 115 | if ( 116 | char.match( 117 | 
/\ud869[\udf00-\udfff]|[\ud86a-\ud86c][\udc00-\udfff]|\ud86d[\udc00-\udf34]/ 118 | ) != null 119 | ) { 120 | return CharType.CJK_CHAR 121 | } 122 | // CJK Unified Ideographs Extension D 123 | if (char.match(/\ud86d[\udf40-\udfff]|\ud86e[\udc00-\udc1d]/) != null) { 124 | return CharType.CJK_CHAR 125 | } 126 | // CJK Compatibility Ideographs 127 | if (char.match(/[\uF900-\uFAFF]/) != null) { 128 | return CharType.CJK_CHAR 129 | } 130 | // CJK Compatibility Forms 131 | if (char.match(/[\uFE30-\uFE4F]/) != null) { 132 | return CharType.CJK_CHAR 133 | } 134 | // CJK Radicals Supplement 135 | if (char.match(/[\u2E80-\u2EFF]/) != null) { 136 | return CharType.CJK_CHAR 137 | } 138 | // Private Use Area (part) 139 | if (char.match(/[\uE815-\uE864]/) != null) { 140 | return CharType.CJK_CHAR 141 | } 142 | // CJK Unified Ideographs Extension B 143 | if (char.match(/[\u{20000}-\u{2A6DF}]/u) != null) { 144 | return CharType.CJK_CHAR 145 | } 146 | // CJK Compatibility Ideographs Supplement 147 | if (char.match(/[\u{2F800}-\u{2FA1F}]/u) != null) { 148 | return CharType.CJK_CHAR 149 | } 150 | 151 | // CJK Symbols and Punctuation 152 | if (char.match(/[\u3000-\u303F]/) != null) { 153 | return CharType.FULLWIDTH_OTHER_PUNCTUATION 154 | } 155 | 156 | return CharType.UNKNOWN 157 | } 158 | -------------------------------------------------------------------------------- /src/parser/index.ts: -------------------------------------------------------------------------------- 1 | export * from './types.js' 2 | export { ParseResult, MutableParseResult } from './parse.js' 3 | export { Handler } from './travel.js' 4 | 5 | export { checkCharType } from './char.js' 6 | export { parse, toMutableResult } from './parse.js' 7 | export { travel } from './travel.js' 8 | -------------------------------------------------------------------------------- /src/parser/messages.ts: -------------------------------------------------------------------------------- 1 | export const BRACKET_NOT_CLOSED = '括号未闭合' 2 | 
// Message (Chinese, user-facing): a closing bracket has no matching opening one.
export const BRACKET_NOT_OPEN = '括号未匹配'

// Messages (Chinese, user-facing) for unpaired quotation marks.
export const QUOTATION_NOT_CLOSED = '引号未闭合'
export const QUOTATION_NOT_OPEN = '引号未匹配'
// --------------------------------------------------------------------------------
// /src/parser/parse.ts:
// --------------------------------------------------------------------------------
import { Validation } from '../report.js'
import { checkCharType } from './char.js'
import {
  CharType,
  isLetterType,
  isPunctuationType,
  Mark,
  MutableMark,
  MarkMap,
  MarkSideType,
  MarkType,
  MutableSingleToken,
  MutableGroupToken,
  MutableToken,
  Token,
  GroupToken
} from './types.js'
import {
  handleLetter,
  handlePunctuation,
  appendValue,
  addRawContent,
  addHyperToken,
  finalizeLastToken,
  getConnectingSpaceLength,
  getHyperMarkMap,
  getPreviousToken,
  initNewStatus,
  isShorthand,
  handleErrors
} from './util.js'
import { Options as RuleOptions } from '../rules/util.js'

/**
 * Working state shared between `parse` and the helpers in `./util.js`.
 * It is created by `initNewStatus` and mutated by the helpers while the
 * input string is scanned; `parse` returns its `tokens`, `groups`, `marks`
 * and `errors` fields as the final result.
 */
export type ParseStatus = {
  lastToken?: Token
  lastGroup?: GroupToken
  lastMark?: Mark

  tokens: GroupToken
  marks: Mark[]
  groups: GroupToken[]

  markStack: Mark[]
  groupStack: GroupToken[]

  errors: Validation[]
}

/**
 * The read-only outcome of `parse`.
 */
export type ParseResult = {
  tokens: GroupToken
  groups: GroupToken[]
  marks: Mark[]
  errors: Validation[]
}

/**
 * The same structure as `ParseResult` after `toMutableResult` has added the
 * `modified*` fields to every token and mark.
 */
export type MutableParseResult = {
  tokens: MutableGroupToken
  groups: MutableGroupToken[]
  marks: MutableMark[]
  errors: Validation[]
}

/**
 * Parse a string into several tokens.
 * - half-width content x {1,n} (English words)
 * - full-width content x {1,n} (Chinese sentences without punctuations in between)
 * - half-width punctuation -> halfwidth pause or stop punctuation mark
 * - full-width punctuation -> fullwidth pause or stop punctuation mark
 * - punctuation pair as special marks: brackets -> bracket
 * - punctuation pair as a group: quotations -> quotation or book title mark
 * - any other punctuation -> halfwidth/fullwidth other punctuation mark
 * Besides them there are some special tokens
 * - content-hyper from hyperMarks as input
 * For spaces they would be included as one or multiple successive spaces in
 * - spaceAfter after a token or
 * - innerSpaceBefore after the left quotation of a group
 *
 * @param str the text to parse
 * @param hyperMarks marks found beforehand by a hyper parser; they are
 *   looked up by character index while scanning
 * @returns the top-level token group, all groups, all marks and the errors
 *   collected by `handleErrors`
 */
export const parse = (str: string, hyperMarks: Mark[] = []): ParseResult => {
  // init status and hyper marks
  const status: ParseStatus = initNewStatus(str, hyperMarks)
  const hyperMarkMap: MarkMap = getHyperMarkMap(hyperMarks)

  // travel every character in the string
  for (let i = 0; i < str.length; i++) {
    const char = str[i]
    const type = checkCharType(char)
    const hyperMark = hyperMarkMap[i]

    // finally get `status.marks` and `status.lastGroup` as the top-level tokens
    // - hyper marks: finalize current token -> add mark
    // - space: end current -> move forward -> record space beside
    // - punctuation: whether start/end a mark or group, or just add a normal one
    // - content: whether start a new one or append into the current one
    if (hyperMark) {
      // end the last unfinished token
      finalizeLastToken(status, i)
      // for hyper mark without startValue
      delete hyperMarkMap[i]
      // check the next token
      // - if the mark type is raw
      //   - append next token
      // - else (the mark type is hyper)
      //   - start: append token
      //   - end hyper mark: append token, append mark
      if (hyperMark.type === MarkType.RAW) {
        addRawContent(
          status,
          i,
          str.substring(hyperMark.startIndex, hyperMark.endIndex)
        )
        // `- 1` compensates for the `i++` of the loop
        i = hyperMark.endIndex - 1
      } else {
        if (i === hyperMark.startIndex) {
          addHyperToken(
            status,
            i,
            hyperMark,
            hyperMark.startValue,
            MarkSideType.LEFT
          )
          i += hyperMark.startValue.length - 1
        } else if (i === hyperMark.endIndex) {
          addHyperToken(
            status,
            i,
            hyperMark,
            hyperMark.endValue,
            MarkSideType.RIGHT
          )
          i += hyperMark.endValue.length - 1
        }
      }
    } else if (type === CharType.SPACE) {
      // end the last unfinished token
      // jump to the next non-space char
      // record the last space
      // - space after a token
      // - inner space before a group
      finalizeLastToken(status, i)
      if (status.lastGroup) {
        const spaceLength = getConnectingSpaceLength(str, i)
        const spaces = str.substring(i, i + spaceLength)
        if (status.lastGroup.length) {
          const lastToken = getPreviousToken(status)
          if (lastToken) {
            lastToken.spaceAfter = spaces
          }
        } else {
          status.lastGroup.innerSpaceBefore = spaces
        }
        // skip the rest of the space run (the loop adds the final `+ 1`)
        if (spaceLength - 1 > 0) {
          i += spaceLength - 1
        }
      }
    } else if (isShorthand(str, status, i, char)) {
      appendValue(status, char)
    } else if (isPunctuationType(type)) {
      handlePunctuation(i, char, type, status)
    } else if (isLetterType(type)) {
      handleLetter(i, char, type, status)
    } else if (type === CharType.EMPTY) {
      // Nothing
    } else {
      // every remaining char type is handled as a western letter
      handleLetter(i, char, CharType.WESTERN_LETTER, status)
    }
  }
  finalizeLastToken(status, str.length)

  // handle all the unmatched parsing tokens
  handleErrors(status)

  return {
    tokens: status.tokens,
    groups: status.groups,
    marks: status.marks,
    errors: status.errors
  }
}

/**
 * Turn a token (and, for groups, all of its descendants) into its mutable
 * form in place: every `modified*` field starts as a copy of the original
 * field and `validations` starts empty.
 */
const toMutableToken = (token: Token): MutableToken => {
  if (Array.isArray(token)) {
    const mutableToken: MutableGroupToken = token as MutableGroupToken
    mutableToken.modifiedType = token.type
    mutableToken.modifiedValue = token.value
    mutableToken.modifiedSpaceAfter = token.spaceAfter
    mutableToken.modifiedStartValue = token.startValue
    mutableToken.modifiedEndValue = token.endValue
    mutableToken.modifiedInnerSpaceBefore = token.innerSpaceBefore
    mutableToken.validations = []
    token.forEach(toMutableToken)
    return mutableToken
  } else {
    const mutableToken: MutableSingleToken = token as MutableSingleToken
    mutableToken.modifiedType = token.type
    mutableToken.modifiedValue = token.value
    mutableToken.modifiedSpaceAfter = token.spaceAfter
    mutableToken.validations = []
    return mutableToken
  }
}

/**
 * Turn a mark into its mutable form in place by seeding the `modified*`
 * values with the original start/end values.
 */
const toMutableMark = (mark: Mark): MutableMark => {
  const mutableMark: MutableMark = mark as MutableMark
  mutableMark.modifiedStartValue = mark.startValue
  mutableMark.modifiedEndValue = mark.endValue
  return mutableMark
}

/**
 * Convert a parse result into a mutable one (in place).
 *
 * The parser errors are emptied unless `options.noSinglePair` is set.
 */
export const toMutableResult = (
  result: ParseResult,
  options: RuleOptions = {}
): MutableParseResult => {
  if (!options.noSinglePair) {
    result.errors.length = 0
  }

  toMutableToken(result.tokens)
  result.marks.forEach(toMutableMark)
  return result as MutableParseResult
}
// --------------------------------------------------------------------------------
// /src/parser/travel.ts:
// --------------------------------------------------------------------------------
import { MutableGroupToken, GroupToken, MutableToken, Token } from './types.js'

/**
 * Callback invoked by `travel` for every token, with the token's index in
 * its direct parent group and that parent group.
 */
export type Handler = (
  token: MutableToken | Token,
  index: number,
  group: MutableGroupToken | GroupToken
) => void

/**
 * Walk a group depth-first: the handler is called on a token before the
 * token's own children (if it is a group) are visited.
 */
export const travel = (
  group: MutableGroupToken | GroupToken,
  handler: Handler
): void => {
  for (let i = 0; i < group.length; i++) {
    const token = group[i]
    handler(token, i, group)
    if (Array.isArray(token)) {
      travel(token, handler)
    }
  }
}
// --------------------------------------------------------------------------------
// /src/parser/types.ts:
// --------------------------------------------------------------------------------
/**
 * @fileOverview
 *
 * This file contains the types for the parser.
 *
 * - Chars
 * - Pairs
 * - Marks
 * - Tokens
 */

import { Validation } from '../report.js'

// Char

export enum CharType {
  EMPTY = 'empty',

  SPACE = 'space',

  WESTERN_LETTER = 'western-letter',
  CJK_CHAR = 'cjk-char',

  // periods, commas, secondary commas, colons, semicolons, exclamation marks, question marks, etc.
  HALFWIDTH_PAUSE_OR_STOP = 'halfwidth-pause-or-stop',
  FULLWIDTH_PAUSE_OR_STOP = 'fullwidth-pause-or-stop',

  // single, double, corner, white corner
  // + book title marks
  // left x right
  HALFWIDTH_QUOTATION = 'halfwidth-quotation',
  FULLWIDTH_QUOTATION = 'fullwidth-quotation',

  // parentheses
  HALFWIDTH_BRACKET = 'halfwidth-bracket',
  FULLWIDTH_BRACKET = 'fullwidth-bracket',

  // // parenthesis, black lenticular brackets, white lenticular brackets,
  // // square brackets, tortoise shell brackets, curly brackets
  // // left x right
  // PARENTHESIS = 'parenthesis',
  // // double angle brackets, angle brackets
  // // left x right
  // BOOK_TITLE_MARK = 'book-title',

  // dashes, ellipsis, connector marks, interpuncts, proper noun marks, solidi, etc.
  HALFWIDTH_OTHER_PUNCTUATION = 'halfwidth-other-punctuation',
  FULLWIDTH_OTHER_PUNCTUATION = 'fullwidth-other-punctuation',

  // // ⁈, ⁇, ‼, ⁉
  // SPECIAL_PUNCTUATION_MARK = 'special-punctuation',

  UNKNOWN = 'unknown'
}

type CharSet = {
  [setName: string]: string
}

// The full-width parenthesis, square bracket and curly bracket are written
// as \u escapes so they cannot be confused with (or silently normalized
// into) their ASCII look-alikes. `left[i]` pairs with `right[i]`.
export const BRACKET_CHAR_SET: CharSet = {
  left: '([{\uFF08〔\uFF3B\uFF5B',
  right: ')]}\uFF09〕\uFF3D\uFF5D'
}
export const QUOTATION_CHAR_SET: CharSet = {
  left: `“‘《〈『「【〖`,
  right: `”’》〉』」】〗`,
  neutral: `'"`
}
export const SHORTHAND_CHARS = `'’`
export const SHORTHAND_PAIR_SET: CharSet = {
  [`'`]: `'`,
  [`’`]: `‘`
}

// Only genuinely full-width pair characters belong here: ASCII `()[]{}`
// must not be reported as full-width.
const FULLWIDTH_PAIRS =
  '“”‘’\uFF08\uFF09〔〕\uFF3B\uFF3D\uFF5B\uFF5D《》〈〉「」『』【】〖〗'

/**
 * Whether `str` occurs in the list of full-width pair characters.
 */
export const isFullwidthPair = (str: string): boolean =>
  FULLWIDTH_PAIRS.indexOf(str) >= 0

// Reusable

type Pair = {
  startIndex: number
  startValue: string
  endIndex: number
  endValue: string
}

type MutablePair = {
  modifiedStartValue: string
  ignoredStartValue?: string
  modifiedEndValue: string
  ignoredEndValue?: string
}

// Mark

/**
 * Marks are hyper info, including content and wrappers.
 * They are categorized by parsers, not by usage.
 */
export enum MarkType {
  /**
   * Brackets
   */
  BRACKETS = 'brackets',
  /**
   * Inline Markdown marks
   */
  HYPER = 'hyper',
  /**
   * - \`xxx\`
   * - <code>xxx</code>
   * - Hexo/VuePress container
   * - Other html code
   */
  RAW = 'raw'
}

export enum MarkSideType {
  LEFT = 'left',
  RIGHT = 'right'
}

export type Mark = Pair & {
  type: MarkType
  meta?: string // TODO: AST type enum
}

export type RawMark = Mark & {
  code: MarkSideType
  rightPair?: RawMark
}

export type MutableMark = Mark & MutablePair

export type MutableRawMark = RawMark & MutablePair

export type MarkMap = {
  [index: number]: Mark
}

/**
 * A mark counts as a raw mark as soon as it carries a `code` field.
 */
export const isRawMark = (mark: Mark): mark is RawMark => {
  return (mark as RawMark).code !== undefined
}

// Token type

export type LetterType = CharType.WESTERN_LETTER | CharType.CJK_CHAR

export type PauseOrStopType =
  | CharType.HALFWIDTH_PAUSE_OR_STOP
  | CharType.FULLWIDTH_PAUSE_OR_STOP

export type QuotationType =
  | CharType.HALFWIDTH_QUOTATION
  | CharType.FULLWIDTH_QUOTATION

export type BracketType =
  | CharType.HALFWIDTH_BRACKET
  | CharType.FULLWIDTH_BRACKET

export type OtherPunctuationType =
  | CharType.HALFWIDTH_OTHER_PUNCTUATION
  | CharType.FULLWIDTH_OTHER_PUNCTUATION

export type SinglePunctuationType = PauseOrStopType | OtherPunctuationType

export type PunctuationType = SinglePunctuationType | BracketType

export type NormalContentTokenType = LetterType | SinglePunctuationType

export type HalfwidthPuntuationType =
  | CharType.HALFWIDTH_PAUSE_OR_STOP
  | CharType.HALFWIDTH_QUOTATION
  | CharType.HALFWIDTH_BRACKET
  | CharType.HALFWIDTH_OTHER_PUNCTUATION

export type FullwidthPuntuationType =
  | CharType.FULLWIDTH_PAUSE_OR_STOP
  | CharType.FULLWIDTH_QUOTATION
  | CharType.FULLWIDTH_BRACKET
  | CharType.FULLWIDTH_OTHER_PUNCTUATION

// Half-width tokens are western letters plus *half-width* punctuation
// (this matches what `isHalfwidthType` checks at runtime).
export type HalfwidthTokenType =
  | CharType.WESTERN_LETTER
  | HalfwidthPuntuationType

export type FullwidthTokenType = CharType.CJK_CHAR | FullwidthPuntuationType

/**
 * TODO: paired html tags should be hyper mark
 */
export enum HyperTokenType {
  /**
   * Brackets
   */
  BRACKET_MARK = 'bracket-mark',
  /**
   * Inline Markdown marks
   */
  HYPER_MARK = 'hyper-mark',

  /**
   * - \`xxx\`
   * - <code>xxx</code>
   */
  CODE_CONTENT = 'code-content',
  /**
   * - Hexo/VuePress container
   * - Other html code
   */
  HYPER_CONTENT = 'hyper-content',

  /**
   * Unpaired brackets/quotations
   */
  UNMATCHED = 'unmatched',
  /**
   * For indeterminate tokens
   */
  INDETERMINATED = 'indeterminated'
}

export enum GroupTokenType {
  GROUP = 'group'
}

export type SingleTokenType = NormalContentTokenType | HyperTokenType

export type TokenType = SingleTokenType | GroupTokenType

export type NonTokenCharType =
  | BracketType
  | QuotationType
  | CharType.EMPTY
  | CharType.SPACE
  | CharType.UNKNOWN

export type GeneralType = TokenType | NonTokenCharType

/**
 * Map a full-width letter / pause-or-stop / other-punctuation type to its
 * half-width counterpart; every other type is returned unchanged.
 */
export const getHalfwidthTokenType = (type: TokenType): TokenType => {
  switch (type) {
    case CharType.CJK_CHAR:
      return CharType.WESTERN_LETTER
    case CharType.FULLWIDTH_PAUSE_OR_STOP:
      return CharType.HALFWIDTH_PAUSE_OR_STOP
    case CharType.FULLWIDTH_OTHER_PUNCTUATION:
      return CharType.HALFWIDTH_OTHER_PUNCTUATION
  }
  return type
}

/**
 * Map a half-width letter / pause-or-stop / other-punctuation type to its
 * full-width counterpart; every other type is returned unchanged.
 */
export const getFullwidthTokenType = (type: TokenType): TokenType => {
  switch (type) {
    case CharType.WESTERN_LETTER:
      return CharType.CJK_CHAR
    case CharType.HALFWIDTH_PAUSE_OR_STOP:
      return CharType.FULLWIDTH_PAUSE_OR_STOP
    case CharType.HALFWIDTH_OTHER_PUNCTUATION:
      return CharType.FULLWIDTH_OTHER_PUNCTUATION
  }
  return type
}

export type NonCodeVisibleTokenType =
  | NormalContentTokenType
  | HyperTokenType.BRACKET_MARK
  | GroupTokenType.GROUP

export type VisibleTokenType =
  | NonCodeVisibleTokenType
  | HyperTokenType.CODE_CONTENT

export type InvisibleTokenType = HyperTokenType.HYPER_MARK

export type VisibilityUnknownTokenType = HyperTokenType.HYPER_CONTENT

export const isLetterType = (type: GeneralType): type is LetterType => {
  return type === CharType.WESTERN_LETTER || type === CharType.CJK_CHAR
}

export const isPauseOrStopType = (
  type: GeneralType
): type is PauseOrStopType => {
  return (
    type === CharType.HALFWIDTH_PAUSE_OR_STOP ||
    type === CharType.FULLWIDTH_PAUSE_OR_STOP
  )
}

export const isQuotationType = (type: GeneralType): type is QuotationType => {
  return (
    type === CharType.HALFWIDTH_QUOTATION ||
    type === CharType.FULLWIDTH_QUOTATION
  )
}

export const isBracketType = (type: GeneralType): type is BracketType => {
  return (
    type === CharType.HALFWIDTH_BRACKET || type === CharType.FULLWIDTH_BRACKET
  )
}

export const isOtherPunctuationType = (
  type: GeneralType
): type is OtherPunctuationType => {
  return (
    type === CharType.HALFWIDTH_OTHER_PUNCTUATION ||
    type === CharType.FULLWIDTH_OTHER_PUNCTUATION
  )
}

export const isSinglePunctuationType = (
  type: GeneralType
): type is SinglePunctuationType => {
  return isPauseOrStopType(type) || isOtherPunctuationType(type)
}

// NOTE(review): this also returns true for quotation types although
// `PunctuationType` does not list `QuotationType`; the declared predicate is
// kept as-is because callers outside this file may rely on it.
export const isPunctuationType = (
  type: GeneralType
): type is PunctuationType => {
  return (
    isPauseOrStopType(type) ||
    isQuotationType(type) ||
    isBracketType(type) ||
    isOtherPunctuationType(type)
  )
}

export const isHalfwidthPunctuationType = (
  type: GeneralType
): type is HalfwidthPuntuationType => {
  return (
    type === CharType.HALFWIDTH_PAUSE_OR_STOP ||
    type === CharType.HALFWIDTH_QUOTATION ||
    type === CharType.HALFWIDTH_BRACKET ||
    type === CharType.HALFWIDTH_OTHER_PUNCTUATION
  )
}

export const isHalfwidthType = (
  type: GeneralType
): type is HalfwidthTokenType => {
  return type === CharType.WESTERN_LETTER || isHalfwidthPunctuationType(type)
}

export const isFullwidthPunctuationType = (
  type: GeneralType
): type is FullwidthPuntuationType => {
  return (
    type === CharType.FULLWIDTH_PAUSE_OR_STOP ||
    type === CharType.FULLWIDTH_QUOTATION ||
    type === CharType.FULLWIDTH_BRACKET ||
    type === CharType.FULLWIDTH_OTHER_PUNCTUATION
  )
}

export const isFullwidthType = (
  type: GeneralType
): type is FullwidthTokenType => {
  return type === CharType.CJK_CHAR || isFullwidthPunctuationType(type)
}

// The predicate names the full set of accepted types (letters, single
// punctuation, bracket marks and groups), not just letters.
export const isNonCodeVisibleType = (
  type: GeneralType
): type is NonCodeVisibleTokenType => {
  return (
    isLetterType(type) ||
    isSinglePunctuationType(type) ||
    type === HyperTokenType.BRACKET_MARK ||
    type === GroupTokenType.GROUP
  )
}

export const isVisibleType = (type: GeneralType): type is VisibleTokenType => {
  return isNonCodeVisibleType(type) || type === HyperTokenType.CODE_CONTENT
}

export const isInvisibleType = (
  type: GeneralType
): type is InvisibleTokenType => {
  // OTHERS?
  return type === HyperTokenType.HYPER_MARK
}

export const isVisibilityUnknownType = (
  type: GeneralType
): type is VisibilityUnknownTokenType => {
  return type === HyperTokenType.HYPER_CONTENT
}

// Token

type CommonToken = {
  index: number
  length: number

  value: string
  spaceAfter: string

  mark?: Mark
  markSide?: MarkSideType
}

type MutableCommonToken = CommonToken & {
  modifiedValue: string
  ignoredValue?: string
  modifiedSpaceAfter: string
  ignoredSpaceAfter?: string
  validations: Validation[]
}

export type SingleToken = CommonToken & {
  type: SingleTokenType
}

export type MutableSingleToken = MutableCommonToken & {
  type: SingleTokenType
  modifiedType: SingleTokenType
  ignoredType?: SingleTokenType
}

// A group is an array of its child tokens that also carries the token and
// pair fields itself.
export type GroupToken = Array<Token> &
  CommonToken &
  Pair & {
    type: GroupTokenType
    innerSpaceBefore: string
  }

export type MutableGroupToken = Array<MutableToken> &
  MutableCommonToken &
  Pair &
  MutablePair & {
    type: GroupTokenType
    modifiedType: GroupTokenType
    ignoredType?: GroupTokenType
    innerSpaceBefore: string
    modifiedInnerSpaceBefore: string
    ignoredInnerSpaceBefore?: string
  }

export type Token = SingleToken | GroupToken

export type MutableToken = MutableSingleToken | MutableGroupToken
// --------------------------------------------------------------------------------
// /src/rc/index.ts:
// --------------------------------------------------------------------------------
import { Options } from '../options.js'

import { resolve } from 'path'
import fs from 'fs'

import { env } from '../report.js'

// to walk around https://github.com/davidmyersdev/vite-plugin-node-polyfills/issues/82
const { existsSync, readFileSync } = fs || {}

type PathResult = {
  config: string | undefined
  fileIgnore: string | undefined
  caseIgnore: string | undefined
}

const DEFAULT_CONFIG_PATH = '.zhlintrc'
const DEFAULT_FILE_IGNORE_PATH = '.zhlintignore'
const DEFAULT_CASE_IGNORE_PATH = '.zhlintcaseignore'

/**
 * Resolve the three rc file paths against `dir` and keep only the ones that
 * exist on disk. A missing directory or file is logged, never thrown.
 *
 * @param dir base directory; falls back to `'.'` when nullish
 * @param config config file name; falls back to `.zhlintrc`
 * @param fileIgnore file-ignore list name; falls back to `.zhlintignore`
 * @param caseIgnore case-ignore list name; falls back to `.zhlintcaseignore`
 * @param logger where the "does not exist" notices are written
 */
const resolvePath = (
  dir: string,
  config: string,
  fileIgnore: string,
  caseIgnore: string,
  logger: Console = env.defaultLogger
): PathResult => {
  const result: PathResult = {
    config: undefined,
    fileIgnore: undefined,
    caseIgnore: undefined
  }

  dir = resolve(dir ?? '.')
  if (!existsSync(dir)) {
    logger.log(`"${dir}" does not exist.`)
    return result
  }

  config = resolve(dir, config ?? DEFAULT_CONFIG_PATH)
  if (existsSync(config)) {
    result.config = config
  } else {
    logger.log(
      `Config file "${config}" does not exist. Will proceed as default.`
    )
  }

  fileIgnore = resolve(dir, fileIgnore ?? DEFAULT_FILE_IGNORE_PATH)
  if (existsSync(fileIgnore)) {
    result.fileIgnore = fileIgnore
  } else {
    // this notice is about the ignored *files* list, not the ignored cases
    logger.log(
      `Global ignored files list "${fileIgnore}" does not exist. Will proceed as none.`
    )
  }

  caseIgnore = resolve(dir, caseIgnore ?? DEFAULT_CASE_IGNORE_PATH)
  if (existsSync(caseIgnore)) {
    result.caseIgnore = caseIgnore
  } else {
    logger.log(
      `Global ignored cases file "${caseIgnore}" does not exist. Will proceed as none.`
    )
  }

  return result
}

export type Config = {
  preset?: string
  rules?: Options['rules']
  hyperParsers?: string[]
  fileIgnores?: string[]
  caseIgnores?: string[]
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const readJSONSync = (filepath: string): any => {
  const output = readFileSync(filepath, { encoding: 'utf8' })
  return JSON.parse(output)
}

/**
 * Append every non-empty, trimmed line of `content` to `existing`, skipping
 * lines that are already present. `existing` is extended in place; a new
 * array is only created when there is something to add, so the result stays
 * `undefined` for an empty file with no previous entries.
 */
const mergeIgnoreLines = (
  content: string,
  existing: string[] | undefined
): string[] | undefined => {
  let merged = existing
  content
    .split(/\n/)
    .map((x) => x.trim())
    .forEach((x) => {
      if (!x) {
        return
      }
      if (!merged) {
        merged = []
      }
      if (merged.indexOf(x) === -1) {
        merged.push(x)
      }
    })
  return merged
}

/**
 * Build the final config from the rc files that were found.
 *
 * The JSON config is read first (only fields of the expected shape are
 * taken over), then the lines of the two ignore files are merged into
 * `fileIgnores` / `caseIgnores`. Each read failure is logged and skipped.
 */
const resolveConfig = (
  normalizedConfigPath: string | undefined,
  normalizedFileIgnorePath: string | undefined,
  normalizedCaseIgnorePath: string | undefined,
  logger: Console = env.defaultLogger
): Config => {
  const result: Config = {
    preset: 'default'
  }

  if (normalizedConfigPath) {
    try {
      const config = readJSONSync(normalizedConfigPath) as Config
      if (typeof config.preset === 'string') {
        result.preset = config.preset
      }
      if (typeof config.rules === 'object') {
        result.rules = config.rules
      }
      if (Array.isArray(config.hyperParsers)) {
        result.hyperParsers = config.hyperParsers
      }
      if (Array.isArray(config.fileIgnores)) {
        result.fileIgnores = config.fileIgnores
      }
      if (Array.isArray(config.caseIgnores)) {
        result.caseIgnores = config.caseIgnores
      }
    } catch (error) {
      logger.log(
        `Failed to read "${normalizedConfigPath}": ${(error as Error).message}`
      )
    }
  }

  if (normalizedFileIgnorePath) {
    try {
      const fileIgnores = readFileSync(normalizedFileIgnorePath, {
        encoding: 'utf8'
      })
      const merged = mergeIgnoreLines(fileIgnores, result.fileIgnores)
      if (merged) {
        result.fileIgnores = merged
      }
    } catch (error) {
      logger.log(
        `Failed to read "${normalizedFileIgnorePath}": ${(error as Error).message}`
      )
    }
  }

  if (normalizedCaseIgnorePath) {
    try {
      const caseIgnores = readFileSync(normalizedCaseIgnorePath, {
        encoding: 'utf8'
      })
      const merged = mergeIgnoreLines(caseIgnores, result.caseIgnores)
      if (merged) {
        result.caseIgnores = merged
      }
    } catch (error) {
      logger.log(
        `Failed to read "${normalizedCaseIgnorePath}": ${(error as Error).message}`
      )
    }
  }

  return result
}

/**
 * Locate the rc files below `dir` and read them into a `Config`.
 */
export const readRc = (
  dir: string,
  config: string,
  fileIgnore: string,
  caseIgnore: string,
  logger: Console = env.defaultLogger
): Config => {
  const {
    config: normalizedConfigPath,
    fileIgnore: normalizedFileIgnorePath,
    caseIgnore: normalizedCaseIgnorePath
  } = resolvePath(dir, config, fileIgnore, caseIgnore, logger)
  return resolveConfig(
    normalizedConfigPath,
    normalizedFileIgnorePath,
    normalizedCaseIgnorePath,
    logger
  )
}
// --------------------------------------------------------------------------------
// /src/replace-block.ts:
// --------------------------------------------------------------------------------
import type { ParsedBlock } from './hypers/types.js'

export type Piece = ParsedBlock | NonBlock

export type NonBlock = {
  nonBlock: true
  start: number
  end: number
  value: string
}

export const isBlock = (piece: Piece): piece is ParsedBlock => {
  return !('nonBlock' in piece)
}

/**
 * Split `str` into an ordered list of pieces: the given blocks plus the
 * untouched text before, between and after them, and join the pieces'
 * values back into one string.
 *
 * NOTE(review): the gap computation uses the previous piece's `end`, so the
 * blocks are presumably expected in ascending, non-overlapping order —
 * confirm against the callers.
 */
const replaceBlocks = (
  str: string,
  blocks: ParsedBlock[]
): {
  value: string
  pieces: Piece[]
} => {
  if (blocks.length === 0) {
    return {
      value: str,
      pieces: [{ value: str, start: 0, end: str.length, nonBlock: true }]
    }
  }

  const pieces = blocks.reduce((pieces: Piece[], block, index) => {
    const { start, end } = block
    const lastPiece = pieces[pieces.length - 1]
    const nextStart = lastPiece ? lastPiece.end : 0

    // non-block piece before the current block.
    if (nextStart < start) {
      const nonBlockPiece: NonBlock = {
        nonBlock: true,
        start: nextStart,
        end: start,
        value: ''
      }
      nonBlockPiece.value = str.substring(
        nonBlockPiece.start,
        nonBlockPiece.end
      )
      pieces.push(nonBlockPiece)
    }

    // The current block piece.
    pieces.push(block)

    // Trailing non-block piece.
    if (index === blocks.length - 1 && end !== str.length) {
      const nonBlockPiece: NonBlock = {
        nonBlock: true,
        start: end,
        end: str.length,
        value: ''
      }
      nonBlockPiece.value = str.substring(
        nonBlockPiece.start,
        nonBlockPiece.end
      )
      pieces.push(nonBlockPiece)
    }
    return pieces
  }, [])

  const value = pieces.map(({ value }) => value).join('')

  return { value, pieces }
}

export default replaceBlocks
// --------------------------------------------------------------------------------
// /src/report.ts:
// --------------------------------------------------------------------------------
import chalk from 'chalk'
import {
  CharType,
  checkCharType,
  isFullwidthPunctuationType,
  isHalfwidthPunctuationType
} from './parser/index.js'

export const env: {
  stdout: NodeJS.WritableStream
  stderr: NodeJS.WritableStream
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  defaultLogger: Console
} = {
  stdout: globalThis?.process?.stdout,
  stderr: globalThis?.process?.stderr,
  defaultLogger: console
}

// In dev mode all output goes to ./stdout.log and ./stderr.log instead of
// the process streams.
if (globalThis.__DEV__) {
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const fs = require('fs')
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const { Console: NativeConsole } = require('console')
  env.stdout = fs.createWriteStream('./stdout.log', { encoding: 'utf-8' })
  env.stderr = fs.createWriteStream('./stderr.log', { encoding: 'utf-8' })
  env.defaultLogger = new NativeConsole(env.stdout, env.stderr)
}

type Position = {
  offset: number
  row: number
  column: number
  line: string
}

/**
 * Translate a character offset in `str` into a 1-based row, a 0-based
 * column and the text of that row.
 */
const getPositionByOffset = (str: string, offset: number): Position => {
  const rows = str.split('\n')
  const rowLengthList = rows.map((substr) => substr.length)
  const position = {
    offset,
    row: 0,
    column: 0,
    line: ''
  }
  // consume one row at a time until the remaining offset falls inside it;
  // `+ 1` accounts for the '\n' removed by `split`
  while (position.offset >= 0 && rows.length) {
    position.row++
    position.column = position.offset
    position.line = rows.shift() || ''
    position.offset -= (rowLengthList.shift() || 0) + 1
  }
  return position
}

export enum ValidationTarget {
  VALUE = 'value',
  START_VALUE = 'startValue',
  END_VALUE = 'endValue',
  SPACE_AFTER = 'spaceAfter',
  INNER_SPACE_BEFORE = 'innerSpaceBefore'
}

export type Validation = {
  // the type and value of message
  name: string
  message: string

  // position of the token
  index: number
  length: number

  // which part of the token the error comes from
  target: ValidationTarget
}

// Quotation marks that are classified as full-width but are padded like
// half-width characters when the marker line is built.
const adjustedFullwidthPunctuations = `“”‘’`

/**
 * Build the `^` marker line that is printed below a displayed fragment.
 *
 * Every character before `index` contributes padding: CJK characters and
 * full-width punctuation (except the adjusted quotation marks) are padded
 * with an ideographic space (U+3000); western letters, half-width
 * punctuation, spaces and the adjusted quotation marks are padded with a
 * regular space. Characters of any other type add no padding.
 */
const generateMarker = (str: string, index: number): string => {
  const prefix = str.substring(0, index)
  let fullwidthCount = 0
  let halfwidthCount = 0
  for (let i = 0; i < prefix.length; i++) {
    const char = prefix[i]
    const charType = checkCharType(char)
    const isAdjusted = adjustedFullwidthPunctuations.indexOf(char) !== -1
    if (
      charType === CharType.CJK_CHAR ||
      (isFullwidthPunctuationType(charType) && !isAdjusted)
    ) {
      fullwidthCount++
    } else if (
      charType === CharType.WESTERN_LETTER ||
      isHalfwidthPunctuationType(charType) ||
      isAdjusted ||
      charType === CharType.SPACE
    ) {
      halfwidthCount++
    }
  }
  return (
    ' '.repeat(halfwidthCount) +
    '\u3000'.repeat(fullwidthCount) +
    `${chalk.red('^')}`
  )
}

/**
 * Print every validation of one file: a `file:row:column - message`
 * headline, a fragment of the affected line (up to 20 characters of context
 * on each side) and the marker line below it.
 */
export const reportItem = (
  file: string | undefined = '',
  str: string,
  validations: Validation[],
  logger = env.defaultLogger
) => {
  validations.forEach(({ index, length, target, message }) => {
    // 0. final index and position
    // issues on the trailing parts of a token point at the token's end
    const finalIndex =
      target === ValidationTarget.SPACE_AFTER ||
      target === ValidationTarget.END_VALUE
        ? index + length
        : index
    const { row, column, line } = getPositionByOffset(str, finalIndex)

    // 1. headline
    const fileDisplay = `${chalk.blue.bgWhite(file)}${file ? ':' : ''}`
    const positionDisplay = `${chalk.yellow(row)}:${chalk.yellow(column)}`
    const headline = `${fileDisplay}${positionDisplay} - ${message}`

    // 2. display fragment
    const displayRange = 20
    const displayStart = column - displayRange < 0 ? 0 : column - displayRange
    const displayEnd =
      column + length + displayRange > line.length - 1
        ? line.length
        : column + length + displayRange
    const displayFragment = line
      .substring(displayStart, displayEnd)
      .replace(/\n/g, '\\n')

    // 3. marker below
    const markerBelow = generateMarker(displayFragment, column - displayStart)

    logger.error(`${headline}\n\n${displayFragment}\n${markerBelow}\n`)
  })
}

export type Result = {
  // the basic info and availability of the file
  file?: string
  disabled: boolean

  // the original content of the file
  origin: string

  // all the error messages
  validations: Validation[]
}

/**
 * Report a list of lint results.
 *
 * Disabled results are only logged as such. For the others every validation
 * is printed through `reportItem`, followed by a summary.
 *
 * @returns `1` when at least one validation was found, otherwise `undefined`
 */
export const report = (resultList: Result[], logger = env.defaultLogger) => {
  let errorCount = 0
  const invalidFiles: string[] = []
  resultList
    .filter(({ file, disabled }) => {
      if (disabled) {
        if (file) {
          logger.log(`${chalk.blue.bgWhite(file)}: disabled`)
        } else {
          logger.log(`disabled`)
        }
        return false
      }
      return true
    })
    .forEach(({ file, origin, validations }: Result) => {
      reportItem(file, origin, validations, logger)
      errorCount += validations.length
      if (file && validations.length) {
        invalidFiles.push(file)
      }
    })
  if (errorCount) {
    const errors: string[] = []
    // results without a file name yield no entries, so skip the list
    // instead of printing an empty "- " bullet
    if (invalidFiles.length) {
      errors.push('Invalid files:')
      errors.push('- ' + invalidFiles.join('\n- ') + '\n')
    }
    errors.push(`Found ${errorCount} ${errorCount > 1 ? 'errors' : 'error'}.`)
    logger.error(errors.join('\n'))
    return 1
  } else {
    logger.log(`No error found.`)
  }
}
// --------------------------------------------------------------------------------
// /src/rules/case-abbrs.ts:
// --------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule is used to revert changes of abbreviations.
5 | * 6 | * Details: 7 | * - the point is rever the trailing dot 8 | */ 9 | 10 | import { 11 | CharType, 12 | Handler, 13 | MutableGroupToken, 14 | MutableToken 15 | } from '../parser/index.js' 16 | import { ValidationTarget } from '../report.js' 17 | import { 18 | findTokenAfter, 19 | findTokenBefore, 20 | Options, 21 | removeValidationOnTarget 22 | } from './util.js' 23 | 24 | const defaultSkippedAbbrs = [ 25 | 'Mr.', 26 | 'Mrs.', 27 | 'Dr.', 28 | 'Jr.', 29 | 'Sr.', 30 | 'vs.', 31 | 'etc.', 32 | 'i.e.', 33 | 'e.g.', 34 | 'a.k.a.' 35 | ] 36 | 37 | const reverseAbbrsIntoChars = (abbrs: string[]): string[][] => { 38 | return abbrs.map((str) => str.split('.').reverse().slice(1)) 39 | } 40 | 41 | const matchAbbr = ( 42 | token: MutableToken, 43 | group: MutableGroupToken, 44 | reversedAbbrChars: string[][] 45 | ): boolean => { 46 | // find previous token 47 | const tokenBefore = findTokenBefore(group, token) 48 | if (tokenBefore && !tokenBefore.spaceAfter) { 49 | // get the next matching abbr chars by removing the last char and filtering 50 | const matchedAbbrChars = reversedAbbrChars 51 | .filter( 52 | (abbr) => tokenBefore.value && abbr[0].toLowerCase() === tokenBefore.value.toLowerCase() 53 | ) 54 | .map((abbr) => abbr.slice(1)) 55 | 56 | // keep matching until any abbr chars fully matched 57 | // then return true 58 | if (matchedAbbrChars.length) { 59 | const lastMatched = matchedAbbrChars[matchedAbbrChars.length - 1] 60 | if (lastMatched.length) { 61 | const tokenBeforeBefore = findTokenBefore(group, tokenBefore) 62 | if ( 63 | tokenBeforeBefore && 64 | !tokenBeforeBefore.spaceAfter && 65 | tokenBeforeBefore.value === '.' 
66 | ) { 67 | const result = matchAbbr(tokenBeforeBefore, group, matchedAbbrChars) 68 | if (result) { 69 | return true 70 | } 71 | } 72 | } else { 73 | return true 74 | } 75 | } 76 | } 77 | 78 | return false 79 | } 80 | 81 | const generateHandler = (options: Options): Handler => { 82 | const reversedAbbrChars = reverseAbbrsIntoChars(options.skipAbbrs || []) 83 | 84 | return (token: MutableToken, _: number, group: MutableGroupToken) => { 85 | // skip non-dot tokens 86 | if (token.value !== '.') { 87 | return 88 | } 89 | 90 | // make sure it's the ending dot of the abbr 91 | const tokenAfter = findTokenAfter(group, token) 92 | if ( 93 | tokenAfter && 94 | tokenAfter.type === CharType.WESTERN_LETTER && 95 | !token.spaceAfter 96 | ) { 97 | return 98 | } 99 | 100 | // keep the dot if the previous tokens match any abbr 101 | if (matchAbbr(token, group, reversedAbbrChars)) { 102 | token.modifiedValue = '.' 103 | token.modifiedType = token.type 104 | removeValidationOnTarget(token, ValidationTarget.VALUE) 105 | } 106 | } 107 | } 108 | 109 | export const defaultConfig: Options = { 110 | skipAbbrs: defaultSkippedAbbrs 111 | } 112 | 113 | export default generateHandler 114 | -------------------------------------------------------------------------------- /src/rules/case-html-entity.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is used to revert changes of HTML entities. 
5 | * 6 | * Details: 7 | * - to match `&;` 8 | */ 9 | 10 | import { 11 | CharType, 12 | Handler, 13 | MutableGroupToken, 14 | MutableToken 15 | } from '../parser/index.js' 16 | import { ValidationTarget } from '../report.js' 17 | import { 18 | findWrappersBetween, 19 | findNonCodeVisibleTokenAfter, 20 | findTokenAfter, 21 | Options, 22 | removeValidationOnTarget 23 | } from './util.js' 24 | 25 | const generateHandler = (options: Options): Handler => { 26 | options 27 | 28 | return (token: MutableToken, _: number, group: MutableGroupToken) => { 29 | // skip non-& tokens 30 | if (token.value !== '&') { 31 | return 32 | } 33 | 34 | // skip non-half-width-content tokens 35 | const tokenAfter = findTokenAfter(group, token) 36 | if ( 37 | !tokenAfter || 38 | tokenAfter.type !== CharType.WESTERN_LETTER || 39 | token.spaceAfter 40 | ) { 41 | return 42 | } 43 | 44 | // skip non-semicolon tokens 45 | const thirdToken = findTokenAfter(group, tokenAfter) 46 | if (!thirdToken || thirdToken.value !== ';' || tokenAfter.spaceAfter) { 47 | return 48 | } 49 | 50 | // revert & 51 | token.modifiedValue = token.value 52 | token.modifiedType = token.type 53 | token.modifiedSpaceAfter = token.spaceAfter 54 | removeValidationOnTarget(token, ValidationTarget.VALUE) 55 | removeValidationOnTarget(token, ValidationTarget.SPACE_AFTER) 56 | 57 | // revert half-width content 58 | tokenAfter.modifiedValue = tokenAfter.value 59 | tokenAfter.modifiedType = tokenAfter.type 60 | tokenAfter.modifiedSpaceAfter = tokenAfter.spaceAfter 61 | removeValidationOnTarget(tokenAfter, ValidationTarget.VALUE) 62 | removeValidationOnTarget(tokenAfter, ValidationTarget.SPACE_AFTER) 63 | 64 | // revert ; 65 | thirdToken.modifiedValue = thirdToken.value 66 | thirdToken.modifiedType = thirdToken.type 67 | removeValidationOnTarget(thirdToken, ValidationTarget.VALUE) 68 | removeValidationOnTarget(thirdToken, ValidationTarget.SPACE_AFTER) 69 | 70 | const nextToken = findNonCodeVisibleTokenAfter(group, thirdToken) 71 | if 
(nextToken) { 72 | const { spaceHost } = findWrappersBetween(group, thirdToken, nextToken) 73 | if (spaceHost) { 74 | spaceHost.modifiedSpaceAfter = spaceHost.spaceAfter 75 | removeValidationOnTarget(spaceHost, ValidationTarget.SPACE_AFTER) 76 | } 77 | } 78 | } 79 | } 80 | 81 | export const defaultConfig: Options = {} 82 | 83 | export default generateHandler 84 | -------------------------------------------------------------------------------- /src/rules/case-linebreak.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is used to revert changes of spaceAfter with linebreaks. 5 | * And it's compulsory. 6 | */ 7 | 8 | // TODO: better blockquote and indentation handling in markdown 9 | 10 | import { Handler, MutableToken } from '../parser/index.js' 11 | import { ValidationTarget } from '../report.js' 12 | import { Options, removeValidationOnTarget } from './util.js' 13 | 14 | const generateHandler = (options: Options): Handler => { 15 | // do nothing 16 | options 17 | 18 | return (token: MutableToken) => { 19 | if (token.spaceAfter && token.spaceAfter.match(/\n/)) { 20 | removeValidationOnTarget(token, ValidationTarget.SPACE_AFTER) 21 | token.modifiedSpaceAfter = token.spaceAfter 22 | } 23 | } 24 | } 25 | 26 | export const defaultConfig: Options = {} 27 | 28 | export default generateHandler 29 | -------------------------------------------------------------------------------- /src/rules/case-pure-western.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is resetting all the validations in sentences which are full of 5 | * western letters and punctuations. 
6 | * 7 | * Options 8 | * - skipPureWestern: boolean | undefined 9 | */ 10 | 11 | import { 12 | Handler, 13 | MutableGroupToken, 14 | MutableToken, 15 | GroupTokenType, 16 | isFullwidthType 17 | } from '../parser/index.js' 18 | import { ValidationTarget } from '../report.js' 19 | import { Options, removeValidationOnTarget } from './util.js' 20 | 21 | const findNonWestern = (group: MutableGroupToken): boolean => { 22 | return group.some((token) => { 23 | if (token.type === GroupTokenType.GROUP) { 24 | return findNonWestern(token) 25 | } 26 | if (isFullwidthType(token.type)) { 27 | if (token.value.match(/[‘’“”]/)) { 28 | return false 29 | } 30 | return true 31 | } 32 | }) 33 | } 34 | 35 | const resetValidation = (group: MutableGroupToken): void => { 36 | group.modifiedSpaceAfter = group.spaceAfter 37 | group.modifiedInnerSpaceBefore = group.innerSpaceBefore 38 | group.modifiedStartValue = group.startValue 39 | group.modifiedEndValue = group.endValue 40 | group.validations.length = 0 41 | group.forEach((token) => { 42 | token.validations.length = 0 43 | token.modifiedSpaceAfter = token.spaceAfter 44 | if (token.type === GroupTokenType.GROUP) { 45 | resetValidation(token) 46 | } else { 47 | token.modifiedType = token.type 48 | token.modifiedValue = token.value 49 | } 50 | }) 51 | } 52 | 53 | const generateHandler = (options: Options): Handler => { 54 | const skipPureWestern = options?.skipPureWestern 55 | 56 | return (_: MutableToken, index: number, group: MutableGroupToken) => { 57 | if (!skipPureWestern) { 58 | return 59 | } 60 | 61 | if (!group.startValue && index === 0) { 62 | const hasNonWestern = findNonWestern(group) 63 | if (!hasNonWestern) { 64 | resetValidation(group) 65 | } 66 | } 67 | } 68 | } 69 | 70 | export const defaultConfig: Options = { 71 | skipPureWestern: true 72 | } 73 | 74 | export default generateHandler 75 | -------------------------------------------------------------------------------- /src/rules/case-zh-units.ts: 
--------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule is used to revert changes of spaceAfter between numbers and
 * Chinese units.
 */

import {
  CharType,
  checkCharType,
  Handler,
  MutableGroupToken,
  MutableToken
} from '../parser/index.js'
import { ValidationTarget } from '../report.js'
import {
  findWrappersBetween,
  findNonCodeVisibleTokenAfter,
  findNonCodeVisibleTokenBefore,
  Options,
  removeValidationOnTarget
} from './util.js'

const defaultSkippedZhUnits = `年月日天号时分秒`

/**
 * Generate the handler which keeps a number and the Chinese unit right after
 * it together when they were written without any space originally.
 *
 * Only the characters of `options.skipZhUnits` which are detected as
 * `CharType.CJK_CHAR` are used as units.
 */
const generateHandler = (options: Options): Handler => {
  const skippedZhUnits = options?.skipZhUnits || ''
  const matcherStr = skippedZhUnits
    .split('')
    .filter((x) => checkCharType(x) === CharType.CJK_CHAR)
    .join('')
  // matches a token which starts with one of the units
  const unitMatcher = new RegExp(`^[${matcherStr}]`)

  return (token: MutableToken, _: number, group: MutableGroupToken) => {
    // make sure the value is a number
    if (token.type === CharType.WESTERN_LETTER && token.value.match(/^\d+$/)) {
      // make sure the value after is a Chinese unit
      const tokenAfter = findNonCodeVisibleTokenAfter(group, token)

      // skip when the token after is array-like (not a single token)
      if (Array.isArray(tokenAfter)) return
      if (tokenAfter && tokenAfter.value.match(unitMatcher)) {
        // make sure there is no space between originally
        const { spaceHost: spaceHostAfter, tokens: tokenSeqAfter } =
          findWrappersBetween(group, token, tokenAfter)
        const hasSpaceAfterOriginally = tokenSeqAfter.some((x) => x.spaceAfter)
        if (hasSpaceAfterOriginally) {
          return
        }

        // if any token before
        const tokenBefore = findNonCodeVisibleTokenBefore(group, token)
        if (tokenBefore) {
          // make sure there is no space between originally
          const { spaceHost: spaceHostBefore, tokens: tokenSeqBefore } =
            findWrappersBetween(group, tokenBefore, token)
          const hasSpaceBeforeOriginally = tokenSeqBefore.some(
            (x) => x.spaceAfter
          )
          if (hasSpaceBeforeOriginally) {
            return
          }

          // revert non-space before
          if (spaceHostBefore) {
            spaceHostBefore.modifiedSpaceAfter = ''
            removeValidationOnTarget(
              spaceHostBefore,
              ValidationTarget.SPACE_AFTER
            )
          }
        }

        // revert non-space after
        if (spaceHostAfter) {
          spaceHostAfter.modifiedSpaceAfter = ''
          removeValidationOnTarget(spaceHostAfter, ValidationTarget.SPACE_AFTER)
        }
      }
    }
  }
}

export const defaultConfig: Options = {
  skipZhUnits: defaultSkippedZhUnits
}

export default generateHandler
--------------------------------------------------------------------------------
/src/rules/index.ts:
--------------------------------------------------------------------------------
import { Handler } from '../parser/index.js'
import { Options } from './util.js'

import genSpaceTrimGenerateHandler from './space-trim.js'

import genPunctuationWidthHandler from './punctuation-width.js'
import genPunctuationUnificationHandler from './punctuation-unification.js'

import genAbbrsHandler from './case-abbrs.js'

import genSpaceOfHyperMarkHandler from './space-hyper-mark.js'
import genSpaceOfCodeHandler from './space-code.js'
import genSpaceOfLetterHandler from './space-letter.js'
import genSpaceOfPunctuationHandler from './space-punctuation.js'
import genSpaceOfQuotationHandler from './space-quotation.js'
import genSpaceOfBracketHandler from './space-bracket.js'

import genLinebreakHandler from './case-linebreak.js'
import genZhUnitsHandler from './case-zh-units.js'
import genHtmlEntityHandler from './case-html-entity.js'

import genSkipPureWesternHandler from './case-pure-western.js'

/**
 * Build the handlers of all rules with the same options.
 * The handlers are returned in a fixed order: trimming, punctuation,
 * abbreviations, spaces, and the special cases which revert changes last.
 */
const generateHandlers = (options: Options): Handler[] => {
  return [
    genSpaceTrimGenerateHandler(options),

    genPunctuationWidthHandler(options),
    genPunctuationUnificationHandler(options),

    genAbbrsHandler(options),

    genSpaceOfHyperMarkHandler(options),
    genSpaceOfCodeHandler(options),
    genSpaceOfLetterHandler(options),
    genSpaceOfPunctuationHandler(options),
    genSpaceOfQuotationHandler(options),
    genSpaceOfBracketHandler(options),
    genLinebreakHandler(options),

    genZhUnitsHandler(options),
    genHtmlEntityHandler(options),

    genSkipPureWesternHandler(options)
  ]
}

// The default options of all rules.
export const defaultConfig: Options = {
  noSinglePair: true,
  halfwidthPunctuation: `()[]{}`,
  fullwidthPunctuation: `,。:;?!“”‘’`,
  adjustedFullwidthPunctuation: `“”‘’`,
  unifiedPunctuation: 'simplified',
  spaceBetweenHalfwidthContent: true,
  noSpaceBetweenFullwidthContent: true,
  spaceBetweenMixedwidthContent: true,
  noSpaceBeforePauseOrStop: true,
  spaceAfterHalfwidthPauseOrStop: true,
  noSpaceAfterFullwidthPauseOrStop: true,
  spaceOutsideHalfwidthQuotation: true,
  noSpaceOutsideFullwidthQuotation: true,
  noSpaceInsideQuotation: true,
  spaceOutsideHalfwidthBracket: true,
  noSpaceOutsideFullwidthBracket: true,
  noSpaceInsideBracket: true,
  spaceOutsideCode: true,
  noSpaceInsideHyperMark: true,
  trimSpace: true,
  skipZhUnits: `年月日天号时分秒`,
  skipAbbrs: [
    'Mr.',
    'Mrs.',
    'Dr.',
    'Jr.',
    'Sr.',
    'vs.',
    'etc.',
    'i.e.',
    'e.g.',
    'a.k.a.'
  ],
  skipPureWestern: true
}

export default generateHandlers
--------------------------------------------------------------------------------
/src/rules/messages.ts:
--------------------------------------------------------------------------------
// The messages reported by the rules.
// `SPACE` messages ask for one space, `NOSPACE` messages ask for no space.

export const CODE_SPACE_OUTSIDE = '此处内联代码的外部需要一个空格'
export const CODE_NOSPACE_OUTSIDE = '此处内联代码的外部不需要空格'

export const MARKDOWN_SPACE_OUTSIDE = '此处 Markdown 标记的空格需要在外部'
export const MARKDOWN_NOSPACE_INSIDE = '此处 Markdown 标记的内部不需要空格'

export const PUNCTUATION_UNIFICATION = '此处字符需要统一'
export const PUNCTUATION_UNIFICATION_TRADITIONAL = '此处标点符号需要统一到繁体'
export const PUNCTUATION_UNIFICATION_SIMPLIFIED = '此处标点符号需要统一到简体'

export const PUNCTUATION_FULL_WIDTH = '此处标点符号需要使用全角'
export const PUNCTUATION_HALF_WIDTH = '此处标点符号需要使用半角'

export const PUNCTUATION_NOSPACE_BEFORE = '此处标点符号前不需要空格'
export const PUNCTUATION_NOSPACE_AFTER = '此处标点符号后不需要空格'
export const PUNCTUATION_SPACE_AFTER = '此处标点符号后需要一个空格'

export const BRACKET_NOSPACE_INSIDE = '此处括号的内部不需要空格'
export const BRACKET_NOSPACE_OUTSIDE = '此处括号的外部不需要空格'
export const BRACKET_SPACE_OUTSIDE = '此处括号的外部需要一个空格'

export const CONTENT_SPACE_HALF_WIDTH = '此处半角内容之间需要一个空格'
export const CONTENT_NOSPACE_FULL_WIDTH = '此处全角内容之间不需要空格'
export const CONTENT_SPACE_MIXED_WIDTH = '此处中英文内容之间需要一个空格'
// the "no space" message must not ask for a space like the one above does
export const CONTENT_NOSPACE_MIXED_WIDTH = '此处中英文内容之间不需要空格'

export const QUOTATION_NOSPACE_INSIDE = '此处引号的内部不需要空格'
export const QUOTATION_NOSPACE_OUTSIDE = '此处引号的外部不需要空格'
export const QUOTATION_SPACE_OUTSIDE = '此处引号的外部需要一个空格'

export const TRIM_SPACE = '此处需要去除外部空格'
--------------------------------------------------------------------------------
/src/rules/punctuation-unification.ts:
--------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule will unify similar punctuations
into the same one. 5 | * Usually, it's just about Chinese quotations. 6 | * 7 | * Options: 8 | * - unifiedPunctuation: "traditional" | "simplified" | Record & { default: boolean } 9 | */ 10 | 11 | import { GroupTokenType, Handler, MutableToken } from '../parser/index.js' 12 | import { PUNCTUATION_UNIFICATION } from './messages.js' 13 | import { checkEndValue, checkStartValue, checkValue, Options } from './util.js' 14 | 15 | const defaultUnifiedMap: Record = { 16 | // U+2047 DOUBLE QUESTION MARK, U+203C DOUBLE EXCLAMATION MARK 17 | // U+2048 QUESTION EXCLAMATION MARK, U+2049 EXCLAMATION QUESTION MARK 18 | '??': ['⁇'], 19 | '!!': ['‼'], 20 | '?!': ['⁈'], 21 | '!?': ['⁉'], 22 | 23 | // U+002F SOLIDUS, U+FF0F FULLWIDTH SOLIDUS 24 | '/': ['/', '/'], 25 | 26 | // U+FF5E FULLWIDTH TILDE 27 | '~': ['~', '~'], 28 | 29 | // U+2026 HORIZONTAL ELLIPSIS, U+22EF MIDLINE HORIZONTAL ELLIPSIS 30 | '…': ['…', '⋯'], 31 | 32 | // U+25CF BLACK CIRCLE, U+2022 BULLET, U+00B7 MIDDLE DOT, 33 | // U+2027 HYPHENATION POINT, U+30FB KATAKANA MIDDLE DOT 34 | '·': ['●', '•', '·', '‧', '・'] 35 | } 36 | 37 | const simplifiedUnifiedMap: Record = { 38 | '“': ['「'], 39 | '”': ['」'], 40 | '‘': ['『'], 41 | '’': ['』'] 42 | } 43 | 44 | const traditionalUnifiedMap: Record = { 45 | '「': ['“'], 46 | '」': ['”'], 47 | '『': ['‘'], 48 | '』': ['’'] 49 | } 50 | 51 | const revertUnifiedMap = ( 52 | unifiedMap: Record 53 | ): Record => { 54 | const result: Record = {} 55 | for (const key in unifiedMap) { 56 | const value = unifiedMap[key] 57 | value.forEach((v) => { 58 | result[v] = key 59 | }) 60 | } 61 | return result 62 | } 63 | 64 | const getRevertedUnifiedMap = (options: Options): Record => { 65 | const unifiedOption = options?.unifiedPunctuation 66 | const langType = typeof unifiedOption === 'string' ? 
unifiedOption : undefined 67 | const unifiedMap: Record = {} 68 | 69 | if (langType) { 70 | Object.assign(unifiedMap, defaultUnifiedMap) 71 | if (langType === 'simplified') { 72 | Object.assign(unifiedMap, simplifiedUnifiedMap) 73 | } else if (langType === 'traditional') { 74 | Object.assign(unifiedMap, traditionalUnifiedMap) 75 | } 76 | } else if (typeof unifiedOption === 'object') { 77 | if (unifiedOption.default) { 78 | Object.assign(unifiedMap, defaultUnifiedMap) 79 | } 80 | Object.entries(unifiedOption).forEach(([key, value]) => { 81 | if (value === true) { 82 | unifiedMap[key] = defaultUnifiedMap[key] 83 | } else if (value === false) { 84 | delete unifiedMap[key] 85 | } else { 86 | unifiedMap[key] = value 87 | } 88 | }) 89 | } 90 | 91 | return revertUnifiedMap(unifiedMap) 92 | } 93 | 94 | const generateHandler = (options: Options): Handler => { 95 | const charMap = getRevertedUnifiedMap(options) 96 | 97 | const handlerPunctuationUnified = (token: MutableToken) => { 98 | if (token.type === GroupTokenType.GROUP) { 99 | if (Object.prototype.hasOwnProperty.call(charMap, token.modifiedStartValue)) { 100 | checkStartValue( 101 | token, 102 | charMap[token.modifiedStartValue], 103 | PUNCTUATION_UNIFICATION 104 | ) 105 | } 106 | if (Object.prototype.hasOwnProperty.call(charMap, token.modifiedEndValue)) { 107 | checkEndValue( 108 | token, 109 | charMap[token.modifiedEndValue], 110 | PUNCTUATION_UNIFICATION 111 | ) 112 | } 113 | return 114 | } else { 115 | if (Object.prototype.hasOwnProperty.call(charMap, token.modifiedValue)) { 116 | checkValue( 117 | token, 118 | charMap[token.modifiedValue], 119 | undefined, 120 | PUNCTUATION_UNIFICATION 121 | ) 122 | } 123 | } 124 | } 125 | 126 | return handlerPunctuationUnified 127 | } 128 | 129 | export const defaultConfig: Options = { 130 | unifiedPunctuation: 'simplified' 131 | } 132 | 133 | export default generateHandler 134 | -------------------------------------------------------------------------------- 
/src/rules/punctuation-width.ts:
--------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule will format each punctuation into the right width options.
 *
 * Options:
 * - halfwidthPunctuation: string = `()[]{}`
 * - fullwidthPunctuation: string = `,。:;?!“”‘’`
 * - adjustedFullwidthPunctuation: string = `“”‘’`
 *
 * Details:
 * - skip half-width punctuations between half-width content without space
 * - skip successive multiple half-width punctuations
 */

import {
  GroupTokenType,
  Handler,
  MutableGroupToken,
  MutableToken,
  HyperTokenType,
  isSinglePunctuationType,
  getFullwidthTokenType,
  getHalfwidthTokenType
} from '../parser/index.js'
import { PUNCTUATION_FULL_WIDTH, PUNCTUATION_HALF_WIDTH } from './messages.js'
import {
  checkValue,
  checkEndValue,
  checkStartValue,
  isHalfwidthPunctuationWithoutSpaceAround,
  isSuccessiveHalfwidthPunctuation,
  Options
} from './util.js'

type WidthPairList = Array<[halfwidth: string, fullwidth: string]>
type WidthSidePairList = Array<
  [halfwidth: string, fullwidthLeftSide: string, fullwidthRightSide: string]
>
// a punctuation -> the same punctuation in the other width
type AlterMap = Record<string, string>
// a half-width quotation -> its full-width left/right sides
type AlterPairMap = Record<string, [left: string, right: string]>

const widthPairList: WidthPairList = [
  [`,`, `,`],
  [`.`, `。`],
  [`;`, `;`],
  [`:`, `:`],
  [`?`, `?`],
  [`!`, `!`],
  [`(`, `(`],
  [`)`, `)`],
  [`[`, `[`],
  [`]`, `]`],
  [`{`, `{`],
  [`}`, `}`]
]
const widthSidePairList: WidthSidePairList = [
  [`"`, `“`, `”`],
  [`'`, `‘`, `’`]
]

const defaultHalfwidthOption = `()[]{}`
const defaultFullwidthOption = `,。:;?!“”‘’`
const defaultAdjustedFullwidthOption = `“”‘’`

// Mark the token as a half-width type when its modified value is listed in
// the adjusted full-width punctuations.
const checkAdjusted = (token: MutableToken, adjusted: string): void => {
  if (adjusted.indexOf(token.modifiedValue) >= 0) {
    token.modifiedType = getHalfwidthTokenType(token.type)
  }
}

/**
 * Turn the width options into lookup maps.
 *
 * - `halfwidthMap`: full-width punctuation -> half-width punctuation
 * - `fullwidthMap`: half-width punctuation -> full-width punctuation
 * - `fullwidthPairMap`: half-width quotation -> [left, right] full-width ones
 * - `adjusted`: the adjusted full-width punctuations as they are
 */
const parseOptions = (
  options: Options
): {
  halfwidthMap: AlterMap
  fullwidthMap: AlterMap
  fullwidthPairMap: AlterPairMap
  adjusted: string
} => {
  const halfwidthOption = options?.halfwidthPunctuation || ''
  const fullwidthOption = options?.fullwidthPunctuation || ''
  const adjustedFullwidthOption = options?.adjustedFullwidthPunctuation || ''

  const halfwidthMap: AlterMap = {}
  const fullwidthMap: AlterMap = {}
  const fullwidthPairMap: AlterPairMap = {}

  widthPairList.forEach(([halfwidth, fullwidth]) => {
    if (halfwidthOption.indexOf(halfwidth) >= 0) {
      halfwidthMap[fullwidth] = halfwidth
    }
    if (fullwidthOption.indexOf(fullwidth) >= 0) {
      fullwidthMap[halfwidth] = fullwidth
    }
  })
  widthSidePairList.forEach(([half, left, right]) => {
    if (halfwidthOption.indexOf(half) >= 0) {
      halfwidthMap[left] = half
      halfwidthMap[right] = half
    }
    if (
      fullwidthOption.indexOf(left) >= 0 ||
      fullwidthOption.indexOf(right) >= 0
    ) {
      fullwidthPairMap[half] = [left, right]
    }
  })

  return {
    halfwidthMap,
    fullwidthMap,
    fullwidthPairMap,
    adjusted: adjustedFullwidthOption
  }
}

/**
 * Generate the handler which converts punctuations, brackets and the
 * start/end values of groups into the width chosen by the options.
 */
const generateHandler = (options: Options): Handler => {
  const { halfwidthMap, fullwidthMap, fullwidthPairMap, adjusted } =
    parseOptions(options)

  const handleHyperSpaceOption: Handler = (
    token: MutableToken,
    _,
    group: MutableGroupToken
  ) => {
    // skip non-punctuation/quotation/bracket situations
    if (
      !isSinglePunctuationType(token.type) &&
      token.type !== HyperTokenType.BRACKET_MARK &&
      token.type !== GroupTokenType.GROUP
    ) {
      return
    }

    // skip halfwidth punctuations between halfwidth content without space
    if (isHalfwidthPunctuationWithoutSpaceAround(group, token)) {
      return
    }

    // skip successive multiple half-width punctuations
    if (isSuccessiveHalfwidthPunctuation(group, token)) {
      return
    }

    // 1. normal punctuations in the alter width map
    // 2. brackets in the alter width map
    if (
      isSinglePunctuationType(token.type) ||
      token.type === HyperTokenType.BRACKET_MARK
    ) {
      const value = token.modifiedValue
      if (fullwidthMap[value]) {
        checkValue(
          token,
          fullwidthMap[value],
          getFullwidthTokenType(token.type),
          PUNCTUATION_FULL_WIDTH
        )
        checkAdjusted(token, adjusted)
      } else if (halfwidthMap[value]) {
        checkValue(
          token,
          halfwidthMap[value],
          getHalfwidthTokenType(token.type),
          PUNCTUATION_HALF_WIDTH
        )
      }
      return
    }

    // 3. quotations in the alter pair map
    // `fullwidthPairMap` holds [left, right] pairs, while `halfwidthMap`
    // holds plain strings which must be used as a whole (not indexed)
    const startValue = (token as MutableGroupToken).modifiedStartValue
    const endValue = (token as MutableGroupToken).modifiedEndValue
    if (fullwidthPairMap[startValue]) {
      checkStartValue(
        token,
        fullwidthPairMap[startValue][0],
        PUNCTUATION_FULL_WIDTH
      )
    } else if (halfwidthMap[startValue]) {
      checkStartValue(token, halfwidthMap[startValue], PUNCTUATION_HALF_WIDTH)
    }
    if (fullwidthPairMap[endValue]) {
      checkEndValue(
        token,
        fullwidthPairMap[endValue][1],
        PUNCTUATION_FULL_WIDTH
      )
    } else if (halfwidthMap[endValue]) {
      checkEndValue(token, halfwidthMap[endValue], PUNCTUATION_HALF_WIDTH)
    }
  }
  return handleHyperSpaceOption
}

export const defaultConfig: Options = {
  halfwidthPunctuation: defaultHalfwidthOption,
  fullwidthPunctuation: defaultFullwidthOption,
  adjustedFullwidthPunctuation: defaultAdjustedFullwidthOption
}

export default
generateHandler
--------------------------------------------------------------------------------
/src/rules/space-bracket.ts:
--------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule is checking spaces besides brackets.
 *
 * Options
 * - noSpaceInsideBracket: boolean | undefined
 * - spaceOutsideHalfwidthBracket: boolean | undefined
 * - noSpaceOutsideFullwidthBracket: boolean | undefined
 * - adjustedFullwidthPunctuation: string | undefined
 *   (the listed characters are not treated as full-width ones)
 *
 * Details:
 * - noSpaceInsideBracket:
 *   - left-bracket x anything
 *   - non-left-bracket x right-bracket
 * - spaceOutsideHalfwidthBracket:
 *   - right-half-bracket x left-half-bracket
 *   - right-half-bracket x content/left-quotation/code
 *   - content/right-quotation/code x left-half-bracket
 * - noSpaceOutsideFullwidthBracket:
 *   - right-full-bracket x left-full-bracket
 *   - right-full-bracket x content/left-quotation/code
 *   - content/right-quotation/code x left-full-bracket
 */

import {
  CharType,
  GroupTokenType,
  Handler,
  isLetterType,
  isFullwidthPair,
  MarkSideType,
  MutableGroupToken,
  MutableSingleToken,
  MutableToken,
  HyperTokenType
} from '../parser/index.js'
import {
  checkSpaceAfter,
  findVisibleTokenAfter,
  findVisibleTokenBefore,
  findWrappersBetween,
  findTokenAfter,
  findTokenBefore,
  Options
} from './util.js'
import {
  BRACKET_NOSPACE_INSIDE,
  BRACKET_NOSPACE_OUTSIDE,
  BRACKET_SPACE_OUTSIDE
} from './messages.js'

// A char counts as full-width when it is a full-width pair char
// and it is not listed in the adjusted ones.
const isFullWidth = (char: string, adjusted: string): boolean => {
  return isFullwidthPair(char) && adjusted.indexOf(char) === -1
}

/**
 * Tell whether a bracket should be left alone: a half-width bracket which
 * has no space around originally and sits between half-width content, or
 * belongs to an empty pair `()` beside half-width content.
 */
const shouldSkip = (
  before: MutableToken | undefined,
  beforeTokenSeq: MutableToken[],
  token: MutableSingleToken,
  afterTokenSeq: MutableToken[],
  after: MutableToken | undefined
): boolean => {
  if (!before || !after) {
    return false
  }
  if (isFullwidthPair(token.value) || isFullwidthPair(token.modifiedValue)) {
    return false
  }
  if (
    beforeTokenSeq.filter((x) => x.spaceAfter).length ||
    afterTokenSeq.filter((x) => x.spaceAfter).length
  ) {
    return false
  }
  return (
    // x(x
    //  ^
    (before.type === CharType.WESTERN_LETTER ||
      // x()
      //   ^
      (before.value === '(' && token.value === ')')) &&
    // x)x
    //  ^
    (after.type === CharType.WESTERN_LETTER ||
      // ()x
      // ^
      (token.value === '(' && after.value === ')'))
  )
}

/**
 * Generate the handler which checks the spaces inside and outside brackets.
 */
const generateHandler = (options: Options): Handler => {
  const noInsideBracketOption = options.noSpaceInsideBracket
  const spaceOutsideHalfBracketOption = options.spaceOutsideHalfwidthBracket
  const noSpaceOutsideFullBracketOption = options.noSpaceOutsideFullwidthBracket
  const adjustedFullWidthOption = options.adjustedFullwidthPunctuation || ''

  return (token: MutableToken, _: number, group: MutableGroupToken) => {
    // skip non-bracket tokens
    if (token.type !== HyperTokenType.BRACKET_MARK) {
      return
    }

    // 1. no space inside bracket
    if (noInsideBracketOption) {
      if (token.markSide === MarkSideType.LEFT) {
        // no space after
        const tokenAfter = findTokenAfter(group, token)
        if (tokenAfter) {
          checkSpaceAfter(token, '', BRACKET_NOSPACE_INSIDE)
        }
      } else {
        // no space before
        const tokenBefore = findTokenBefore(group, token)
        if (
          tokenBefore &&
          // dedupe
          tokenBefore.markSide !== MarkSideType.LEFT
        ) {
          checkSpaceAfter(tokenBefore, '', BRACKET_NOSPACE_INSIDE)
        }
      }
    }

    // skip bracket between half-width content without spaces
    // or empty brackets beside half-width content without spaces
    const contentTokenBefore = findVisibleTokenBefore(group, token)
    const contentTokenAfter = findVisibleTokenAfter(group, token)
    const { spaceHost: beforeSpaceHost, tokens: beforeTokenSeq } =
      findWrappersBetween(group, contentTokenBefore, token)
    const { spaceHost: afterSpaceHost, tokens: afterTokenSeq } =
      findWrappersBetween(group, token, contentTokenAfter)
    if (
      shouldSkip(
        contentTokenBefore,
        beforeTokenSeq,
        token,
        afterTokenSeq,
        contentTokenAfter
      )
    ) {
      return
    }

    // 2. spaces outside half/full bracket
    if (
      typeof spaceOutsideHalfBracketOption !== 'undefined' ||
      noSpaceOutsideFullBracketOption
    ) {
      const fullWidth = isFullWidth(
        token.modifiedValue,
        adjustedFullWidthOption
      )

      // 2.1 right-bracket x left-bracket
      if (contentTokenAfter) {
        if (
          token.markSide === MarkSideType.RIGHT &&
          contentTokenAfter.markSide === MarkSideType.LEFT
        ) {
          if (afterSpaceHost) {
            const hasFullWidth =
              fullWidth ||
              isFullWidth(
                contentTokenAfter.modifiedValue,
                adjustedFullWidthOption
              )

            // 2.1.1 any-full-bracket
            // 2.1.2 right-half-bracket x left-half-bracket
            // NOTE(review): this branch checks the space on `token` while
            // 2.2/2.3 check it on the space host — confirm it is intended
            if (hasFullWidth) {
              if (noSpaceOutsideFullBracketOption) {
                checkSpaceAfter(token, '', BRACKET_NOSPACE_OUTSIDE)
              }
            } else {
              // skip no spaces between
              if (afterTokenSeq.filter((x) => x.spaceAfter).length > 0) {
                if (typeof spaceOutsideHalfBracketOption !== 'undefined') {
                  const spaceAfter = spaceOutsideHalfBracketOption ? ' ' : ''
                  const message = spaceOutsideHalfBracketOption
                    ? BRACKET_SPACE_OUTSIDE
                    : BRACKET_NOSPACE_OUTSIDE
                  checkSpaceAfter(token, spaceAfter, message)
                }
              }
            }
          }
        }
      }

      // 2.2 content/right-quotation/code x left-bracket
      // 2.3 right-bracket x content/left-quotation/code
      if (token.markSide === MarkSideType.LEFT) {
        if (
          contentTokenBefore &&
          (isLetterType(contentTokenBefore.type) ||
            contentTokenBefore.type === GroupTokenType.GROUP ||
            contentTokenBefore.type === HyperTokenType.CODE_CONTENT)
        ) {
          if (beforeSpaceHost) {
            // 2.2.1 content/right-quotation/code x left-full-bracket
            // 2.2.2 content/right-quotation/code x left-half-bracket
            if (
              fullWidth ||
              (contentTokenBefore.type === GroupTokenType.GROUP &&
                isFullWidth(
                  contentTokenBefore.modifiedEndValue,
                  adjustedFullWidthOption
                ))
            ) {
              if (noSpaceOutsideFullBracketOption) {
                checkSpaceAfter(beforeSpaceHost, '', BRACKET_NOSPACE_OUTSIDE)
              }
            } else {
              if (typeof spaceOutsideHalfBracketOption !== 'undefined') {
                const spaceAfter = spaceOutsideHalfBracketOption ? ' ' : ''
                const message = spaceOutsideHalfBracketOption
                  ? BRACKET_SPACE_OUTSIDE
                  : BRACKET_NOSPACE_OUTSIDE
                checkSpaceAfter(beforeSpaceHost, spaceAfter, message)
              }
            }
          }
        }
      } else {
        if (
          contentTokenAfter &&
          (isLetterType(contentTokenAfter.type) ||
            contentTokenAfter.type === GroupTokenType.GROUP ||
            contentTokenAfter.type === HyperTokenType.CODE_CONTENT)
        ) {
          if (afterSpaceHost) {
            // 2.3.1 right-full-bracket x content/left-quotation/code
            // 2.3.2 right-half-bracket x content/left-quotation/code
            if (
              fullWidth ||
              (contentTokenAfter.type === GroupTokenType.GROUP &&
                isFullWidth(
                  contentTokenAfter.modifiedStartValue,
                  adjustedFullWidthOption
                ))
            ) {
              if (noSpaceOutsideFullBracketOption) {
                checkSpaceAfter(afterSpaceHost, '', BRACKET_NOSPACE_OUTSIDE)
              }
            } else {
              if (typeof spaceOutsideHalfBracketOption !== 'undefined') {
                const spaceAfter = spaceOutsideHalfBracketOption ? ' ' : ''
                const message = spaceOutsideHalfBracketOption
                  ? BRACKET_SPACE_OUTSIDE
                  : BRACKET_NOSPACE_OUTSIDE
                checkSpaceAfter(afterSpaceHost, spaceAfter, message)
              }
            }
          }
        }
      }
    }
  }
}

export const defaultConfig: Options = {
  // the key which the handler above actually reads
  spaceOutsideHalfwidthBracket: true,
  // previous spelling of the key, kept for whoever still reads it
  spaceOutsideHalfBracket: true,
  noSpaceInsideBracket: true
}

export default generateHandler
--------------------------------------------------------------------------------
/src/rules/space-code.ts:
--------------------------------------------------------------------------------
/**
 * @fileoverview
 *
 * This rule will decide whether to keep a space outside inline code with
 * content like:
 * - xxx `foo` xxx
 * - xxx <code>foo</code> xxx
 * in markdown/html.
9 | * 10 | * Options: 11 | * - spaceOutsideCode: boolean | undefined 12 | * - `true`: keep one space outside (default) 13 | * - `false`: no space outside 14 | * - `undefined`: do nothing, just keep the original format 15 | * 16 | * Details: 17 | * - code x code 18 | * - content x code 19 | * - code x content 20 | */ 21 | 22 | import { 23 | Options, 24 | checkSpaceAfter, 25 | findVisibleTokenAfter, 26 | findVisibleTokenBefore, 27 | findWrappersBetween 28 | } from './util.js' 29 | import { 30 | Handler, 31 | isLetterType, 32 | MutableGroupToken, 33 | MutableToken, 34 | HyperTokenType 35 | } from '../parser/index.js' 36 | import { CODE_NOSPACE_OUTSIDE, CODE_SPACE_OUTSIDE } from './messages.js' 37 | 38 | const generateHandler = (options: Options): Handler => { 39 | const needSpaceOption = options?.spaceOutsideCode 40 | const spaceAfter = needSpaceOption ? ' ' : '' 41 | const message = needSpaceOption ? CODE_SPACE_OUTSIDE : CODE_NOSPACE_OUTSIDE 42 | const handleHyperSpaceOption: Handler = ( 43 | token: MutableToken, 44 | _, 45 | group: MutableGroupToken 46 | ) => { 47 | // skip if there is no options 48 | if (typeof needSpaceOption === 'undefined') { 49 | return 50 | } 51 | 52 | // skip non-code tokens 53 | if (token.type !== HyperTokenType.CODE_CONTENT) { 54 | return 55 | } 56 | 57 | // skip non-after-token situations 58 | const contentTokenBefore = findVisibleTokenBefore(group, token) 59 | const contentTokenAfter = findVisibleTokenAfter(group, token) 60 | const { spaceHost: beforeSpaceHost } = findWrappersBetween( 61 | group, 62 | contentTokenBefore, 63 | token 64 | ) 65 | const { spaceHost: afterSpaceHost } = findWrappersBetween( 66 | group, 67 | token, 68 | contentTokenAfter 69 | ) 70 | 71 | // content x code 72 | if (contentTokenBefore && isLetterType(contentTokenBefore.type)) { 73 | beforeSpaceHost && checkSpaceAfter(beforeSpaceHost, spaceAfter, message) 74 | } 75 | // code x content or code x code 76 | if ( 77 | contentTokenAfter && 78 | 
(isLetterType(contentTokenAfter.type) || 79 | contentTokenAfter.type === HyperTokenType.CODE_CONTENT) 80 | ) { 81 | afterSpaceHost && checkSpaceAfter(afterSpaceHost, spaceAfter, message) 82 | } 83 | } 84 | return handleHyperSpaceOption 85 | } 86 | 87 | export const defaultConfig: Options = { 88 | spaceOutsideCode: true 89 | } 90 | 91 | export default generateHandler 92 | -------------------------------------------------------------------------------- /src/rules/space-hyper-mark.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is to ensure all the existing spaces should be outside hyper 5 | * marks like *, _, [, ], etc. 6 | * 7 | * Options: 8 | * - noSpaceInsideMark: boolean | undefined 9 | * 10 | * For example: 11 | * - `x _ ** yyy ** _ z` should be `x _**yyy**_ z` 12 | * 13 | * Details: 14 | * - left-mark x left-mark: `x _ **yyy**_ z` 15 | * ^^^ 16 | * - right-mark x right-mark: `x _**yyy** _ z` 17 | * ^^^ 18 | * - left-mark x non-mark: `x _** yyy**_ z` 19 | * ^^^ 20 | * - non-mark x right-mark: `x _**yyy **_ z` 21 | * ^^^ 22 | */ 23 | 24 | import { 25 | Options, 26 | checkSpaceAfter, 27 | findTokenAfter, 28 | isWrapper, 29 | getWrapperSide 30 | } from './util.js' 31 | import { 32 | Handler, 33 | MarkSideType, 34 | MutableGroupToken, 35 | MutableToken 36 | } from '../parser/index.js' 37 | import { MARKDOWN_NOSPACE_INSIDE } from './messages.js' 38 | 39 | const generateHandler = (options: Options): Handler => { 40 | const noSpaceInsideMarkOption = options?.noSpaceInsideHyperMark 41 | 42 | return (token: MutableToken, _, group: MutableGroupToken) => { 43 | // skip if there is no options 44 | if (!noSpaceInsideMarkOption) { 45 | return 46 | } 47 | 48 | // skip non-after-token situations 49 | const tokenAfter = findTokenAfter(group, token) 50 | if (!tokenAfter) { 51 | return 52 | } 53 | 54 | // skip non-mark situations 55 | if (!isWrapper(token) && !isWrapper(tokenAfter)) { 56 | 
return 57 | } 58 | 59 | // 1. left x left, right x right 60 | // 2. left x non-mark 61 | // 3. non-mark x right 62 | const markSideBefore = getWrapperSide(token) 63 | const markSideAfter = getWrapperSide(tokenAfter) 64 | if (markSideBefore === markSideAfter) { 65 | checkSpaceAfter(token, '', MARKDOWN_NOSPACE_INSIDE) 66 | } else if (markSideBefore === MarkSideType.LEFT && !isWrapper(tokenAfter)) { 67 | checkSpaceAfter(token, '', MARKDOWN_NOSPACE_INSIDE) 68 | } else if (markSideAfter === MarkSideType.RIGHT && !isWrapper(token)) { 69 | checkSpaceAfter(token, '', MARKDOWN_NOSPACE_INSIDE) 70 | } 71 | } 72 | } 73 | // Default options for this rule; the key must be the one generateHandler reads (`noSpaceInsideHyperMark`). 74 | export const defaultConfig: Options = { 75 | noSpaceInsideHyperMark: true 76 | } 77 | 78 | export default generateHandler 79 | -------------------------------------------------------------------------------- /src/rules/space-letter.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is used to check whether there should be a space between 5 | * content.
6 | * 7 | * Options: 8 | * - spaceBetweenHalfwidthContent: boolean | undefined 9 | * - `true`: ensure one space between half-width content (default) 10 | * - `false` or `undefined`: do nothing, just keep the original format 11 | * - noSpaceBetweenFullwidthContent: boolean | undefined 12 | * - `true`: remove the space between full-width content (default) 13 | * - `false` or `undefined`: do nothing, just keep the original format 14 | * - spaceBetweenMixedwidthContent: boolean | undefined 15 | * - `true`: keep one space between width-mixed content (default) 16 | * - `false`: no space between width-mixed content 17 | * - `undefined`: do nothing, just keep the original format 18 | * 19 | * Examples (betweenMixedWidthContent = true): 20 | * - *a*啊 -> *a* 啊 21 | * - *a *啊 -> *a* 啊 22 | * - *啊*a -> *啊* a 23 | * - *啊 *a -> *啊* a 24 | * 25 | * Examples (betweenMixedWidthContent = false): 26 | * - *a* 啊 -> *a*啊 27 | * - *a *啊 -> *a*啊 28 | * - *啊* a -> *啊*a 29 | * - *啊 *a -> *啊*a 30 | */ 31 | 32 | import { 33 | CharType, 34 | Handler, 35 | isLetterType, 36 | MutableGroupToken, 37 | MutableToken 38 | } from '../parser/index.js' 39 | import { 40 | checkSpaceAfter, 41 | findVisibleTokenAfter, 42 | findWrappersBetween, 43 | Options 44 | } from './util.js' 45 | import { 46 | CONTENT_NOSPACE_FULL_WIDTH, 47 | CONTENT_NOSPACE_MIXED_WIDTH, 48 | CONTENT_SPACE_HALF_WIDTH, 49 | CONTENT_SPACE_MIXED_WIDTH 50 | } from './messages.js' 51 | 52 | const generateHandler = (options: Options): Handler => { 53 | const onlyOneBetweenHalfwidthContentOption = 54 | options?.spaceBetweenHalfwidthContent 55 | const noBetweenFullwidthContentOption = 56 | options?.noSpaceBetweenFullwidthContent 57 | const betweenMixedwidthContentOption = options?.spaceBetweenMixedwidthContent 58 | 59 | return (token: MutableToken, _: number, group: MutableGroupToken) => { 60 | // skip non-content tokens 61 | if (!isLetterType(token.type)) { 62 | return 63 | } 64 | 65 | // skip non-content after-tokens 66 | const 
contentTokenAfter = findVisibleTokenAfter(group, token) 67 | if (!contentTokenAfter || !isLetterType(contentTokenAfter.type)) { 68 | return 69 | } 70 | 71 | // find the space host 72 | const { spaceHost, tokens } = findWrappersBetween( 73 | group, 74 | token, 75 | contentTokenAfter 76 | ) 77 | 78 | // skip if the space host is not found 79 | if (!spaceHost) { 80 | return 81 | } 82 | 83 | // 1. half x half, full x full 84 | // 2. half x full, full x half 85 | if (contentTokenAfter.type === token.type) { 86 | // skip without custom option 87 | if (token.type === CharType.WESTERN_LETTER) { 88 | if (!onlyOneBetweenHalfwidthContentOption) { 89 | return 90 | } 91 | // skip if half-content x marks x half-content 92 | if ( 93 | tokens.length > 1 && 94 | tokens.filter((token) => token.spaceAfter).length === 0 95 | ) { 96 | return 97 | } 98 | } else { 99 | if (!noBetweenFullwidthContentOption) { 100 | return 101 | } 102 | } 103 | 104 | const spaceAfter = token.type === CharType.WESTERN_LETTER ? ' ' : '' 105 | const message = 106 | token.type === CharType.WESTERN_LETTER 107 | ? CONTENT_SPACE_HALF_WIDTH 108 | : CONTENT_NOSPACE_FULL_WIDTH 109 | 110 | checkSpaceAfter(spaceHost, spaceAfter, message) 111 | } else { 112 | // skip without custom option 113 | if (typeof betweenMixedwidthContentOption === 'undefined') { 114 | return 115 | } 116 | 117 | const spaceAfter = betweenMixedwidthContentOption ? ' ' : '' 118 | const message = betweenMixedwidthContentOption 119 | ? 
CONTENT_SPACE_MIXED_WIDTH 120 | : CONTENT_NOSPACE_MIXED_WIDTH 121 | 122 | checkSpaceAfter(spaceHost, spaceAfter, message) 123 | } 124 | } 125 | } 126 | // Default options for this rule; keys use the `...width...` spelling that generateHandler reads. 127 | export const defaultConfig: Options = { 128 | spaceBetweenHalfwidthContent: true, 129 | noSpaceBetweenFullwidthContent: true, 130 | spaceBetweenMixedwidthContent: true 131 | } 132 | 133 | export default generateHandler 134 | -------------------------------------------------------------------------------- /src/rules/space-punctuation.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is checking spaces besides normal punctuations. 5 | * Usually, for full-width punctuations, we don't need any spaces around. 6 | * For half-width punctuations, we need a space after that. 7 | * 8 | * Options 9 | * - noSpaceBeforePunctuation: boolean | undefined 10 | * - `true`: remove spaces before a half-width punctuation (default) 11 | * - `false` or `undefined`: do nothing, just keep the original format 12 | * - spaceAfterHalfWidthPunctuation: boolean | undefined 13 | * - `true`: ensure one space after a half-width punctuation (default) 14 | * - `false` or `undefined`: do nothing, just keep the original format 15 | * - noSpaceAfterFullWidthPunctuation: boolean | undefined 16 | * - `true`: remove spaces around a full-width punctuation (default) 17 | * - `false` or `undefined`: do nothing, just keep the original format 18 | * 19 | * Details: 20 | * - noSpaceBeforePunctuation: 21 | * content/right-quotation/right-bracket/code x punctuation 22 | * - spaceAfterHalfWidthPunctuation: 23 | * half x content/left-quotation/left-bracket/code 24 | * - noSpaceAfterFullWidthPunctuation: 25 | * full x content/left-quotation/left-bracket/code 26 | * 27 | * - skip half-width punctuations between half-width content without space 28 | * - skip successive multiple half-width punctuations 29 | */ 30 | 31 | import { 32 | GroupTokenType, 33 | Handler, 34 |
isLetterType, 35 | isPauseOrStopType, 36 | MarkSideType, 37 | MutableGroupToken, 38 | MutableToken, 39 | HyperTokenType, 40 | isFullwidthPunctuationType, 41 | isHalfwidthPunctuationType 42 | } from '../parser/index.js' 43 | import { 44 | checkSpaceAfter, 45 | findVisibleTokenAfter, 46 | findVisibleTokenBefore, 47 | findWrappersBetween, 48 | isHalfwidthPunctuationWithoutSpaceAround, 49 | isSuccessiveHalfwidthPunctuation, 50 | Options 51 | } from './util.js' 52 | import { 53 | PUNCTUATION_NOSPACE_AFTER, 54 | PUNCTUATION_NOSPACE_BEFORE, 55 | PUNCTUATION_SPACE_AFTER 56 | } from './messages.js' 57 | 58 | const generateHandler = (options: Options): Handler => { 59 | const noBeforePunctuationOption = options?.noSpaceBeforePauseOrStop 60 | const oneAfterHalfWidthPunctuationOption = 61 | options?.spaceAfterHalfwidthPauseOrStop 62 | const noAfterFullWidthPunctuationOption = 63 | options?.noSpaceAfterFullwidthPauseOrStop 64 | 65 | return (token: MutableToken, _: number, group: MutableGroupToken) => { 66 | // skip non-punctuation tokens and non-normal punctuations 67 | if (!isPauseOrStopType(token.type)) { 68 | return 69 | } 70 | 71 | // skip half-width punctuations between half-width content without space 72 | if (isHalfwidthPunctuationWithoutSpaceAround(group, token)) { 73 | return 74 | } 75 | 76 | // skip successive multiple half-width punctuations 77 | if (isSuccessiveHalfwidthPunctuation(group, token)) { 78 | return 79 | } 80 | 81 | // 1. 
content/right-quotation/right-bracket/code x punctuation 82 | if (noBeforePunctuationOption) { 83 | const contentTokenBefore = findVisibleTokenBefore(group, token) 84 | if ( 85 | contentTokenBefore && 86 | // content 87 | (isLetterType(contentTokenBefore.type) || 88 | // right-quotation 89 | contentTokenBefore.type === GroupTokenType.GROUP || 90 | // right-bracket 91 | (contentTokenBefore.type === HyperTokenType.BRACKET_MARK && 92 | contentTokenBefore.markSide === MarkSideType.RIGHT) || 93 | // code 94 | contentTokenBefore.type === HyperTokenType.CODE_CONTENT) 95 | ) { 96 | const { spaceHost } = findWrappersBetween( 97 | group, 98 | contentTokenBefore, 99 | token 100 | ) 101 | 102 | if (spaceHost) { 103 | checkSpaceAfter(spaceHost, '', PUNCTUATION_NOSPACE_BEFORE) 104 | } 105 | } 106 | } 107 | 108 | // 2. half/full x content/left-quotation/left-bracket/code 109 | if ( 110 | (isFullwidthPunctuationType(token.modifiedType) && 111 | noAfterFullWidthPunctuationOption) || 112 | (isHalfwidthPunctuationType(token.modifiedType) && 113 | oneAfterHalfWidthPunctuationOption) 114 | ) { 115 | const spaceAfter = isHalfwidthPunctuationType(token.modifiedType) 116 | ? ' ' 117 | : '' 118 | const message = isHalfwidthPunctuationType(token.modifiedType) 119 | ? 
PUNCTUATION_SPACE_AFTER 120 | : PUNCTUATION_NOSPACE_AFTER 121 | 122 | const contentTokenAfter = findVisibleTokenAfter(group, token) 123 | if ( 124 | contentTokenAfter && 125 | // content 126 | (isLetterType(contentTokenAfter.type) || 127 | // left-quotation 128 | contentTokenAfter.type === GroupTokenType.GROUP || 129 | // left-bracket 130 | (contentTokenAfter.type === HyperTokenType.BRACKET_MARK && 131 | contentTokenAfter.markSide === MarkSideType.LEFT) || 132 | // code 133 | contentTokenAfter.type === HyperTokenType.CODE_CONTENT) 134 | ) { 135 | const { spaceHost } = findWrappersBetween( 136 | group, 137 | token, 138 | contentTokenAfter 139 | ) 140 | 141 | if (spaceHost) { 142 | checkSpaceAfter(spaceHost, spaceAfter, message) 143 | } 144 | } 145 | } 146 | } 147 | } 148 | // Default options for this rule; keys are the `...PauseOrStop` names that generateHandler reads. 149 | export const defaultConfig: Options = { 150 | noSpaceBeforePauseOrStop: true, 151 | spaceAfterHalfwidthPauseOrStop: true, 152 | noSpaceAfterFullwidthPauseOrStop: true 153 | } 154 | 155 | export default generateHandler 156 | -------------------------------------------------------------------------------- /src/rules/space-quotation.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is checking spaces besides quotations.
5 | * 6 | * Options 7 | * - noSpaceInsideQuotation: boolean | undefined 8 | * - spaceOutsideHalfwidthQuotation: boolean | undefined 9 | * - noSpaceOutsideFullwidthQuotation: boolean | undefined 10 | * 11 | * Details: 12 | * - noSpaceInsideQuotation: 13 | * - left-quotation x right-quotation 14 | * - content/punctuation/right-quotation/right-bracket/code/unknown/container x right-quotation 15 | * - left-quotation x content/punctuation/left-quotation/left-bracket/code/unknown/container 16 | * - spaceOutsideHalfwidthQuotation: 17 | * - right-half-quotation x left-half-quotation 18 | * - content/code x left-half-quotation 19 | * - right-half-quotation x content/code 20 | * - noSpaceOutsideFullwidthQuotation: 21 | * - right-full-quotation x left-full-quotation 22 | * - content/code x left-full-quotation 23 | * - right-full-quotation x content/code 24 | */ 25 | 26 | import { 27 | GroupTokenType, 28 | Handler, 29 | isLetterType, 30 | isFullwidthPair, 31 | MarkSideType, 32 | MutableGroupToken, 33 | MutableToken, 34 | HyperTokenType 35 | } from '../parser/index.js' 36 | import { 37 | checkInnerSpaceBefore, 38 | checkSpaceAfter, 39 | findWrappersBetween, 40 | findNonCodeVisibleTokenAfter, 41 | findNonCodeVisibleTokenBefore, 42 | Options 43 | } from './util.js' 44 | import { 45 | QUOTATION_NOSPACE_INSIDE, 46 | QUOTATION_NOSPACE_OUTSIDE, 47 | QUOTATION_SPACE_OUTSIDE 48 | } from './messages.js' 49 | 50 | const isFullWidth = (char: string, adjusted: string): boolean => { 51 | return isFullwidthPair(char) && adjusted.indexOf(char) === -1 52 | } 53 | 54 | const generateHandler = (options: Options): Handler => { 55 | const noSpaceInsideQuotationOption = options.noSpaceInsideQuotation 56 | const spaceOutsideHalfQuotationOption = options.spaceOutsideHalfwidthQuotation 57 | const noSpaceOutsideFullQuotationOption = 58 | options.noSpaceOutsideFullwidthQuotation 59 | const adjustedFullWidthOption = options.adjustedFullwidthPunctuation || '' 60 | 61 | return (token: MutableToken, _: 
number, group: MutableGroupToken) => { 62 | // skip non-group tokens 63 | if (token.type !== GroupTokenType.GROUP) { 64 | return 65 | } 66 | 67 | // 1. no space inside quotation 68 | if (noSpaceInsideQuotationOption) { 69 | // 1.1 left-quotation x content/punctuation/left-quotation/left-bracket/code/unknown/container 70 | const firstInsdieToken = token[0] 71 | if ( 72 | firstInsdieToken && 73 | firstInsdieToken.markSide !== MarkSideType.RIGHT 74 | ) { 75 | checkInnerSpaceBefore(token, '', QUOTATION_NOSPACE_INSIDE) 76 | } 77 | 78 | // 1.2 content/punctuation/right-quotation/right-bracket/code/unknown/container x right-quotation 79 | const lastInsideToken = token[token.length - 1] 80 | if (lastInsideToken && lastInsideToken.markSide !== MarkSideType.LEFT) { 81 | checkSpaceAfter(lastInsideToken, '', QUOTATION_NOSPACE_INSIDE) 82 | } 83 | 84 | // 1.3 left-quotation x right-quotation 85 | if (!firstInsdieToken) { 86 | checkInnerSpaceBefore(token, '', QUOTATION_NOSPACE_INSIDE) 87 | } 88 | } 89 | 90 | // 2. 
space outside half/full quotation 91 | if ( 92 | typeof spaceOutsideHalfQuotationOption !== 'undefined' || 93 | noSpaceOutsideFullQuotationOption 94 | ) { 95 | // 2.1 right-quotation x left-quotation 96 | const contentTokenAfter = findNonCodeVisibleTokenAfter(group, token) 97 | if ( 98 | contentTokenAfter && 99 | contentTokenAfter.type === GroupTokenType.GROUP 100 | ) { 101 | const { spaceHost } = findWrappersBetween( 102 | group, 103 | token, 104 | contentTokenAfter 105 | ) 106 | if (spaceHost) { 107 | const fullWidth = 108 | isFullWidth(token.modifiedEndValue, adjustedFullWidthOption) || 109 | isFullWidth( 110 | contentTokenAfter.modifiedStartValue, 111 | adjustedFullWidthOption 112 | ) 113 | // 2.1.1 right-full-quotation x left-full-quotation 114 | // 2.1.2 right-half-quotation x left-half-quotation 115 | if (fullWidth) { 116 | if (noSpaceOutsideFullQuotationOption) { 117 | checkSpaceAfter(spaceHost, '', QUOTATION_NOSPACE_OUTSIDE) // space is removed, so report the "no space" message 118 | } 119 | } else { 120 | if (typeof spaceOutsideHalfQuotationOption !== 'undefined') { 121 | const spaceAfter = spaceOutsideHalfQuotationOption ? ' ' : '' 122 | const message = spaceOutsideHalfQuotationOption 123 | ?
QUOTATION_SPACE_OUTSIDE 124 | : QUOTATION_NOSPACE_OUTSIDE 125 | checkSpaceAfter(spaceHost, spaceAfter, message) 126 | } 127 | } 128 | } 129 | } 130 | 131 | // 2.2 content/code x left-quotation 132 | const contentTokenBefore = findNonCodeVisibleTokenBefore(group, token) 133 | if ( 134 | contentTokenBefore && 135 | (isLetterType(contentTokenBefore.type) || 136 | contentTokenBefore.type === HyperTokenType.CODE_CONTENT) 137 | ) { 138 | const { spaceHost } = findWrappersBetween( 139 | group, 140 | contentTokenBefore, 141 | token 142 | ) 143 | if (spaceHost) { 144 | const fullWidth = isFullWidth( 145 | token.modifiedStartValue, 146 | adjustedFullWidthOption 147 | ) 148 | 149 | // 2.2.1 content/code x left-full-quotation 150 | // 2.2.2 content/code x left-half-quotation 151 | if (fullWidth) { 152 | if (noSpaceOutsideFullQuotationOption) { 153 | checkSpaceAfter(spaceHost, '', QUOTATION_NOSPACE_OUTSIDE) 154 | } 155 | } else { 156 | if (typeof spaceOutsideHalfQuotationOption !== 'undefined') { 157 | const spaceAfter = spaceOutsideHalfQuotationOption ? ' ' : '' 158 | const message = spaceOutsideHalfQuotationOption 159 | ? 
QUOTATION_SPACE_OUTSIDE 160 | : QUOTATION_NOSPACE_OUTSIDE 161 | checkSpaceAfter(spaceHost, spaceAfter, message) 162 | } 163 | } 164 | } 165 | } 166 | 167 | // 2.3 right-quotation x content/code 168 | if ( 169 | contentTokenAfter && 170 | (isLetterType(contentTokenAfter.type) || 171 | contentTokenAfter.type === HyperTokenType.CODE_CONTENT) 172 | ) { 173 | const { spaceHost } = findWrappersBetween( 174 | group, 175 | token, 176 | contentTokenAfter 177 | ) 178 | if (spaceHost) { 179 | const fullWidth = isFullWidth( 180 | token.modifiedEndValue, 181 | adjustedFullWidthOption 182 | ) 183 | 184 | // 2.3.1 right-full-quotation x content/code 185 | // 2.3.2 right-half-quotation x content/code 186 | if (fullWidth) { 187 | if (noSpaceOutsideFullQuotationOption) { 188 | checkSpaceAfter(spaceHost, '', QUOTATION_NOSPACE_OUTSIDE) 189 | } 190 | } else { 191 | if (typeof spaceOutsideHalfQuotationOption !== 'undefined') { 192 | const spaceAfter = spaceOutsideHalfQuotationOption ? ' ' : '' 193 | const message = spaceOutsideHalfQuotationOption 194 | ? QUOTATION_SPACE_OUTSIDE 195 | : QUOTATION_NOSPACE_OUTSIDE 196 | checkSpaceAfter(spaceHost, spaceAfter, message) 197 | } 198 | } 199 | } 200 | } 201 | } 202 | } 203 | } 204 | 205 | export const defaultConfig: Options = { 206 | spaceOutsideHalfwidthQuotation: true, 207 | noSpaceInsideQuotation: true, 208 | noSpaceOutsideFullwidthQuotation: true 209 | } 210 | 211 | export default generateHandler 212 | -------------------------------------------------------------------------------- /src/rules/space-trim.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileoverview 3 | * 4 | * This rule is triming spaces of the whole string. 
5 | * 6 | * Options 7 | * - trimSpace: boolean | undefined 8 | */ 9 | 10 | import { Handler, MutableGroupToken, MutableToken } from '../parser/index.js' 11 | import { TRIM_SPACE } from './messages.js' 12 | import { 13 | checkInnerSpaceBefore, 14 | checkSpaceAfter, 15 | findVisibleTokenBefore, 16 | findConnectedWrappers, 17 | isWrapper, 18 | Options 19 | } from './util.js' 20 | 21 | const generateHandler = (options: Options): Handler => { 22 | const trimSpaceOption = options?.trimSpace 23 | 24 | return (token: MutableToken, index: number, group: MutableGroupToken) => { 25 | if (!trimSpaceOption) { 26 | return 27 | } 28 | 29 | // make sure it's the whole string 30 | if (!group.startValue && index === 0) { 31 | // remove inner space before 32 | if (group.modifiedInnerSpaceBefore) { 33 | checkInnerSpaceBefore(group, '', TRIM_SPACE) 34 | } 35 | 36 | // remove all spaces after beginning marks 37 | if (isWrapper(token)) { 38 | findConnectedWrappers(group, token).forEach((x) => 39 | checkSpaceAfter(x, '', TRIM_SPACE) 40 | ) 41 | } 42 | 43 | // get last visible content token -> remove all spaces after 44 | const lastToken = group[group.length - 1] 45 | if (lastToken) { 46 | // 1. last token is a mark -> find last visible content token 47 | // 2. 
last token is visible content 48 | if (isWrapper(lastToken)) { 49 | const lastContentToken = findVisibleTokenBefore(group, lastToken) // look back from the trailing mark; `token` is the first token here (index === 0) 50 | if (lastContentToken) { 51 | findConnectedWrappers(group, lastToken).forEach((x) => 52 | checkSpaceAfter(x, '', TRIM_SPACE) 53 | ) 54 | checkSpaceAfter(lastContentToken, '', TRIM_SPACE) 55 | } 56 | } else { 57 | checkSpaceAfter(lastToken, '', TRIM_SPACE) 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | export const defaultConfig: Options = { 65 | trimSpace: true 66 | } 67 | 68 | export default generateHandler 69 | -------------------------------------------------------------------------------- /src/run.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | ParsedBlock, 3 | ParsedStatus, 4 | ParserIgnoredCase 5 | } from './hypers/types.js' 6 | import type { Validation } from './report.js' 7 | import type { NormalizedOptions, Options } from './options.js' 8 | import type { Config } from './rc/index.js' 9 | import type { IgnoredCase } from './ignore.js' 10 | import type { Piece } from './replace-block.js' 11 | 12 | import { normalizeOptions, normalizeConfig } from './options.js' 13 | import { MutableToken, parse, toMutableResult, travel } from './parser/index.js' 14 | import generateHandlers from './rules/index.js' 15 | import findIgnoredMarks from './ignore.js' 16 | import join from './join.js' 17 | import replaceBlocks from './replace-block.js' 18 | 19 | export type { Options } from './options.js' 20 | 21 | export type DebugInfo = { 22 | pieces: Piece[] 23 | blocks: ParsedBlock[] 24 | ignoredCases: IgnoredCase[] 25 | ignoredByParsers: ParserIgnoredCase[] 26 | ignoredTokens: MutableToken[] 27 | parserErrors: Validation[] 28 | ruleErrors: Validation[] 29 | ignoredRuleErrors: Validation[] 30 | } 31 | 32 | export type Result = { 33 | file?: string 34 | disabled?: boolean 35 | origin: string 36 | result: string 37 | validations: Validation[] 38 | __debug__?: DebugInfo 39 | } 40 | 41 | export
const run = (str: string, options: Options = {}): Result => { 42 | const normalizedOptions = normalizeOptions(options) 43 | return lint(str, normalizedOptions) 44 | } 45 | 46 | export const runWithConfig = (str: string, config: Config): Result => { 47 | const normalizedOptions = normalizeConfig(config) 48 | return lint(str, normalizedOptions) 49 | } 50 | // NOTE(review): the matcher below was an empty `//g` (its HTML-comment body had been stripped); the restored pattern is presumed - confirm against the project's "disabled" marker syntax. 51 | const lint = (str: string, normalizedOptions: NormalizedOptions): Result => { 52 | // return if the file is totally ignored 53 | const disabledMatcher = /<!--\s*zhlint\s*disabled\s*-->/g 54 | if (str.match(disabledMatcher)) { 55 | return { origin: str, result: str, validations: [], disabled: true } 56 | } 57 | 58 | const { logger, ignoredCases, rules, hyperParse } = normalizedOptions 59 | 60 | // init status 61 | // str -> ignoredByRules, ignoredByParsers 62 | // blocks -> marks, ignoredMarks 63 | const status: ParsedStatus = { 64 | value: str, 65 | modifiedValue: str, 66 | ignoredByRules: ignoredCases, 67 | ignoredByParsers: [], 68 | blocks: [ 69 | { 70 | value: str, 71 | marks: [], 72 | start: 0, 73 | end: str.length - 1 74 | } 75 | ] 76 | } 77 | 78 | const ignoredTokens: MutableToken[] = [] 79 | const parserErrors: Validation[] = [] 80 | const ruleErrors: Validation[] = [] 81 | const ignoredRuleErrors: Validation[] = [] 82 | 83 | // Run all the hyper parsers 84 | const parsedStatus = hyperParse.reduce( 85 | (current, parse) => parse(current), 86 | status 87 | ) 88 | 89 | // 1. Parse each block without ignoredByParsers 90 | // 2. Parse all ignoredByRules into marks for each block 91 | // 3. Run all rule processes for each block 92 | // 4. Join all tokens with ignoredMarks and all errors for each block 93 | // 5.
Replace each block back to the string 94 | const ruleHandlers = generateHandlers(rules) 95 | const modifiedBlocks: ParsedBlock[] = parsedStatus.blocks.map( 96 | ({ value, marks, start, end }) => { 97 | let lastValue = value 98 | 99 | if (globalThis.__DEV__) { 100 | logger.log('[Original block value]') 101 | logger.log(lastValue) 102 | } 103 | 104 | const result = toMutableResult(parse(value, marks), rules) 105 | parserErrors.push(...result.errors) 106 | 107 | const ignoredMarks = findIgnoredMarks( 108 | value, 109 | status.ignoredByRules, 110 | logger 111 | ) 112 | 113 | ruleHandlers.forEach((rule) => { 114 | travel(result.tokens, rule) 115 | if (globalThis.__DEV__) { 116 | const currentValue = join( 117 | result.tokens, 118 | start, 119 | ignoredMarks, 120 | [], 121 | [], 122 | [] 123 | ) 124 | if (lastValue !== currentValue) { 125 | logger.log(`[After process by ${rule.name}]`) 126 | logger.log(currentValue) 127 | } 128 | lastValue = currentValue 129 | } 130 | }) 131 | 132 | lastValue = join( 133 | result.tokens, 134 | start, 135 | ignoredMarks, 136 | ignoredTokens, 137 | ruleErrors, 138 | ignoredRuleErrors 139 | ) 140 | 141 | if (globalThis.__DEV__) { 142 | logger.log('[Eventual block value]') 143 | logger.log(lastValue + '\n') 144 | } 145 | 146 | return { 147 | ...result, 148 | start, 149 | end, 150 | value: lastValue, 151 | originValue: value 152 | } 153 | } 154 | ) 155 | 156 | const result = replaceBlocks(str, modifiedBlocks) 157 | 158 | const debugInfo: DebugInfo = { 159 | pieces: result.pieces, 160 | blocks: modifiedBlocks, 161 | ignoredCases: parsedStatus.ignoredByRules, 162 | ignoredByParsers: parsedStatus.ignoredByParsers, 163 | ignoredTokens, 164 | parserErrors, 165 | ruleErrors, 166 | ignoredRuleErrors 167 | } 168 | 169 | return { 170 | origin: str, 171 | result: result.value, 172 | validations: [...parserErrors, ...ruleErrors], 173 | __debug__: debugInfo 174 | } 175 | } 176 | 177 | export default run 178 | 
-------------------------------------------------------------------------------- /test/debug.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, test } from 'vitest' 2 | 3 | import fs from 'fs' 4 | import path from 'path' 5 | import run from '../src/run.js' 6 | 7 | describe.skip('debug', () => { 8 | test('ignore HTML comment', () => { 9 | const input = fs.readFileSync( 10 | path.resolve(__dirname, './example-debug.md'), 11 | { encoding: 'utf8' } 12 | ) 13 | const output = run(input) 14 | const { origin, result, __debug__ } = output 15 | const { pieces, blocks } = __debug__ || {} 16 | console.log({ 17 | origin, 18 | result, 19 | pieces 20 | }) 21 | blocks?.forEach((block) => { 22 | console.log(block.tokens) 23 | }) 24 | }) 25 | }) 26 | -------------------------------------------------------------------------------- /test/example-article.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 介绍 3 | type: guide 4 | order: 2 5 | --- 6 | 7 | ## Vue.js 是什么 8 | 9 | 10 | 11 | Vue (读音 /vjuː/,类似于 **view**) 是一套用于构建用户界面的**渐进式框架**。与其它大型框架不同的是,Vue 被设计为可以自底向上逐层应用。Vue 的核心库只关注视图层,不仅易于上手,还便于与第三方库或既有项目整合。另一方面,当与[现代化的工具链](single-file-components.html)以及各种[支持类库](https://github.com/vuejs/awesome-vue#libraries--plugins)结合使用时,Vue 也完全能够为复杂的单页应用提供驱动。 12 | 13 | 如果你想在深入学习 Vue 之前对它有更多了解,我们制作了一个视频,带您了解其核心概念和一个示例工程。 14 | 15 | 16 | 如果你已经是有经验的前端开发者,想知道 Vue 与其它库/框架有哪些区别,请查看[对比其它框架](comparison.html)。 17 | 18 | ## 起步 19 | 20 | 21 | 22 |

官方指南假设你已了解关于 HTML、CSS 和 JavaScript 的中级知识。如果你刚开始学习前端开发,将框架作为你的第一步可能不是最好的主意——掌握好基础知识再来吧!之前有其它框架的使用经验会有帮助,但这不是必需的。

23 | 24 | 安装 25 | 26 | 尝试 Vue.js 最简单的方法是使用 [JSFiddle 上的 Hello World 例子](https://jsfiddle.net/chrisvfritz/50wL7mdz/)。你可以在浏览器新标签页中打开它,跟着例子学习一些基础用法。或者你也可以创建一个 .html 文件,然后通过如下方式引入 Vue: 27 | 28 | ``` html 29 | 30 | 31 | ``` 32 | 33 | 或者: 34 | 35 | ``` html 36 | 37 | 38 | ``` 39 | 40 | [安装教程](/guide/installation.html)给出了更多安装 Vue 的方式。请注意我们**不推荐**新手直接使用 `vue-cli`,尤其是在你还不熟悉基于 Node.js 的构建工具时。 41 | 42 | 如果你喜欢交互式的东西,你也可以查阅[这个 Scrimba 上的系列教程](https://scrimba.com/g/gvuedocs),它揉合了录屏和代码试验田,并允许你随时暂停和播放。 43 | 44 | ## 声明式渲染 45 | 46 | 47 | 48 | Vue.js 的核心是一个允许采用简洁的模板语法来声明式地将数据渲染进 DOM 的系统: 49 | 50 | ``` html 51 |
52 | {{ message }} 53 |
54 | ``` 55 | ``` js 56 | var app = new Vue({ 57 | el: '#app', 58 | data: { 59 | message: 'Hello Vue!' 60 | } 61 | }) 62 | ``` 63 | {% raw %} 64 |
65 | {{ message }} 66 |
67 | 75 | {% endraw %} 76 | 77 | 我们已经成功创建了第一个 Vue 应用!看起来这跟渲染一个字符串模板非常类似,但是 Vue 在背后做了大量工作。现在数据和 DOM 已经被建立了关联,所有东西都是**响应式的**。我们要怎么确认呢?打开你的浏览器的 JavaScript 控制台 (就在这个页面打开),并修改 `app.message` 的值,你将看到上例相应地更新。 78 | 79 | 除了文本插值,我们还可以像这样来绑定元素特性: 80 | 81 | ``` html 82 |
83 | 84 | 鼠标悬停几秒钟查看此处动态绑定的提示信息! 85 | 86 |
87 | ``` 88 | ``` js 89 | var app2 = new Vue({ 90 | el: '#app-2', 91 | data: { 92 | message: '页面加载于 ' + new Date().toLocaleString() 93 | } 94 | }) 95 | ``` 96 | {% raw %} 97 |
98 | 99 | 鼠标悬停几秒钟查看此处动态绑定的提示信息! 100 | 101 |
102 | 110 | {% endraw %} 111 | 112 | 这里我们遇到了一点新东西。你看到的 `v-bind` 特性被称为**指令**。指令带有前缀 `v-`,以表示它们是 Vue 提供的特殊特性。可能你已经猜到了,它们会在渲染的 DOM 上应用特殊的响应式行为。在这里,该指令的意思是:“将这个元素节点的 `title` 特性和 Vue 实例的 `message` 属性保持一致”。 113 | 114 | 如果你再次打开浏览器的 JavaScript 控制台,输入 `app2.message = '新消息'`,就会再一次看到这个绑定了 `title` 特性的 HTML 已经进行了更新。 115 | 116 | ## 条件与循环 117 | 118 | 119 | 120 | 控制切换一个元素是否显示也相当简单: 121 | 122 | ``` html 123 |
124 |

现在你看到我了

125 |
126 | ``` 127 | ``` js 128 | var app3 = new Vue({ 129 | el: '#app-3', 130 | data: { 131 | seen: true 132 | } 133 | }) 134 | ``` 135 | {% raw %} 136 |
137 | 现在你看到我了 138 |
139 | 147 | {% endraw %} 148 | 149 | 继续在控制台输入 `app3.seen = false`,你会发现之前显示的消息消失了。 150 | 151 | 这个例子演示了我们不仅可以把数据绑定到 DOM 文本或特性,还可以绑定到 DOM **结构**。此外,Vue 也提供一个强大的过渡效果系统,可以在 Vue 插入/更新/移除元素时自动应用[过渡效果](transitions.html)。 152 | 153 | 还有其它很多指令,每个都有特殊的功能。例如,`v-for` 指令可以绑定数组的数据来渲染一个项目列表: 154 | 155 | ``` html 156 |
157 |
    158 |
  1. 159 | {{ todo.text }} 160 |
  2. 161 |
162 |
163 | ``` 164 | ``` js 165 | var app4 = new Vue({ 166 | el: '#app-4', 167 | data: { 168 | todos: [ 169 | { text: '学习 JavaScript' }, 170 | { text: '学习 Vue' }, 171 | { text: '整个牛项目' } 172 | ] 173 | } 174 | }) 175 | ``` 176 | {% raw %} 177 |
178 |
    179 |
  1. 180 | {{ todo.text }} 181 |
  2. 182 |
183 |
184 | 196 | {% endraw %} 197 | 198 | 在控制台里,输入 `app4.todos.push({ text: '新项目' })`,你会发现列表最后添加了一个新项目。 199 | 200 | ## 处理用户输入 201 | 202 | 203 | 204 | 为了让用户和你的应用进行交互,我们可以用 `v-on` 指令添加一个事件监听器,通过它调用在 Vue 实例中定义的方法: 205 | 206 | ``` html 207 |
208 |

{{ message }}

209 | 210 |
211 | ``` 212 | ``` js 213 | var app5 = new Vue({ 214 | el: '#app-5', 215 | data: { 216 | message: 'Hello Vue.js!' 217 | }, 218 | methods: { 219 | reverseMessage: function () { 220 | this.message = this.message.split('').reverse().join('') 221 | } 222 | } 223 | }) 224 | ``` 225 | {% raw %} 226 |
227 |

{{ message }}

228 | 229 |
230 | 243 | {% endraw %} 244 | 245 | 注意在 `reverseMessage` 方法中,我们更新了应用的状态,但没有触碰 DOM——所有的 DOM 操作都由 Vue 来处理,你编写的代码只需要关注逻辑层面即可。 246 | 247 | Vue 还提供了 `v-model` 指令,它能轻松实现表单输入和应用状态之间的双向绑定。 248 | 249 | ``` html 250 |
251 |

{{ message }}

252 | 253 |
254 | ``` 255 | ``` js 256 | var app6 = new Vue({ 257 | el: '#app-6', 258 | data: { 259 | message: 'Hello Vue!' 260 | } 261 | }) 262 | ``` 263 | {% raw %} 264 |
265 |

{{ message }}

266 | 267 |
268 | 276 | {% endraw %} 277 | 278 | ## 组件化应用构建 279 | 280 | 281 | 282 | 组件系统是 Vue 的另一个重要概念,因为它是一种抽象,允许我们使用小型、独立和通常可复用的组件构建大型应用。仔细想想,几乎任意类型的应用界面都可以抽象为一个组件树: 283 | 284 | ![Component Tree](/images/components.png) 285 | 286 | 在 Vue 里,一个组件本质上是一个拥有预定义选项的一个 Vue 实例。在 Vue 中注册组件很简单: 287 | 288 | ``` js 289 | // 定义名为 todo-item 的新组件 290 | Vue.component('todo-item', { 291 | template: '
  • 这是个待办项
  • ' 292 | }) 293 | 294 | var app = new Vue(...) 295 | ``` 296 | 297 | 现在你可以用它构建另一个组件模板: 298 | 299 | ``` html 300 |
      301 | 302 | 303 |
    304 | ``` 305 | 306 | 但是这样会为每个待办项渲染同样的文本,这看起来并不炫酷。我们应该能从父作用域将数据传到子组件才对。让我们来修改一下组件的定义,使之能够接受一个 [prop](components.html#通过-Prop-向子组件传递数据): 307 | 308 | ``` js 309 | Vue.component('todo-item', { 310 | // todo-item 组件现在接受一个 311 | // "prop",类似于一个自定义特性。 312 | // 这个 prop 名为 todo。 313 | props: ['todo'], 314 | template: '
  • {{ todo.text }}
  • ' 315 | }) 316 | ``` 317 | 318 | 现在,我们可以使用 `v-bind` 指令将待办项传到循环输出的每个组件中: 319 | 320 | ``` html 321 |
    322 |
      323 | 329 | 334 |
    335 |
    336 | ``` 337 | 338 | ``` js 339 | Vue.component('todo-item', { 340 | props: ['todo'], 341 | template: '
  • {{ todo.text }}
  • ' 342 | }) 343 | 344 | var app7 = new Vue({ 345 | el: '#app-7', 346 | data: { 347 | groceryList: [ 348 | { id: 0, text: '蔬菜' }, 349 | { id: 1, text: '奶酪' }, 350 | { id: 2, text: '随便其它什么人吃的东西' } 351 | ] 352 | } 353 | }) 354 | ``` 355 | {% raw %} 356 |
    357 |
      358 | 359 |
    360 |
    361 | 377 | {% endraw %} 378 | 379 | 尽管这只是一个刻意设计的例子,但是我们已经设法将应用分割成了两个更小的单元。子单元通过 prop 接口与父单元进行了良好的解耦。我们现在可以进一步改进 `` 组件,提供更为复杂的模板和逻辑,而不会影响到父单元。 380 | 381 | 在一个大型应用中,有必要将整个应用程序划分为组件,以使开发更易管理。在[后续教程](components.html)中我们将详述组件,不过这里有一个 (假想的) 例子,以展示使用了组件的应用模板是什么样的: 382 | 383 | ``` html 384 |
    385 | 386 | 387 | 388 | 389 | 390 |
    391 | ``` 392 | 393 | ### 与自定义元素的关系 394 | 395 | 你可能已经注意到 Vue 组件非常类似于**自定义元素**——它是 [Web 组件规范](https://www.w3.org/wiki/WebComponents/)的一部分,这是因为 Vue 的组件语法部分参考了该规范。例如 Vue 组件实现了 [Slot API](https://github.com/w3c/webcomponents/blob/gh-pages/proposals/Slots-Proposal.md) 与 `is` 特性。但是,还是有几个关键差别: 396 | 397 | 1. Web Components 规范已经完成并通过,但未被所有浏览器原生实现。目前 Safari 10.1+、Chrome 54+ 和 Firefox 63+ 原生支持 Web Components。相比之下,Vue 组件不需要任何 polyfill,并且在所有支持的浏览器 (IE9 及更高版本) 之下表现一致。必要时,Vue 组件也可以包装于原生自定义元素之内。 398 | 399 | 2. Vue 组件提供了纯自定义元素所不具备的一些重要功能,最突出的是跨组件数据流、自定义事件通信以及构建工具集成。 400 | 401 | 虽然 Vue 内部没有使用自定义元素,不过在应用使用自定义元素、或以自定义元素形式发布时,[依然有很好的互操作性](https://custom-elements-everywhere.com/#vue)。Vue CLI 也支持将 Vue 组件构建成为原生的自定义元素。 402 | 403 | ## 准备好了吗? 404 | 405 | 我们刚才简单介绍了 Vue 核心最基本的功能——本教程的其余部分将更加详细地涵盖这些功能以及其它高级功能,所以请务必读完整个教程! 406 | 407 | 408 | -------------------------------------------------------------------------------- /test/example-debug.md: -------------------------------------------------------------------------------- 1 | hello world 2 | -------------------------------------------------------------------------------- /test/example-disabled.md: -------------------------------------------------------------------------------- 1 | 2 | text before (text inside) text after 3 | 4 | 5 | vm.$on( event, callback ) 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/example-ignore.md: -------------------------------------------------------------------------------- 1 | 2 | text before (text inside) text after 3 | 4 | 5 | vm.$on( event, callback ) 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/example-units-fixed.md: -------------------------------------------------------------------------------- 1 | mark-raw:a `b` c `d` e `f` g `h` i 中文 2 | 3 | `TODO: should be mark-type:a__[b](x)__c __[d](y)__ e 中文` 4 | mark-type:a__[b](x)__c__ [d](y) __e 中文 5 | 6 | unify-punctuation:中文,中文 (中文) 中文 ‘中文’ 中文 
“中文” 中文 (中文)(中文) 中文 (中文)。 7 | 8 | case-abbr:Pure JavaScript (a.k.a. Vanilla) 中文 9 | 10 | case-html-entity:中文< & >中文 11 | 12 | space-punctuation:中文。中文 (中文) 中文。中文。中文 (中文) 中文。 13 | 14 | case-math-exp:1+1=2 1 + 1 = 2 1 + 2=3 2020/01/01 2020-01-01 vue-custom-element 100% a/b Chrome 53+ 中文 15 | 16 | `TODO: should be case-backslash:a \# b 中文\# __中文__ \# 中文 __\#__ __中文__\#中文__\#__` 17 | case-backslash:a \# b 中文\# __中文__ \# 中文 __\#__ __中文__\#中文 __\#__ 18 | 19 | space-brackets:(x)a(b)c (d) e (f) g (h) i (j) k (l) m __(a)__ b (__c__) d(e) 中文 20 | 21 | space-quotations:a “hello world” b 中文 22 | 23 | case-traditional:a “b ‘c’ d” e 中文 24 | 25 | case-datetime:2020/01/02 01:20:30 中文 2020 年1月1日0天0号0时0分00秒 26 | 27 | case-ellipsis:中文...中文...a...b... 中文 ... 中文 ... a ... b ... 28 | 29 | case-raw:`AC`/`DC` 中文 30 | 31 | case-linebreak: 32 | this is 33 | a 34 | multiline 35 | text 中文 36 | 37 | 3 minite(s) left 中文 38 | 39 | 😉  中文 40 | -------------------------------------------------------------------------------- /test/example-units.md: -------------------------------------------------------------------------------- 1 | mark-raw:a `b` c `d`e`f` g`h`i 中文 2 | 3 | `TODO: should be mark-type:a__[b](x)__c __[d](y)__ e 中文` 4 | mark-type:a__[b](x)__c__[ d ](y)__e 中文 5 | 6 | unify-punctuation:中文,中文 (中文) 中文'中文'中文"中文"中文 (中文)(中文)中文 (中文)。 7 | 8 | case-abbr:Pure JavaScript (a.k.a. Vanilla) 中文 9 | 10 | case-html-entity:中文< & >中文 11 | 12 | space-punctuation:中文 。 中文(中文)中文。中文 . 中文(中文)中文. 
13 | 14 | case-math-exp:1+1=2 1 + 1 = 2 1 + 2=3 2020/01/01 2020-01-01 vue-custom-element 100% a/b Chrome 53+ 中文 15 | 16 | `TODO: should be case-backslash:a \# b 中文\# __中文__ \# 中文 __\#__ __中文__\#中文__\#__` 17 | case-backslash:a \# b 中文\# __中文__ \# 中文 __\#__ __中文__\#中文__\#__ 18 | 19 | space-brackets:(x)a(b)c (d )e( f) g ( h ) i(j)k (l) m __( a )__ b( __c__ )d(e) 中文 20 | 21 | space-quotations: a " hello world " b 中文 22 | 23 | case-traditional:a「b『c』d」e 中文 24 | 25 | case-datetime:2020/01/02 01:20:30 中文 2020 年1月1日0天0号0时0分00秒 26 | 27 | case-ellipsis:中文...中文...a...b... 中文 ... 中文 ... a ... b ... 28 | 29 | case-raw:`AC`/`DC` 中文 30 | 31 | case-linebreak: 32 | this is 33 | a 34 | multiline 35 | text 中文 36 | 37 | 3 minite(s) left 中文 38 | 39 | 😉  中文 40 | -------------------------------------------------------------------------------- /test/example-vuepress-fixed.md: -------------------------------------------------------------------------------- 1 | ::: warning 警告 2 | 自动在中文和 English 之间加入空格 3 | ::: 4 | 自动在中文和 English 之间加入空格 5 | ::: warning 警告 6 | 自动在中文和 English 之间加入空格 7 | ::: 8 | 自动在中文和 English 之间加入空格 9 | ::: warning 警告 10 | 自动在中文和 English 之间加入空格 11 | ::: -------------------------------------------------------------------------------- /test/example-vuepress.md: -------------------------------------------------------------------------------- 1 | ::: warning 警告 2 | 自动在中文和English之间加入空格 3 | ::: 4 | 自动在中文和English之间加入空格 5 | ::: warning 警告 6 | 自动在中文和English之间加入空格 7 | ::: 8 | 自动在中文和English之间加入空格 9 | ::: warning 警告 10 | 自动在中文和English之间加入空格 11 | ::: -------------------------------------------------------------------------------- /test/examples.test.ts: --------------------------------------------------------------------------------
1 | import { describe, test, expect } from 'vitest'
2 |
3 | import fs from 'fs'
4 | import path from 'path'
5 | import run from '../src/run.js'
6 | import { options } from './prepare.js'
7 |
8 | // Reads a fixture located next to this test file as a UTF-8 string.
9 | const readFixture = (fileName) =>
10 |   fs.readFileSync(path.resolve(__dirname, fileName), { encoding: 'utf8' })
11 |
12 | // Converts a character index inside `str` into a position object:
13 | //   row    - 1-based number of the last line consumed while seeking the index
14 | //   column - the index expressed relative to the start of that line
15 | //   line   - the text of that line
16 | //   offset - what is left of the index after the last consumed line
17 | //            (negative once the line holding the index has been consumed)
18 | // With a negative `index` no line is consumed: row and column stay 0 and
19 | // line stays ''.
20 | const parsePosition = (str, index) => {
21 |   let remaining = index
22 |   let row = 0
23 |   let column = 0
24 |   let line = ''
25 |   for (const current of str.split('\n')) {
26 |     if (remaining < 0) {
27 |       break
28 |     }
29 |     row += 1
30 |     column = remaining
31 |     line = current
32 |     // "+ 1" accounts for the '\n' removed by split().
33 |     remaining -= current.length + 1
34 |   }
35 |   return { offset: remaining, row, column, line }
36 | }
37 |
38 | // const expectedValidationsInfo = {
39 | //   1: [20, 21, 26, 29],
40 | //   3: [22, 25, 27, 34],
41 | //   5: [20, 24, 27, 31, 35, 37, 41, 44, 47, 48, 51, 55, 58],
42 | //   7: [],
43 | //   9: [],
44 | //   11: [20, 22, 25, 29, 34, 35, 39, 42, 45],
45 | //   13: [15, 16, 17, 18, 35, 36],
46 | //   15: [24, 53, 55, 57],
47 | //   17: [26, 30, 37, 39, 43, 45, 48, 50, 57, 59, 66, 72],
48 | //   19: [15, 30, 28],
49 | //   21: [18, 25, 20, 23],
50 | //   23: [36, 41],
51 | //   25: [32, 35, 39, 42, 46, 48, 52, 54],
52 | //   27: []
53 | // }
54 |
55 | describe('combo lint', () => {
56 |   test('rule units', () => {
57 |     const input = readFixture('./example-units.md')
58 |     const output = readFixture('./example-units-fixed.md')
59 |     const { result, validations, disabled } = run(input, options)
60 |     expect(result).toBe(output)
61 |     expect(!disabled).toBeTruthy()
62 |     // Index every validation by row, then by column. Validations whose
63 |     // target is `spaceAfter` or `endValue` are located at index + length.
64 |     // Nothing asserts on this map while the checks below stay commented out.
65 |     const endTargets = ['spaceAfter', 'endValue']
66 |     const validationsByLine = {}
67 |     for (const validation of validations) {
68 |       const { index, length, target } = validation
69 |       const finalIndex = endTargets.includes(target) ? index + length : index
70 |       const { row, column } = parsePosition(input, finalIndex)
71 |       if (!validationsByLine[row]) {
72 |         validationsByLine[row] = {}
73 |       }
74 |       validationsByLine[row][column] = validation
75 |     }
76 |     // Object.keys(expectedValidationsInfo).forEach((row) => {
77 |     //   const info = expectedValidationsInfo[row]
78 |     //   const lineValidations = validationsByLine[row] || {}
79 |     //   expect(Object.keys(lineValidations).length).toBe(info.length)
80 |     //   info.forEach((column) => expect(lineValidations[column]).toBeTruthy())
81 |     // })
82 |   })
83 |
84 |   test('ignore HTML comment', () => {
85 |     const input = readFixture('./example-ignore.md')
86 |     const { result, validations, disabled } = run(input, options)
87 |     // The text must come back untouched, with no validations reported.
88 |     expect(result).toBe(input)
89 |     expect(validations.length).toBe(0)
90 |     expect(!disabled).toBeTruthy()
91 |   })
92 |
93 |   test('disabled HTML comment', () => {
94 |     const input = readFixture('./example-disabled.md')
95 |     const { result, validations, disabled } = run(input, options)
96 |     // Same as above, except that the run must report itself as disabled.
97 |     expect(result).toBe(input)
98 |     expect(validations.length).toBe(0)
99 |     expect(disabled).toBe(true)
100 |   })
101 |
102 |   test('support vuepress-special syntax', () => {
103 |     const input = readFixture('./example-vuepress.md')
104 |     const output = readFixture('./example-vuepress-fixed.md')
105 |     const { result, validations } = run(input, options)
106 |     expect(result).toBe(output)
107 |     expect(validations.length).toBe(10)
108 |   })
109 |
110 |   test('vuejs guide article', () => {
111 |     // The article fixture must pass through the linter unchanged.
112 |     const input = readFixture('./example-article.md')
113 |     expect(run(input, options).result).toBe(input)
114 |   })
115 | })
116 |
-------------------------------------------------------------------------------- /test/hexo.test.ts: --------------------------------------------------------------------------------
1 | import { describe, test, expect } from 'vitest'
2 |
3 | import run from '../src/run.js'
4 | import { options } from './prepare.js'
5 |
6 | // Lints `str` with the shared test options and returns the resulting text.
7 | function getOutput(str: string) {
8 |   return run(str, options).result
9 | }
10 |
11 | // Each case feeds a Hexo tag snippet to the linter and expects the exact
12 | // same text back.
13 | describe('hexo lint', () => {
14 |   test('[hexo] one-line raw', () => {
15 |     const input = '`_x_` {% raw %}hello{% endraw %}'
16 |     expect(getOutput(input)).toBe(input)
17 |   })
18 |
19 |   test('[hexo] multiline raw', () => {
20 |     const input = `{% raw %}\n\n{% endraw %}`
21 |     expect(getOutput(input)).toBe(input)
22 |   })
23 |
24 |   test('[hexo] codeblock', () => {
25 |     const input = `{% codeblock lang:js %}\nalias: [‘/manage’ ,‘/administer’ ,‘/administrate’ ]\n{% endcodeblock %}`
26 |     expect(getOutput(input)).toBe(input)
27 |   })
28 | })
29 |
-------------------------------------------------------------------------------- /test/lint.test.ts: --------------------------------------------------------------------------------
1 | import { describe, test, expect } from 'vitest'
2 |
3 | import run, { Options } from '../src/run.js'
4 | import { options } from './prepare.js'
5 |
6 | // Forwards its arguments to `run` unchanged and returns only the linted text.
7 | function getOutput(...args: [string, Options?]) {
8 |   return run(...args).result
9 | }
10 |
11 | describe('lint with different arguments', () => {
12 |   test('ignored cases', () => {
13 |     // The `ignoredCases` entry covers the spot around "English", so the
14 |     // expected output equals the input.
15 |     const customOptions = {
16 |       ...options,
17 |       ignoredCases: [{ textStart: '和English之间' }]
18 |     }
19 |     const actual = getOutput(
20 |       '汉字和English之间需要有空格比如 half width content。',
21 |       customOptions
22 |     )
23 |     expect(actual).toBe('汉字和English之间需要有空格比如 half width content。')
24 |   })
25 |
26 |   test('ignored cases from Vue docs', () => {
27 |     const customOptions = {
28 |       ...options,
29 |       ignoredCases: [{ textStart: '? {#' }]
30 |     }
31 |     const { result, validations } = run('# SSR? {#ssr}', customOptions)
32 |     expect(result).toBe('# SSR? {#ssr}')
33 |     expect(validations.length).toBe(0)
34 |   })
35 | })
36 |
-------------------------------------------------------------------------------- /test/md.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from 'vitest' 2 | 3 | import run from '../src/run.js' 4 | import markdownParser from '../src/hypers/md.js' 5 | import { ParsedStatus } from '../src/hypers/types.js' 6 | import { options } from './prepare.js' 7 | 8 | const getOutput = (str: string) => run(str, options).result 9 | 10 | describe('parser with markdown', () => { 11 | test('[md parser] single paragraph', () => { 12 | const text = 'X [xxx](xxx) X *y* __x__ `ss` _0_ ~~asd~~ *asf**asf**adsf*' 13 | const data: ParsedStatus = { 14 | value: text, 15 | modifiedValue: text, 16 | ignoredByRules: [], 17 | ignoredByParsers: [], 18 | blocks: [ 19 | { 20 | value: text, 21 | marks: [], 22 | start: 0, 23 | end: text.length - 1 24 | } 25 | ] 26 | } 27 | const result = markdownParser(data).blocks 28 | const marks = [ 29 | { 30 | type: 'hyper', 31 | meta: 'link', 32 | startIndex: 2, 33 | startValue: '[', 34 | endIndex: 6, 35 | endValue: '](xxx)' 36 | }, 37 | { 38 | type: 'hyper', 39 | meta: 'emphasis', 40 | startIndex: 15, 41 | startValue: '*', 42 | endIndex: 17, 43 | endValue: '*' 44 | }, 45 | { 46 | type: 'hyper', 47 | meta: 'strong', 48 | startIndex: 19, 49 | startValue: '__', 50 | endIndex: 22, 51 | endValue: '__' 52 | }, 53 | { 54 | type: 'raw', 55 | meta: 'inlineCode', 56 | startIndex: 25, 57 | endIndex: 29, 58 | startValue: '`ss`', 59 | endValue: '' 60 | }, 61 | { 62 | type: 'hyper', 63 | meta: 'emphasis', 64 | startIndex: 30, 65 | startValue: '_', 66 | endIndex: 32, 67 | endValue: '_' 68 | }, 69 | { 70 | type: 'hyper', 71 | meta: 'delete', 72 | startIndex: 34, 73 | startValue: '~~', 74 | endIndex: 39, 75 | endValue: '~~' 76 | }, 77 | { 78 | type: 'hyper', 79 | meta: 'emphasis', 80 | startIndex: 42, 81 | startValue: '*', 82 |
endIndex: 57, 83 | endValue: '*' 84 | }, 85 | { 86 | type: 'hyper', 87 | meta: 'strong', 88 | startIndex: 46, 89 | startValue: '**', 90 | endIndex: 51, 91 | endValue: '**' 92 | } 93 | ] 94 | expect(result.length).toBe(1) 95 | expect(result[0].value).toBe(text) 96 | expect(result[0].marks).toEqual(marks) 97 | }) 98 | }) 99 | 100 | describe('markdown lint', () => { 101 | test('[md] single paragraph', () => { 102 | expect(getOutput('中文 X[ xxx ](xxx)X`hello`world')).toBe( 103 | '中文 X [xxx](xxx) X `hello` world' 104 | ) 105 | }) 106 | test('[md] frontmatter', () => { 107 | expect( 108 | getOutput('---\ntitle: 介绍\ntype: guide\norder: 2\n---\n## Vue 是什么\n') 109 | ).toBe('---\ntitle: 介绍\ntype: guide\norder: 2\n---\n## Vue 是什么\n') 110 | }) 111 | test('[md] space between raw content', () => { 112 | // 我们 制作了一个视频 113 | expect( 114 | getOutput('我们制作了一个视频') 115 | ).toBe('我们制作了一个视频') 116 | }) 117 | test('[md] space between raw content 2', () => { 118 | // 我们 制作了一个视频 119 | expect( 120 | getOutput('Hello制作了一个视频World') 121 | ).toBe('Hello 制作了一个视频 World') 122 | }) 123 | test('[md] space between raw content 3', () => { 124 | // 创建一个 。 html 文件 125 | expect(getOutput('创建一个 .html 文件')).toBe( 126 | '创建一个 .html 文件' 127 | ) 128 | }) 129 | test('[md] raw content', () => { 130 | // {% raw %}
    ...
    {% raw %} 131 | expect( 132 | getOutput( 133 | '中文 {% raw %}\n
    ...
    \n{% raw %}' 134 | ) 135 | ).toBe('中文 {% raw %}\n
    ...
    \n{% raw %}') 136 | }) 137 | test('[md] empty lines', () => { 138 | expect(getOutput('中文 a\n\nb\n\nc')).toBe('中文 a\n\nb\n\nc') 139 | }) 140 | test('[md] inline code', () => { 141 | expect(getOutput(`改进 \`\` 组件`)).toBe( 142 | `改进 \`\` 组件` 143 | ) 144 | }) 145 | test('[md] footnote + inline code at the end', () => { 146 | expect( 147 | getOutput( 148 | '这样写将始终添加 `errorClass`,但是只有在 `isActive` 是 truthy[[1]](#footnote-1) 时才添加 `activeClass`。' 149 | ) 150 | ).toBe( 151 | '这样写将始终添加 `errorClass`,但是只有在 `isActive` 是 truthy[[1]](#footnote-1) 时才添加 `activeClass`。' 152 | ) 153 | }) 154 | test('[md] space between "&" punctuation', () => { 155 | expect(getOutput('## 访问元素 & 组件')).toBe('## 访问元素 & 组件') 156 | }) 157 | test('[md] duplicated space outside hyper content', () => { 158 | expect( 159 | getOutput( 160 | '那么你可以通过 [`$forceUpdate`](../api/#vm-forceUpdate) 来做这件事。' 161 | ) 162 | ).toBe( 163 | '那么你可以通过 [`$forceUpdate`](../api/#vm-forceUpdate) 来做这件事。' 164 | ) 165 | }) 166 | test('[md] opposite side of hyper mark and bracket mark', () => { 167 | expect( 168 | getOutput( 169 | '注意 **`v-slot` 只能添加在 `