├── .eslintrc ├── .gitignore ├── .prettierrc.js ├── media └── logo.png ├── docs ├── .vuepress │ ├── public │ │ ├── logo.png │ │ └── favicon.ico │ └── config.js ├── doctor-jones │ ├── installation.md │ ├── intro.md │ └── usage.md ├── doctor-jones-extension │ └── README.md ├── cli │ └── README.md ├── README.md └── doctor-jones-loader │ └── README.md ├── .npmignore ├── .travis.yml ├── scripts ├── doc.sh └── release.sh ├── src ├── utils │ └── index.js ├── default-options.js ├── consts.js ├── syntax-parser │ ├── lexer.js │ └── index.js └── index.js ├── index.d.ts ├── LICENSE ├── rollup.config.js ├── package.json ├── bin └── dj ├── test └── index.js ├── CHANGELOG.md ├── README.md └── README_EN.md /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["doran"] 3 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules 3 | build 4 | dist 5 | coverage 6 | .nyc_output 7 | -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | semi: false, 3 | singleQuote: true 4 | } 5 | -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Leopoldthecoder/doctor-jones/HEAD/media/logo.png -------------------------------------------------------------------------------- /docs/.vuepress/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Leopoldthecoder/doctor-jones/HEAD/docs/.vuepress/public/logo.png -------------------------------------------------------------------------------- 
/docs/.vuepress/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Leopoldthecoder/doctor-jones/HEAD/docs/.vuepress/public/favicon.ico -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .eslintrc 3 | .prettierrc.js 4 | .nyc_output 5 | .travis.yml 6 | coverage 7 | rollup.config.js 8 | build 9 | scripts 10 | test 11 | docs 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: stable 3 | script: ./node_modules/.bin/nyc ava -v 4 | cache: npm 5 | after_success: ./node_modules/.bin/nyc report --reporter=text-lcov | ./node_modules/.bin/coveralls 6 | -------------------------------------------------------------------------------- /docs/doctor-jones/installation.md: -------------------------------------------------------------------------------- 1 | ## CDN 2 | ```html 3 | 4 | ``` 5 | 6 | ## npm 7 | ```bash 8 | npm i doctor-jones -S 9 | ``` 10 | -------------------------------------------------------------------------------- /docs/doctor-jones-extension/README.md: -------------------------------------------------------------------------------- 1 | ## 介绍 2 | doctor-jones 的 Chrome 插件可以一键格式化你正在浏览的页面。 3 | 4 | ## 安装 5 | 6 | 从 [Chrome 网上应用店](https://chrome.google.com/webstore/detail/%E7%90%BC%E6%96%AF%E5%8C%BB%E7%94%9F/lggmpimhpmplkengmfmfecohbdbooiem)获取插件。 7 | -------------------------------------------------------------------------------- /scripts/doc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -e 4 | 5 | npm run docs:build 6 | 7 | cd docs/.vuepress/dist 8 | 9 | git init 10 | git add -A 11 | git commit -m 'deploy' 12 | 13 | git 
push -f git@github.com:Leopoldthecoder/doctor-jones.git master:gh-pages 14 | 15 | cd - 16 | -------------------------------------------------------------------------------- /src/utils/index.js: -------------------------------------------------------------------------------- 1 | export const merge = (target, ...sources) => { 2 | for (let i = 0, j = sources.length; i < j; i++) { 3 | const source = sources[i] || {} 4 | for (const prop in source) { 5 | if (source.hasOwnProperty(prop)) { 6 | const value = source[prop] 7 | if (value !== undefined) { 8 | target[prop] = value 9 | } 10 | } 11 | } 12 | } 13 | return target 14 | } 15 | -------------------------------------------------------------------------------- /src/default-options.js: -------------------------------------------------------------------------------- 1 | export default { 2 | // boolean, 添加空格 3 | spacing: true, 4 | 5 | // boolean, 全角符号与字母数字之间允许有空格 6 | spaceBetweenFullwidthPunctuationAndAlphabets: false, 7 | 8 | // boolean, 允许连续感叹号 9 | successiveExclamationMarks: false, 10 | 11 | // string: 'none' | '3dots' | 'all',允许的省略号 12 | ellipsisTolerance: 'none', 13 | 14 | // string: 'double' | 'single' | 'none',使用直角引号替换的弯引号 15 | replaceWithCornerQuotes: 'double', 16 | 17 | // boolean, 数字周围使用半角括号 18 | halfwidthParenthesisAroundNumbers: true 19 | } 20 | -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | export as namespace dj; 2 | 3 | export = DoctorJones; 4 | 5 | declare function DoctorJones(input: string, options?: DoctorJones.DoctorJonesFormatOptions): string; 6 | 7 | declare namespace DoctorJones { 8 | export type DoctorJonesFormatOptions = { 9 | spacing?: boolean, 10 | spaceBetweenFullwidthPunctuationAndAlphabets?: boolean, 11 | successiveExclamationMarks?: boolean, 12 | ellipsisTolerance?: 'none' | '3dots' | 'all', 13 | replaceWithCornerQuotes?: 'double' | 'single' | 'none', 14 | 
halfwidthParenthesisAroundNumbers?: boolean 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /scripts/release.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ -z $1 ]]; then 4 | echo "Enter new version: " 5 | read VERSION 6 | else 7 | VERSION=$1 8 | fi 9 | 10 | read -p "Releasing $VERSION - are you sure? (y/n) " -n 1 -r 11 | echo 12 | if [[ $REPLY =~ ^[Yy]$ ]]; then 13 | echo "Releasing $VERSION ..." 14 | 15 | npm test 16 | 17 | # build 18 | VERSION=$VERSION npm run build 19 | 20 | # version 21 | npm --no-git-tag-version version $VERSION --message "[release] $VERSION" 22 | 23 | # changelog 24 | npx conventional-changelog -p angular -i CHANGELOG.md -s 25 | 26 | git add . 27 | git commit -m "chore: update changelog for $VERSION" 28 | 29 | # publish 30 | git tag $VERSION 31 | git push origin refs/tags/$VERSION 32 | git push origin master 33 | npm publish 34 | fi 35 | -------------------------------------------------------------------------------- /docs/cli/README.md: -------------------------------------------------------------------------------- 1 | ## 安装 2 | 3 | 全局安装 doctor-jones,即可在终端中使用: 4 | 5 | ```bash 6 | npm i doctor-jones -g 7 | ``` 8 | 9 | ## 使用 10 | 11 | ```shell 12 | $ doctor-jones --help 13 | 14 | Format Options 15 | --spacing, -s 是否在中文和字母数字之间添加空格 [boolean] 16 | --space-between, --sb 是否允许在全角符号与字母数字之间存在空格 [boolean] 17 | --exclamation, --ex 是否允许连续的感叹号 [boolean] 18 | --ellipsis, --el 省略号规范化规则 [choices: "none", "3dots", "all"] 19 | --quote, -q 弯引号替换规则 [choices: "none", "double", "single"] 20 | --parenthesis, -p 是否在数字周围使用半角括号 [boolean] 21 | 22 | Options: 23 | --help, -h 帮助信息 [boolean] 24 | --version, -v 当前版本 [boolean] 25 | 26 | Examples: 27 | doctor-jones 需要格式化的字符串 28 | doctor-jones -s false 需要格式化的字符串 29 | doctor-jones -s false --el 3dots 需要格式化的字符串 30 | ``` 31 | -------------------------------------------------------------------------------- 
/src/consts.js: -------------------------------------------------------------------------------- 1 | export const tokenTypes = { 2 | ALPHABETS_AND_NUMBERS: 'ALPHABETS_AND_NUMBERS', 3 | CJK: 'CJK', 4 | ELLIPSIS: 'ELLIPSIS', 5 | DOTS_AS_ELLIPSIS: 'DOTS_AS_ELLIPSIS', 6 | TWO_DOTS: 'TWO_DOTS', 7 | FULLWIDTH_LEFT_DOUBLE_QUOTE: 'FULLWIDTH_LEFT_DOUBLE_QUOTE', 8 | FULLWIDTH_LEFT_PAREN: 'FULLWIDTH_LEFT_PAREN', 9 | FULLWIDTH_LEFT_SINGLE_QUOTE: 'FULLWIDTH_LEFT_SINGLE_QUOTE', 10 | FULLWIDTH_PUNCTUATION: 'FULLWIDTH_PUNCTUATION', 11 | FULLWIDTH_RIGHT_DOUBLE_QUOTE: 'FULLWIDTH_RIGHT_DOUBLE_QUOTE', 12 | FULLWIDTH_RIGHT_PAREN: 'FULLWIDTH_RIGHT_PAREN', 13 | FULLWIDTH_RIGHT_SINGLE_QUOTE: 'FULLWIDTH_RIGHT_SINGLE_QUOTE', 14 | INVALID_ELLIPSIS: 'INVALID_ELLIPSIS', 15 | MISC: 'MISC', 16 | NUMBERS: 'NUMBERS', 17 | SUCCESSIVE_FULLWIDTH_EXCLAMATION: 'SUCCESSIVE_FULLWIDTH_EXCLAMATION', 18 | WHITESPACE: 'WHITESPACE', 19 | HALFWIDTH_CHAR: 'HALFWIDTH_CHAR' 20 | } 21 | 22 | export const operationTypes = { 23 | ADD: 'ADD', 24 | REMOVE: 'REMOVE', 25 | REPLACE: 'REPLACE' 26 | } 27 | -------------------------------------------------------------------------------- /docs/doctor-jones/intro.md: -------------------------------------------------------------------------------- 1 | ## 工具集 2 | 3 | `doctor-jones` 是一个用来格式化中文字符串(包括中英文混排)的工具集,它基于 [w3c/clreq](https://github.com/w3c/clreq) 及其他中文排版最佳实践。作为一个工具集,`doctor-jones` 包括: 4 | 5 | * 一个 [npm 包](https://www.npmjs.com/package/doctor-jones),用于接收字符串,并返回格式化后的字符串 6 | * 一个 [webpack loader](https://github.com/Leopoldthecoder/doctor-jones-loader),用于格式化你的 `.js`, `.ts`, `.jsx`, `.tsx`, `.vue` 文件中的字符串 7 | * 一个 [Chrome 插件](https://github.com/Leopoldthecoder/doctor-jones-extension),用于格式化当前正在浏览的网页 8 | * 一个即开即用的网站,粘贴一段文本,得到格式化后的输出,即 `doctor-jones-as-a-service`,简称 `DJaaS`(计划中) 9 | 10 | ## 功能 11 | 12 | 使用 `doctor-jones` 可以做到以下格式化: 13 | 14 | * 中英文、中文与数字之间增加半角空格 15 | * 移除全角标点和英文/数字之间多余的半角空格 16 | * 移除多于一个的连续感叹号 17 | * 将不规范的省略号(如`。。。`、`、、`等)规范化为 `……` 18 | * 将引号 `“”` `‘’` 替换为 `「」` 
`『』` 19 | * 将纯数字周围的全角括号替换为半角括号 20 | 21 | ::: warning 22 | 需要注意的是,这些格式化只对至少包含一个中文字符或全角符号的字符串生效。 23 | ::: 24 | 25 | ## 示例 26 | 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 杨奕 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | meta: 3 | - name: keywords 4 | content: doctor jones,composition of mixed text,中英文混排,格式化,排版,自动,空格 5 | home: true 6 | heroImage: /logo.png 7 | heroText: Doctor Jones 8 | tagline: 中文排版格式化工具 9 | actionText: 阅读文档 10 | actionLink: /doctor-jones/intro/ 11 | features: 12 | - title: 格式化 13 | details: 修正不规范的中文符号,同时支持中英文混排时不同字符间的格式化 14 | - title: 可配置 15 | details: 通过配置选项来定制你的格式化策略,使输出结果符合你的排版风格 16 | - title: 工具集 17 | details: 除 npm 包以外,还包括 CLI、webpack loader、Chrome 扩展和一个即开即用的网站,满足不同场景的需要 18 | footer: MIT Licensed | Made with ❤ 19 | --- 20 | 21 | ```js 22 | import dj from 'doctor-jones' 23 | 24 | // 在中英文、中文与数字之间增加半角空格 25 | dj('当蚀魂击中已经受其影响的敌人时,立即对敌人造成相当于120秒蚀魂的伤害') 26 | // 当蚀魂击中已经受其影响的敌人时,立即对敌人造成相当于 120 秒蚀魂的伤害 27 | 28 | // 移除全角标点和英文/数字之间多余的半角空格 29 | dj('对圣光的追寻将是她一生中最大的考验, Anajinn 热切地拥抱了自己命运的安排') 30 | // 对圣光的追寻将是她一生中最大的考验,Anajinn 热切地拥抱了自己命运的安排 31 | 32 | // 移除多于一个的连续感叹号 33 | dj('新鲜的肉!!!') 34 | // 新鲜的肉! 35 | 36 | // 将不规范的省略号规范化为 `……` 37 | dj('恐惧之地流传着关于猎魔人库奈的传说,据说此人能与暗影合而为一、神鬼莫测。。。') 38 | // 恐惧之地流传着关于猎魔人库奈的传说,据说此人能与暗影合而为一、神鬼莫测…… 39 | 40 | // 将弯引号替换为直角引号 41 | dj('维尔声称,“御法者”是一种介于现实世界不同位面的存在') 42 | // 维尔声称,「御法者」是一种介于现实世界不同位面的存在 43 | 44 | // 将纯数字周围的全角括号替换为半角括号 45 | dj('镶孔(1)') 46 | // 镶孔(1) 47 | ``` 48 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import resolve from 'rollup-plugin-node-resolve' 2 | import commonjs from 'rollup-plugin-commonjs' 3 | import buble from 'rollup-plugin-buble' 4 | import { eslint } from 'rollup-plugin-eslint' 5 | import { uglify } from 'rollup-plugin-uglify' 6 | 7 | const isProd = process.env.NODE_ENV === 'prod' 8 | const getConfig = (format, min = false) => { 9 | const output = Object.assign( 10 | isProd 11 | ? 
{ 12 | file: `dist/index.${format}.${min ? 'min.' : ''}js`, 13 | format 14 | } 15 | : { 16 | file: 'build/index.js', 17 | format: 'cjs' 18 | }, 19 | format === 'umd' ? { name: 'dj' } : {} 20 | ) 21 | return { 22 | input: 'src/index.js', 23 | output, 24 | plugins: [ 25 | eslint(), 26 | buble(), 27 | resolve(), 28 | commonjs({ 29 | include: 'node_modules/**', 30 | sourceMap: false 31 | }), 32 | min ? uglify() : null 33 | ], 34 | watch: { 35 | clearScreen: false 36 | } 37 | } 38 | } 39 | 40 | export default (isProd 41 | ? [ 42 | getConfig('cjs', false), 43 | getConfig('cjs', true), 44 | getConfig('umd', false), 45 | getConfig('umd', true) 46 | ] 47 | : getConfig()) 48 | -------------------------------------------------------------------------------- /docs/.vuepress/config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | base: '/doctor-jones/', 3 | title: 'Doctor Jones', 4 | head: [ 5 | [ 6 | 'link', 7 | { rel: 'shortcut icon', type: 'image/x-icon', href: './favicon.ico' } 8 | ] 9 | ], 10 | description: '中文排版格式化工具', 11 | themeConfig: { 12 | displayAllHeaders: true, 13 | nav: [ 14 | { text: '首页', link: '/' }, 15 | { text: 'Doctor Jones', link: '/doctor-jones/intro/' }, 16 | { text: 'CLI', link: '/cli/' }, 17 | { text: 'Webpack Loader', link: '/doctor-jones-loader/' }, 18 | { text: 'Chrome 扩展', link: '/doctor-jones-extension/' }, 19 | { 20 | text: 'GitHub', 21 | link: 22 | 'https://github.com/Leopoldthecoder?utf8=%E2%9C%93&tab=repositories&q=doctor-jones' 23 | } 24 | ], 25 | sidebar: { 26 | '/doctor-jones/': [ 27 | ['intro', '介绍'], 28 | ['installation', '安装'], 29 | ['usage', '使用'] 30 | ], 31 | '/cli/': [['', 'CLI']], 32 | '/doctor-jones-loader/': [['', 'webpack loader']], 33 | '/doctor-jones-extension/': [['', 'Chrome 扩展']] 34 | } 35 | }, 36 | plugins: [ 37 | [ 38 | '@vuepress/google-analytics', 39 | { 40 | ga: 'UA-142298882-1' 41 | } 42 | ] 43 | ] 44 | } 45 | 
-------------------------------------------------------------------------------- /docs/doctor-jones/usage.md: -------------------------------------------------------------------------------- 1 | ## 引入 2 | 3 | 根据安装方式的不同,需要以不同方式引入 doctor-jones 4 | 5 | ### 通过 CDN 安装 6 | 7 | ```js 8 | const dj = window.dj 9 | ``` 10 | 11 | ### 通过 npm 安装 12 | 13 | ```js 14 | import dj from 'doctor-jones' 15 | ``` 16 | 17 | ## 调用 18 | 19 | ```js 20 | dj('doctor-jones是一个“治疗”中英文混排格式的工具') 21 | // 返回 'doctor-jones 是一个「治疗」中英文混排格式的工具' 22 | ``` 23 | 24 | ## 格式化选项 25 | 26 | `dj` 支持传入第二个参数,作为格式化选项: 27 | 28 | ```js{9,13,17,24,31,35} 29 | dj( 30 | // 待格式化的字符串 31 | 'doctor-jones是一个“治疗”中英文混排格式的工具', 32 | 33 | // 格式化选项 34 | { 35 | // 是否在中文和字母数字之间添加空格 36 | // 可选值:boolean 37 | spacing: true, 38 | 39 | // 是否允许在全角符号与字母数字之间存在空格 40 | // 可选值:boolean 41 | spaceBetweenFullwidthPunctuationAndAlphabets: false, 42 | 43 | // 是否允许连续的感叹号 44 | // 可选值:boolean 45 | successiveExclamationMarks: false, 46 | 47 | // 省略号规范化规则 48 | // 可选值:'none' | '3dots' | 'all' 49 | // 'none':将连续出现两次或以上的 。、,. 规范化为 …… 50 | // '3dots':除 ... 以外,将连续出现两次或以上的 。、,. 
规范化为 …… 51 | // 'all':不进行省略号规范化 52 | ellipsisTolerance: 'none', 53 | 54 | // 弯引号替换规则 55 | // 可选值:'double' | 'single' | 'none' 56 | // 'double':使用直角引号「」替换弯引号“”,同时使用直角引号『』替换弯引号‘’ 57 | // 'single':使用直角引号「」替换弯引号‘’,同时使用直角引号『』替换弯引号“” 58 | // 'none':不替换弯引号 59 | replaceWithCornerQuotes: 'double', 60 | 61 | // 是否在数字周围使用半角括号 62 | // 可选值:boolean 63 | halfwidthParenthesisAroundNumbers: true 64 | } 65 | ) 66 | ``` 67 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "doctor-jones", 3 | "version": "1.0.2", 4 | "description": "After all, we should all respect w3c/clreq", 5 | "main": "dist/index.cjs.js", 6 | "typings": "index.d.ts", 7 | "scripts": { 8 | "dev": "cross-env NODE_ENV=dev rollup --watch -c", 9 | "build": "rimraf dist && cross-env NODE_ENV=prod rollup -c", 10 | "test": "nyc ava -v && nyc report --reporter=html", 11 | "docs:dev": "vuepress dev docs", 12 | "docs:build": "vuepress build docs", 13 | "docs:pub": "sh scripts/doc.sh", 14 | "pub": "sh scripts/release.sh" 15 | }, 16 | "ava": { 17 | "require": [ 18 | "esm" 19 | ] 20 | }, 21 | "bin": { 22 | "doctor-jones": "bin/dj" 23 | }, 24 | "repository": { 25 | "type": "git", 26 | "url": "git+https://github.com/Leopoldthecoder/doctor-jones.git" 27 | }, 28 | "author": "Leopoldthecoder", 29 | "license": "MIT", 30 | "bugs": { 31 | "url": "https://github.com/Leopoldthecoder/doctor-jones/issues" 32 | }, 33 | "homepage": "https://github.com/Leopoldthecoder/doctor-jones#readme", 34 | "dependencies": { 35 | "update-notifier": "^6.0.2", 36 | "yargonaut": "^1.1.4", 37 | "yargs": "^13.2.4" 38 | }, 39 | "devDependencies": { 40 | "@vuepress/plugin-google-analytics": "^1.0.1", 41 | "ava": "^5.1.0", 42 | "chinese-random-name": "^1.0.0", 43 | "conventional-changelog-cli": "^2.0.11", 44 | "coveralls": "^3.0.4", 45 | "cross-env": "^5.2.0", 46 | "eslint-config-doran": "^1.0.8", 47 | "esm": "^3.0.84", 
48 | "nyc": "^14.1.1", 49 | "prettier": "^1.15.3", 50 | "rimraf": "^2.6.3", 51 | "rollup": "^1.0.1", 52 | "rollup-plugin-buble": "^0.19.6", 53 | "rollup-plugin-commonjs": "^9.2.0", 54 | "rollup-plugin-eslint": "^5.0.0", 55 | "rollup-plugin-node-resolve": "^4.0.0", 56 | "rollup-plugin-uglify": "^6.0.0", 57 | "vuepress": "^1.0.1" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/syntax-parser/lexer.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Modified from https://github.com/ascoders/syntax-parser 3 | * Author: @ascoders 4 | * */ 5 | 6 | class Tokenizer { 7 | constructor(lexerConfig) { 8 | this.lexerConfig = lexerConfig 9 | } 10 | 11 | tokenize(input) { 12 | const tokens = [] 13 | let token 14 | let lastPosition = 0 15 | 16 | // Keep processing the string until it is empty 17 | while (input.length) { 18 | // Get the next token and the token type 19 | const result = this.getNextToken(input) 20 | if (!result || !result.token) { 21 | throw Error(`Lexer: Unexpected string "${input}".`) 22 | } 23 | token = result.token 24 | if (!token.value) { 25 | throw Error(`Lexer: Regex parse error, please check your lexer config.`) 26 | } 27 | token.position = [lastPosition, lastPosition + token.value.length - 1] 28 | lastPosition += token.value.length 29 | // Advance the string 30 | input = input.substring(token.value.length) 31 | if (!result.config.ignore) { 32 | tokens.push(token) 33 | } 34 | } 35 | return tokens 36 | } 37 | 38 | getNextToken(input) { 39 | let result = null 40 | this.lexerConfig.forEach(eachLexer => { 41 | if (result) return 42 | eachLexer.regexes.forEach(regex => { 43 | if (result) return 44 | const token = this.getTokenOnFirstMatch({ 45 | input, 46 | type: eachLexer.type, 47 | regex 48 | }) 49 | if (token) { 50 | result = { 51 | token, 52 | config: eachLexer 53 | } 54 | } 55 | }) 56 | }) 57 | return result 58 | } 59 | 60 | getTokenOnFirstMatch({ input, 
type, regex }) { 61 | const matches = input.match(regex) 62 | if (matches) { 63 | return { type, value: matches[1] } 64 | } 65 | } 66 | } 67 | 68 | export const createLexer = lexerConfig => text => 69 | new Tokenizer(lexerConfig).tokenize(text) 70 | -------------------------------------------------------------------------------- /src/syntax-parser/index.js: -------------------------------------------------------------------------------- 1 | import { createLexer } from './lexer' 2 | import { tokenTypes } from '../consts' 3 | 4 | export default createLexer([ 5 | { 6 | type: tokenTypes.WHITESPACE, 7 | regexes: [/^(\s+)/] 8 | }, 9 | { 10 | type: tokenTypes.CJK, 11 | regexes: [ 12 | /^([\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]+)/ 13 | ] 14 | }, 15 | { 16 | type: tokenTypes.FULLWIDTH_LEFT_PAREN, 17 | regexes: [/^(()/] 18 | }, 19 | { 20 | type: tokenTypes.FULLWIDTH_RIGHT_PAREN, 21 | regexes: [/^())/] 22 | }, 23 | { 24 | type: tokenTypes.SUCCESSIVE_FULLWIDTH_EXCLAMATION, 25 | regexes: [/^(!{2,})/] 26 | }, 27 | { 28 | type: tokenTypes.INVALID_ELLIPSIS, 29 | regexes: [/^([。,、]{2,}|[.]{4,}|[…]{3,})/] 30 | }, 31 | { 32 | type: tokenTypes.ELLIPSIS, 33 | regexes: [/^(…{1,2})/] 34 | }, 35 | { 36 | type: tokenTypes.DOTS_AS_ELLIPSIS, 37 | regexes: [/^([.]{3})/] 38 | }, 39 | { 40 | type: tokenTypes.TWO_DOTS, 41 | regexes: [/^([.]{2})/] 42 | }, 43 | { 44 | type: tokenTypes.FULLWIDTH_LEFT_SINGLE_QUOTE, 45 | regexes: [/^(‘)/] 46 | }, 47 | { 48 | type: tokenTypes.FULLWIDTH_RIGHT_SINGLE_QUOTE, 49 | regexes: [/^(’)/] 50 | }, 51 | { 52 | type: tokenTypes.FULLWIDTH_LEFT_DOUBLE_QUOTE, 53 | regexes: [/^(“)/] 54 | }, 55 | { 56 | type: tokenTypes.FULLWIDTH_RIGHT_DOUBLE_QUOTE, 57 | regexes: [/^(”)/] 58 | }, 59 | { 60 | type: tokenTypes.FULLWIDTH_PUNCTUATION, 61 | regexes: [ 62 | /^([,。:;!?()、〈〉《》“”‘’「」『』〔〕【】〖〗⦗⦘〘〙●~]+?)/ 63 | ] 64 | }, 65 | { 66 | type: tokenTypes.NUMBERS, 67 | regexes: [/^([0-9]+(\.[0-9]+)?)/] 68 | }, 
69 | { 70 | type: tokenTypes.ALPHABETS_AND_NUMBERS, 71 | regexes: [/^([a-zA-Z0-9]+)/] 72 | }, 73 | { 74 | type: tokenTypes.HALFWIDTH_CHAR, 75 | regexes: [/^([\u0020-\u00ff]+?)/] 76 | }, 77 | { 78 | type: tokenTypes.MISC, 79 | regexes: [/^(.+)/] 80 | } 81 | ]) 82 | -------------------------------------------------------------------------------- /bin/dj: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const dj = require('../dist/index.cjs') 4 | const yargonaut = require('yargonaut') 5 | const chalk = yargonaut.chalk() 6 | const updateNotifier = require('update-notifier') 7 | const pkg = require('../package.json') 8 | 9 | updateNotifier({ pkg }).notify() 10 | 11 | const optionMap = { 12 | s: 'spacing', 13 | sb: 'spaceBetweenFullwidthPunctuationAndAlphabets', 14 | ex: 'successiveExclamationMarks', 15 | el: 'ellipsisTolerance', 16 | q: 'replaceWithCornerQuotes', 17 | p: 'halfwidthParenthesisAroundNumbers' 18 | } 19 | 20 | yargonaut.helpStyle('green.underline').errorsStyle('red.bold') 21 | 22 | const argv = require('yargs') 23 | .example('', chalk.cyan('doctor-jones 需要格式化的字符串')) 24 | .example('', chalk.cyan('doctor-jones -s false 需要格式化的字符串')) 25 | .example( 26 | '', 27 | chalk.cyan('doctor-jones -s false --el 3dots 需要格式化的字符串') 28 | ) 29 | .group( 30 | ['s', 'sb', 'ex', 'el', 'q', 'p'], 31 | chalk.green.underline('Format Options') 32 | ) 33 | .option('spacing', { 34 | alias: 's', 35 | type: 'boolean', 36 | desc: chalk.cyan('是否在中文和字母数字之间添加空格') 37 | }) 38 | .option('space-between', { 39 | alias: 'sb', 40 | type: 'boolean', 41 | desc: chalk.cyan('是否允许在全角符号与字母数字之间存在空格') 42 | }) 43 | .option('exclamation', { 44 | alias: 'ex', 45 | type: 'boolean', 46 | desc: chalk.cyan('是否允许连续的感叹号') 47 | }) 48 | .option('ellipsis', { 49 | alias: 'el', 50 | choices: ['none', '3dots', 'all'], 51 | requiresArg: true, 52 | desc: chalk.cyan('省略号规范化规则') 53 | }) 54 | .option('quote', { 55 | alias: 'q', 56 | choices: ['none', 'double', 
'single'], 57 | requiresArg: true, 58 | desc: chalk.cyan('弯引号替换规则') 59 | }) 60 | .option('parenthesis', { 61 | alias: 'p', 62 | type: 'boolean', 63 | desc: chalk.cyan('是否在数字周围使用半角括号') 64 | }) 65 | .option('help', { 66 | alias: 'h', 67 | type: 'boolean', 68 | desc: chalk.cyan('帮助信息') 69 | }) 70 | .option('version', { 71 | alias: 'v', 72 | type: 'boolean', 73 | desc: chalk.cyan('当前版本') 74 | }).argv 75 | 76 | const options = {} 77 | Object.keys(optionMap).forEach(key => { 78 | if (argv[key] !== undefined) { 79 | options[optionMap[key]] = argv[key] 80 | } 81 | }) 82 | 83 | argv._.forEach(input => { 84 | console.log(dj(String(input), options)) 85 | }) 86 | -------------------------------------------------------------------------------- /docs/doctor-jones-loader/README.md: -------------------------------------------------------------------------------- 1 | ## 介绍 2 | 3 | doctor-jones-loader 是一个 webpack loader,用于将你的源代码中的字符串格式化并输出。它支持: 4 | - 字符串字面量 5 | - 模板字符串 6 | - JSX 模板 7 | - .vue 单文件组件中的模板 8 | - .vue 单文件组件中的脚本 9 | 10 | ::: tip 11 | doctor-jones-loader 同样支持 `.ts` 和 `.tsx` 文件 12 | ::: 13 | 14 | ## 安装 15 | 16 | ```bash 17 | npm i doctor-jones-loader -D 18 | ``` 19 | 20 | ## 配置 21 | 22 | 最简单的用法: 23 | ```js 24 | // webpack.config.js 25 | module.exports = { 26 | //... 27 | module: { 28 | rules: [ 29 | { 30 | test: /\.js$/, 31 | use: [ 32 | { 33 | loader: 'doctor-jones-loader' 34 | } 35 | ] 36 | } 37 | ] 38 | } 39 | } 40 | ``` 41 | 42 | 若需自定义格式化选项,可以传入 `formatOptions`(具体的格式化选项请参考[这里](../doctor-jones/usage.html#格式化选项)): 43 | ```js 44 | { 45 | // ... 46 | use: [ 47 | { 48 | loader: 'doctor-jones-loader', 49 | options: { 50 | formatOptions: { 51 | spacing: false 52 | } 53 | } 54 | } 55 | ] 56 | } 57 | ``` 58 | 59 | 支持 JSX(以 React 为例): 60 | ```js 61 | // webpack.config.js 62 | module.exports = { 63 | //... 64 | module: { 65 | rules: [ 66 | { 67 | test: /\.jsx$/, 68 | use: [ 69 | { 70 | loader: 'doctor-jones-loader', 71 | options: { formatOptions: {/* ... 
*/} } 72 | }, 73 | { 74 | loader: 'babel-loader', 75 | options: { 76 | presets: ['@babel/preset-react'] 77 | } 78 | } 79 | ] 80 | } 81 | ] 82 | } 83 | } 84 | ``` 85 | 86 | ::: warning 87 | 由于 loader 链是逆序调用的,所以需要将 doctor-jones-loader 写在 babel-loader 之前。 88 | ::: 89 | 90 | 支持 Vue: 91 | ```js 92 | // webpack.config.js 93 | const VueLoaderPlugin = require('vue-loader/lib/plugin') 94 | module.exports = { 95 | //... 96 | module: { 97 | rules: [ 98 | { 99 | test: /\.vue$/, 100 | use: [ 101 | { 102 | loader: 'doctor-jones-loader', 103 | options: { formatOptions: {/* ... */} } 104 | }, 105 | { 106 | loader: 'vue-loader' 107 | } 108 | ] 109 | } 110 | ] 111 | }, 112 | plugins: [ 113 | new VueLoaderPlugin() 114 | ] 115 | } 116 | ``` 117 | 118 | 支持 TypeScript: 119 | ```js 120 | // webpack.config.js 121 | module.exports = { 122 | //... 123 | module: { 124 | rules: [ 125 | { 126 | test: /\.ts$/, 127 | use: [ 128 | { 129 | loader: 'doctor-jones-loader', 130 | options: { formatOptions: {/* ... */} } 131 | }, 132 | { 133 | loader: 'ts-loader' 134 | } 135 | ] 136 | } 137 | ] 138 | } 139 | } 140 | ``` 141 | 142 | ## 禁用格式化 143 | 若不希望格式化代码中的某一行,可以通过添加注释的方式来禁用格式化: 144 | ```js 145 | const str1 = 'doctor-jones是一个“治疗”中英文混排格式的工具' 146 | // doctor-jones-disabled-line 147 | const str2 = 'doctor-jones是一个“治疗”中英文混排格式的工具' 148 | 149 | // str1 会被格式化,而 str2 会被忽略 150 | ``` 151 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | import test from 'ava' 2 | import random from 'chinese-random-name' 3 | import dj from '../src' 4 | 5 | const getRandomCharacters = random.generate 6 | 7 | const dumbOptions = { 8 | spacing: false, 9 | spaceBetweenFullwidthPunctuationAndAlphabets: true, 10 | successiveExclamationMarks: true, 11 | ellipsisTolerance: 'all', 12 | replaceWithCornerQuotes: 'none', 13 | halfwidthParenthesisAroundNumbers: false 14 | } 15 | 16 | const getOptions = option => 
Object.assign({}, dumbOptions, option) 17 | 18 | test('do nothing', t => { 19 | const chinese = getRandomCharacters(3) 20 | const input = `${chinese}readabitily...` 21 | const output = dj(input, dumbOptions) 22 | t.is(output, input) 23 | }) 24 | 25 | test('spacing', t => { 26 | const chinese = [ 27 | getRandomCharacters(3), 28 | getRandomCharacters(3), 29 | getRandomCharacters(3) 30 | ] 31 | const input = `${chinese[0]}abcd${chinese[1]}42${chinese[2]}xyz` 32 | const output = dj(input, getOptions({ spacing: true })) 33 | t.is(output, `${chinese[0]} abcd ${chinese[1]} 42 ${chinese[2]} xyz`) 34 | }) 35 | 36 | test('remove space between fullwidth punctuation and alphabets/numbers', t => { 37 | const chinese = [getRandomCharacters(3), getRandomCharacters(3)] 38 | const input = `${chinese[0]}, 1 ${chinese[1]} xyz 。` 39 | const output = dj( 40 | input, 41 | getOptions({ spaceBetweenFullwidthPunctuationAndAlphabets: false }) 42 | ) 43 | t.is(output, `${chinese[0]},1 ${chinese[1]} xyz。`) 44 | }) 45 | 46 | test('remove successive exclamation marks', t => { 47 | const chinese = getRandomCharacters(3) 48 | const input = `${chinese}!!!` 49 | const output = dj(input, getOptions({ successiveExclamationMarks: false })) 50 | t.is(output, `${chinese}!`) 51 | }) 52 | 53 | test('normalize ellipsis', t => { 54 | const chinese = [ 55 | getRandomCharacters(3), 56 | getRandomCharacters(3), 57 | getRandomCharacters(3), 58 | getRandomCharacters(3) 59 | ] 60 | const input = `${chinese[0]},,,${chinese[1]}、、${chinese[2]}...${ 61 | chinese[3] 62 | }。。。。` 63 | const noneToleranceOutput = dj( 64 | input, 65 | getOptions({ ellipsisTolerance: 'none' }) 66 | ) 67 | const dotsToleratedOutput = dj( 68 | input, 69 | getOptions({ ellipsisTolerance: '3dots' }) 70 | ) 71 | t.is( 72 | noneToleranceOutput, 73 | `${chinese[0]}……${chinese[1]}……${chinese[2]}……${chinese[3]}……` 74 | ) 75 | t.is( 76 | dotsToleratedOutput, 77 | `${chinese[0]}……${chinese[1]}……${chinese[2]}...${chinese[3]}……` 78 | ) 79 | }) 80 | 81 
| test('replace quotation marks', t => { 82 | const chinese = [ 83 | getRandomCharacters(3), 84 | getRandomCharacters(3), 85 | getRandomCharacters(3) 86 | ] 87 | const input = `${chinese[0]}:“${chinese[1]}‘${chinese[2]}’?”` 88 | const doubleQuoteOutput = dj( 89 | input, 90 | getOptions({ replaceWithCornerQuotes: 'double' }) 91 | ) 92 | const singleQuoteOutput = dj( 93 | input, 94 | getOptions({ replaceWithCornerQuotes: 'single' }) 95 | ) 96 | t.is(doubleQuoteOutput, `${chinese[0]}:「${chinese[1]}『${chinese[2]}』?」`) 97 | t.is(singleQuoteOutput, `${chinese[0]}:『${chinese[1]}「${chinese[2]}」?』`) 98 | }) 99 | 100 | test('halfwidth parenthesis around numbers', t => { 101 | const chinese = getRandomCharacters(3) 102 | const input = `${chinese}(2019)` 103 | const output = dj( 104 | input, 105 | getOptions({ halfwidthParenthesisAroundNumbers: true }) 106 | ) 107 | t.is(output, `${chinese}(2019)`) 108 | }) 109 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.0.2](https://github.com/Leopoldthecoder/doctor-jones/compare/1.0.1...1.0.2) (2019-08-21) 2 | 3 | 4 | 5 | ## [1.0.1](https://github.com/Leopoldthecoder/doctor-jones/compare/0.3.2...1.0.1) (2019-08-21) 6 | 7 | 8 | ### Bug Fixes 9 | 10 | * declare ellipsis as token ([f90975f](https://github.com/Leopoldthecoder/doctor-jones/commit/f90975f)) 11 | * remove ellipsis from full width rule ([80d9d5f](https://github.com/Leopoldthecoder/doctor-jones/commit/80d9d5f)) 12 | 13 | 14 | 15 | # [1.0.0](https://github.com/Leopoldthecoder/doctor-jones/compare/0.3.2...1.0.0) (2019-08-17) 16 | 17 | 18 | ### Bug Fixes 19 | 20 | * remove ellipsis from full width rule ([f9db2f1](https://github.com/Leopoldthecoder/doctor-jones/commit/f9db2f1)) 21 | 22 | 23 | 24 | ## [0.3.2](https://github.com/Leopoldthecoder/doctor-jones/compare/0.3.0...0.3.2) (2019-07-11) 25 | 26 | 27 | ### Bug Fixes 28 | 29 | * ellipsis 
format for 4 or more dots ([1c47946](https://github.com/Leopoldthecoder/doctor-jones/commit/1c47946)) 30 | * remove hyphen from full width rule ([c61d0dd](https://github.com/Leopoldthecoder/doctor-jones/commit/c61d0dd)) 31 | 32 | 33 | 34 | ## [0.3.1](https://github.com/Leopoldthecoder/doctor-jones/compare/0.3.0...0.3.1) (2019-07-11) 35 | 36 | 37 | ### Bug Fixes 38 | 39 | * remove hyphen from full width rule ([0e84861](https://github.com/Leopoldthecoder/doctor-jones/commit/0e84861)) 40 | 41 | 42 | 43 | # [0.3.0](https://github.com/Leopoldthecoder/doctor-jones/compare/0.2.3...0.3.0) (2019-06-29) 44 | 45 | 46 | ### Features 47 | 48 | * add typings ([a12d473](https://github.com/Leopoldthecoder/doctor-jones/commit/a12d473)) 49 | 50 | 51 | 52 | ## [0.2.3](https://github.com/Leopoldthecoder/doctor-jones/compare/0.2.2...0.2.3) (2019-06-26) 53 | 54 | 55 | ### Bug Fixes 56 | 57 | * protect against falsy input ([9c3e95d](https://github.com/Leopoldthecoder/doctor-jones/commit/9c3e95d)) 58 | 59 | 60 | 61 | ## [0.2.2](https://github.com/Leopoldthecoder/doctor-jones/compare/0.2.1...0.2.2) (2019-06-17) 62 | 63 | 64 | ### Bug Fixes 65 | 66 | * wrong cli file name ([5d8d165](https://github.com/Leopoldthecoder/doctor-jones/commit/5d8d165)) 67 | 68 | 69 | 70 | ## [0.2.1](https://github.com/Leopoldthecoder/doctor-jones/compare/0.2.0...0.2.1) (2019-06-17) 71 | 72 | 73 | ### Bug Fixes 74 | 75 | * missing cli registration in package.json ([e96b029](https://github.com/Leopoldthecoder/doctor-jones/commit/e96b029)) 76 | 77 | 78 | 79 | # [0.2.0](https://github.com/Leopoldthecoder/doctor-jones/compare/0.1.2...0.2.0) (2019-06-17) 80 | 81 | 82 | ### Features 83 | 84 | * add cli ([5655aa0](https://github.com/Leopoldthecoder/doctor-jones/commit/5655aa0)) 85 | 86 | 87 | ### BREAKING CHANGES 88 | 89 | * three dots for ellipsisTolerance is renamed from '...' 
to '3dots' 90 | 91 | 92 | 93 | ## [0.1.2](https://github.com/Leopoldthecoder/doctor-jones/compare/0.1.1...0.1.2) (2019-06-16) 94 | 95 | 96 | ### Features 97 | 98 | * reduce npm package size ([788042e](https://github.com/Leopoldthecoder/doctor-jones/commit/788042e)) 99 | 100 | 101 | 102 | ## [0.1.1](https://github.com/Leopoldthecoder/doctor-jones/compare/0.1.0...0.1.1) (2019-06-14) 103 | 104 | 105 | ### Bug Fixes 106 | 107 | * fix a bug caused by half-width characters ([9d1c2a2](https://github.com/Leopoldthecoder/doctor-jones/commit/9d1c2a2)) 108 | 109 | 110 | 111 | # [0.1.0](https://github.com/Leopoldthecoder/doctor-jones/compare/0.0.3...0.1.0) (2019-05-12) 112 | 113 | 114 | ### Code Refactoring 115 | 116 | * use syntax parser ([7371e97](https://github.com/Leopoldthecoder/doctor-jones/commit/7371e97)) 117 | 118 | 119 | ### BREAKING CHANGES 120 | 121 | * replaceHalfwidthWithFullwidth is removed 122 | 123 | 124 | 125 | ## [0.0.3](https://github.com/Leopoldthecoder/doctor-jones/compare/0.0.2...0.0.3) (2019-01-19) 126 | 127 | 128 | ### Bug Fixes 129 | 130 | * remove unnecessary space around quotation marks ([d751fb7](https://github.com/Leopoldthecoder/doctor-jones/commit/d751fb7)) 131 | 132 | 133 | ### Features 134 | 135 | * add replaceHalfwidthWithFullwidth ([0323d6e](https://github.com/Leopoldthecoder/doctor-jones/commit/0323d6e)) 136 | * **loader:** init doctor-jones-loader ([9ff359c](https://github.com/Leopoldthecoder/doctor-jones/commit/9ff359c)) 137 | 138 | 139 | 140 | ## [0.0.2](https://github.com/Leopoldthecoder/doctor-jones/compare/0.0.1...0.0.2) (2019-01-09) 141 | 142 | 143 | ### Features 144 | 145 | * add halfwidthParenthesisAroundNumbers ([c1e94f8](https://github.com/Leopoldthecoder/doctor-jones/commit/c1e94f8)) 146 | * add replaceWithCornerQuotes ([b7a2cbd](https://github.com/Leopoldthecoder/doctor-jones/commit/b7a2cbd)) 147 | * add spaceBetweenFullwidthPunctuationAndAlphabets ([fc35358](https://github.com/Leopoldthecoder/doctor-jones/commit/fc35358)) 148 
| 149 | 150 | 151 | ## [0.0.1](https://github.com/Leopoldthecoder/doctor-jones/compare/bbb190d...0.0.1) (2019-01-07) 152 | 153 | 154 | ### Features 155 | 156 | * add ellipsisTolerance ([d124250](https://github.com/Leopoldthecoder/doctor-jones/commit/d124250)) 157 | * add successiveExclamationMarks ([bbb190d](https://github.com/Leopoldthecoder/doctor-jones/commit/bbb190d)) 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Doctor Jones 2 | > After all, we should all respect [w3c/clreq](https://github.com/w3c/clreq) 3 | 4 |

5 | Codacy grade 6 | Coverage Status 7 | npm bundle size 8 | License 9 |
10 |
11 |
12 |
13 | 14 |
15 |
16 |
17 |
18 | 链接 19 |
20 | 文档 21 | · 22 | English README 23 | · 24 | npm Page 25 |
26 |
27 | 相关项目 28 |
29 | doctor-jones-loader 30 | · 31 | doctor-jones-extension 32 | · 33 | More to be developed... 34 |

35 | 36 | ## 37 | 38 | ### 介绍 39 | 40 | `doctor-jones` 是一个用来格式化中文字符串(包括中英文混排)的工具集,它基于 [w3c/clreq](https://github.com/w3c/clreq) 及其他中文排版最佳实践。作为一个工具集,`doctor-jones` 包括: 41 | 42 | * [x] 一个 [`npm` 包](https://www.npmjs.com/package/doctor-jones),用于接收字符串,并返回格式化后的字符串 43 | * [x] 一个 [`webpack` loader](https://github.com/Leopoldthecoder/doctor-jones-loader),用于格式化你的 `.js`, `.ts`, `.jsx`, `.tsx`, `.vue` 文件中的字符串 44 | * [x] 一个 [Chrome 插件](https://github.com/Leopoldthecoder/doctor-jones-extension),用于格式化当前正在浏览的网页 45 | * [ ] 一个即开即用的网站,粘贴一段文本,得到格式化后的输出,即 `doctor-jones-as-a-service`,简称 `DJaaS`(计划中) 46 | 47 | 使用 `doctor-jones` 可以做到以下格式化: 48 | 49 | * 中英文、中文与数字之间增加半角空格 50 | * 移除全角标点和英文/数字之间多余的半角空格 51 | * 移除多于一个的连续感叹号 52 | * 将省略号规范化为 `……` 53 | * 将引号 `“”` `‘’` 替换为 `「」` `『』` 54 | * 将纯数字周围的全角括号替换为半角括号 55 | 56 | 需要注意的是,这些格式化只对至少包含一个中文字符或全角符号的字符串生效。同时,每个格式化选项都可以通过设置来关闭。 57 | 58 | ### 安装 59 | 60 | #### CDN 61 | ```html 62 | 63 | ``` 64 | 65 | #### npm 66 | ```bash 67 | npm i doctor-jones -S 68 | ``` 69 | 70 | ### 使用 71 | ```js 72 | // 使用 CDN 73 | const dj = window.dj 74 | 75 | // 使用 npm 76 | import dj from 'doctor-jones' 77 | 78 | dj('doctor-jones是一个“治疗”中英文混排格式的工具') // 返回 'doctor-jones 是一个「治疗」中英文混排格式的工具' 79 | ``` 80 | 81 | ### 格式化选项 82 | ```js 83 | dj( 84 | // 待格式化的字符串 85 | 'doctor-jones是一个“治疗”中英文混排格式的工具', 86 | 87 | // 格式化选项 88 | { 89 | // 是否在中文和字母数字之间添加空格 90 | // 可选值:boolean 91 | spacing: true, 92 | 93 | // 是否允许在全角符号与字母数字之间存在空格 94 | // 可选值:boolean 95 | spaceBetweenFullwidthPunctuationAndAlphabets: false, 96 | 97 | // 是否允许连续的感叹号 98 | // 可选值:boolean 99 | successiveExclamationMarks: false, 100 | 101 | // 省略号规范化规则 102 | // 可选值:'none' | '3dots' | 'all' 103 | // 'none':将连续出现两次或以上的 。、,. 规范化为 …… 104 | // '3dots':除 ... 以外,将连续出现两次或以上的 。、,. 
规范化为 …… 105 | // 'all':不进行省略号规范化 106 | ellipsisTolerance: 'none', 107 | 108 | // 弯引号替换规则 109 | // 可选值:'double' | 'single' | 'none' 110 | // 'double':使用直角引号「」替换弯引号“”,同时使用直角引号『』替换弯引号‘’ 111 | // 'single':使用直角引号「」替换弯引号‘’,同时使用直角引号『』替换弯引号“” 112 | // 'none':不替换弯引号 113 | replaceWithCornerQuotes: 'double', 114 | 115 | // 是否在数字周围使用半角括号 116 | // 可选值:boolean 117 | halfwidthParenthesisAroundNumbers: true 118 | } 119 | ) 120 | ``` 121 | 122 | ### CLI 123 | 124 | 全局安装 doctor-jones,即可在终端中使用: 125 | 126 | ```bash 127 | npm i doctor-jones -g 128 | ``` 129 | 130 | ```shell 131 | $ doctor-jones --help 132 | 133 | Format Options 134 | --spacing, -s 是否在中文和字母数字之间添加空格 [boolean] 135 | --space-between, --sb 是否允许在全角符号与字母数字之间存在空格 [boolean] 136 | --exclamation, --ex 是否允许连续的感叹号 [boolean] 137 | --ellipsis, --el 省略号规范化规则 [choices: "none", "3dots", "all"] 138 | --quote, -q 弯引号替换规则 [choices: "none", "double", "single"] 139 | --parenthesis, -p 是否在数字周围使用半角括号 [boolean] 140 | 141 | Options: 142 | --help, -h 帮助信息 [boolean] 143 | --version, -v 当前版本 [boolean] 144 | 145 | Examples: 146 | doctor-jones 需要格式化的字符串 147 | doctor-jones -s false 需要格式化的字符串 148 | doctor-jones -s false --el 3dots 需要格式化的字符串 149 | ``` 150 | 151 | ### 感谢 152 | - [pangu.js](https://github.com/vinta/pangu.js) 153 | - [syntax-parser](https://github.com/ascoders/syntax-parser) 154 | - [ElemeFE Style Guide](https://github.com/ElemeFE/style-guide/blob/master/copywriter.md) 155 | -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 | # Doctor Jones 2 | > After all, we should all respect [w3c/clreq](https://github.com/w3c/clreq) 3 | 4 |

5 | Codacy grade 6 | Coverage Status 7 | npm bundle size 8 | License 9 |
10 |
11 |
12 |
13 | 14 |
15 |
16 |
17 |
18 | Links 19 |
20 | 文档 21 | · 22 | 中文 README 23 | · 24 | npm Page 25 |
26 |
27 | Related Projects 28 |
29 | doctor-jones-loader 30 | · 31 | doctor-jones-extension 32 | · 33 | More to be developed... 34 |

35 | 36 | ## 37 | 38 | ### Introduction 39 | 40 | `doctor-jones` is a toolbox designed for formatting Chinese texts, especially when mixed with Western texts, based on [w3c/clreq](https://github.com/w3c/clreq) and other best practices. As a toolbox, `doctor-jones` includes: 41 | 42 | * [x] An [`npm` package](https://www.npmjs.com/package/doctor-jones) which takes a string and returns the formatted output 43 | * [x] A [`webpack` loader](https://github.com/Leopoldthecoder/doctor-jones-loader) which formats your strings in `.js`, `.ts`, `.jsx`, `.tsx`, `.vue` files 44 | * [x] A [Chrome extension](https://github.com/Leopoldthecoder/doctor-jones-extension) which formats the web page you're browsing 45 | * [ ] A ready-to-use website where you can paste some text and get the formatted output, a.k.a. `doctor-jones-as-a-service`, or `DJaaS` (planned) 46 | 47 | Here "formatting" means: 48 | 49 | * Adding a halfwidth space between a Chinese character and a letter / number 50 | * Removing unnecessary halfwidth spaces between a fullwidth punctuation mark and a letter / number 51 | * Collapsing successive exclamation marks into a single one 52 | * Normalizing ellipses to `……` 53 | * Replacing quotation marks `“”` `‘’` with `「」` `『』` 54 | * Replacing fullwidth brackets around numbers with halfwidth ones 55 | 56 | Note that these formatting rules only take effect on texts that contain at least one Chinese character or one fullwidth mark. Each of the above rules can be turned off using options. 
57 | 58 | ### Installation 59 | 60 | #### CDN 61 | ```html 62 | 63 | ``` 64 | 65 | #### npm 66 | ```bash 67 | npm i doctor-jones -S 68 | ``` 69 | 70 | ### Usage 71 | ```js 72 | // Using CDN 73 | const dj = window.dj 74 | 75 | // Using npm 76 | import dj from 'doctor-jones' 77 | 78 | dj('doctor-jones是一个“治疗”中英文混排格式的工具') // returns 'doctor-jones 是一个「治疗」中英文混排格式的工具' 79 | ``` 80 | 81 | ### Format options 82 | ```js 83 | dj( 84 | // string to be formatted 85 | 'doctor-jones是一个“治疗”中英文混排格式的工具', 86 | 87 | // format options 88 | { 89 | // whether to add a halfwidth space between a Chinese character and a letter / number 90 | // boolean 91 | spacing: true, 92 | 93 | // whether the unnecessary halfwidth space between a fullwidth punctuation mark and a letter / number is allowed 94 | // boolean 95 | spaceBetweenFullwidthPunctuationAndAlphabets: false, 96 | 97 | // whether successive exclamation marks are allowed 98 | // boolean 99 | successiveExclamationMarks: false, 100 | 101 | // ellipses normalization rule 102 | // 'none' | '3dots' | 'all' 103 | // 'none': converts successive 。、,. to …… 104 | // '3dots': converts successive 。、,. to ……, except for ... 
105 | // 'all': no conversion 106 | ellipsisTolerance: 'none', 107 | 108 | // corner quotation rule 109 | // 'double' | 'single' | 'none' 110 | // 'double': replace “” with 「」, and replace ‘’ with 『』 111 | // 'single': replace ‘’ with 「」, and replace “” with 『』 112 | // 'none': “” and ‘’ are not replaced 113 | replaceWithCornerQuotes: 'double', 114 | 115 | // whether to replace fullwidth brackets around numbers with halfwidth ones 116 | // boolean 117 | halfwidthParenthesisAroundNumbers: true 118 | } 119 | ) 120 | ``` 121 | 122 | ### CLI 123 | 124 | Install doctor-jones globally and you can use it in your terminal: 125 | 126 | ```bash 127 | npm i doctor-jones -g 128 | ``` 129 | 130 | ```shell 131 | $ doctor-jones --help 132 | 133 | Format Options 134 | --spacing, -s 是否在中文和字母数字之间添加空格 [boolean] 135 | --space-between, --sb 是否允许在全角符号与字母数字之间存在空格 [boolean] 136 | --exclamation, --ex 是否允许连续的感叹号 [boolean] 137 | --ellipsis, --el 省略号规范化规则 [choices: "none", "3dots", "all"] 138 | --quote, -q 弯引号替换规则 [choices: "none", "double", "single"] 139 | --parenthesis, -p 是否在数字周围使用半角括号 [boolean] 140 | 141 | Options: 142 | --help, -h 帮助信息 [boolean] 143 | --version, -v 当前版本 [boolean] 144 | 145 | Examples: 146 | doctor-jones 需要格式化的字符串 147 | doctor-jones -s false 需要格式化的字符串 148 | doctor-jones -s false --el 3dots 需要格式化的字符串 149 | ``` 150 | 151 | ### Inspirations 152 | - [pangu.js](https://github.com/vinta/pangu.js) 153 | - [syntax-parser](https://github.com/ascoders/syntax-parser) 154 | - [ElemeFE Style Guide](https://github.com/ElemeFE/style-guide/blob/master/copywriter.md) 155 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import defaultOptions from './default-options' 2 | import { tokenTypes, operationTypes } from './consts' 3 | import { merge } from './utils' 4 | import lexer from './syntax-parser' 5 | 6 | const dj = (input, userOptions) => { 7 | if (!input) 
{ 8 | return input 9 | } 10 | const options = merge({}, defaultOptions, userOptions) 11 | const { 12 | spacing, 13 | spaceBetweenFullwidthPunctuationAndAlphabets, 14 | successiveExclamationMarks, 15 | ellipsisTolerance, 16 | replaceWithCornerQuotes, 17 | halfwidthParenthesisAroundNumbers 18 | } = options 19 | 20 | // Diagnose 21 | const tokens = lexer(input) 22 | if ( 23 | !tokens.some( 24 | token => 25 | token.type === tokenTypes.CJK || 26 | token.type === tokenTypes.FULLWIDTH_PUNCTUATION 27 | ) 28 | ) { 29 | return input 30 | } 31 | 32 | let output = '' 33 | const op = [] 34 | 35 | for (let i = 0; i < tokens.length; i++) { 36 | const token = tokens[i] 37 | const nextToken = tokens[i + 1] || {} 38 | const prevToken = tokens[i - 1] || {} 39 | 40 | if (spacing) { 41 | if ( 42 | (token.type === tokenTypes.CJK && 43 | (nextToken.type === tokenTypes.ALPHABETS_AND_NUMBERS || 44 | nextToken.type === tokenTypes.NUMBERS)) || 45 | ((token.type === tokenTypes.ALPHABETS_AND_NUMBERS || 46 | token.type === tokenTypes.NUMBERS) && 47 | nextToken.type === tokenTypes.CJK) 48 | ) { 49 | op.push({ 50 | type: operationTypes.ADD, 51 | position: token.position[1], 52 | content: ' ' 53 | }) 54 | } 55 | } 56 | 57 | if (!spaceBetweenFullwidthPunctuationAndAlphabets) { 58 | if ( 59 | (prevToken.type === tokenTypes.FULLWIDTH_PUNCTUATION && 60 | token.type === tokenTypes.WHITESPACE && 61 | (nextToken.type === tokenTypes.ALPHABETS_AND_NUMBERS || 62 | nextToken.type === tokenTypes.NUMBERS)) || 63 | ((prevToken.type === tokenTypes.ALPHABETS_AND_NUMBERS || 64 | prevToken.type === tokenTypes.NUMBERS) && 65 | token.type === tokenTypes.WHITESPACE && 66 | nextToken.type === tokenTypes.FULLWIDTH_PUNCTUATION) 67 | ) { 68 | op.push({ 69 | type: operationTypes.REMOVE, 70 | position: token.position[0], 71 | length: token.position[1] - token.position[0] + 1 72 | }) 73 | } 74 | } 75 | 76 | if (!successiveExclamationMarks) { 77 | if (token.type === tokenTypes.SUCCESSIVE_FULLWIDTH_EXCLAMATION) { 78 | 
op.push({ 79 | type: operationTypes.REPLACE, 80 | position: token.position[0], 81 | length: token.position[1] - token.position[0] + 1, 82 | content: '!' 83 | }) 84 | } 85 | } 86 | 87 | if (ellipsisTolerance === 'none' || ellipsisTolerance === '3dots') { 88 | if ( 89 | (ellipsisTolerance === 'none' && 90 | (token.type === tokenTypes.INVALID_ELLIPSIS || 91 | token.type === tokenTypes.DOTS_AS_ELLIPSIS || 92 | token.type === tokenTypes.TWO_DOTS)) || 93 | (ellipsisTolerance === '3dots' && 94 | (token.type === tokenTypes.INVALID_ELLIPSIS || 95 | token.type === tokenTypes.TWO_DOTS)) 96 | ) { 97 | op.push({ 98 | type: operationTypes.REPLACE, 99 | position: token.position[0], 100 | length: token.position[1] - token.position[0] + 1, 101 | content: '……' 102 | }) 103 | } 104 | } 105 | 106 | if (halfwidthParenthesisAroundNumbers) { 107 | if ( 108 | prevToken.type === tokenTypes.FULLWIDTH_LEFT_PAREN && 109 | token.type === tokenTypes.NUMBERS && 110 | nextToken.type === tokenTypes.FULLWIDTH_RIGHT_PAREN 111 | ) { 112 | op.push( 113 | { 114 | type: operationTypes.REPLACE, 115 | position: prevToken.position[0], 116 | length: prevToken.position[1] - prevToken.position[0] + 1, 117 | content: '(' 118 | }, 119 | { 120 | type: operationTypes.REPLACE, 121 | position: nextToken.position[0], 122 | length: nextToken.position[1] - nextToken.position[0] + 1, 123 | content: ')' 124 | } 125 | ) 126 | } 127 | } 128 | 129 | if ( 130 | replaceWithCornerQuotes === 'double' || 131 | replaceWithCornerQuotes === 'single' 132 | ) { 133 | switch (token.type) { 134 | case tokenTypes.FULLWIDTH_LEFT_SINGLE_QUOTE: 135 | op.push({ 136 | type: operationTypes.REPLACE, 137 | position: token.position[0], 138 | length: token.position[1] - token.position[0] + 1, 139 | content: replaceWithCornerQuotes === 'double' ? 
'『' : '「' 140 | }) 141 | break 142 | case tokenTypes.FULLWIDTH_RIGHT_SINGLE_QUOTE: 143 | op.push({ 144 | type: operationTypes.REPLACE, 145 | position: token.position[0], 146 | length: token.position[1] - token.position[0] + 1, 147 | content: replaceWithCornerQuotes === 'double' ? '』' : '」' 148 | }) 149 | break 150 | case tokenTypes.FULLWIDTH_LEFT_DOUBLE_QUOTE: 151 | op.push({ 152 | type: operationTypes.REPLACE, 153 | position: token.position[0], 154 | length: token.position[1] - token.position[0] + 1, 155 | content: replaceWithCornerQuotes === 'double' ? '「' : '『' 156 | }) 157 | break 158 | case tokenTypes.FULLWIDTH_RIGHT_DOUBLE_QUOTE: 159 | op.push({ 160 | type: operationTypes.REPLACE, 161 | position: token.position[0], 162 | length: token.position[1] - token.position[0] + 1, 163 | content: replaceWithCornerQuotes === 'double' ? '」' : '』' 164 | }) 165 | break 166 | default: 167 | break 168 | } 169 | } 170 | } 171 | 172 | // Operate 173 | if (!op.length) { 174 | return input 175 | } 176 | let currentOpIndex = 0 177 | for (let i = 0; i < input.length; i++) { 178 | const currentOp = op[currentOpIndex] 179 | if (!currentOp) { 180 | output += input[i] 181 | continue 182 | } 183 | 184 | const { position, type, content, length } = currentOp 185 | if (i === position) { 186 | switch (type) { 187 | case operationTypes.ADD: 188 | output += `${input[i]}${content}` 189 | break 190 | case operationTypes.REPLACE: 191 | output += content 192 | i += length - 1 193 | break 194 | case operationTypes.REMOVE: 195 | output += '' 196 | i += length - 1 197 | break 198 | default: 199 | break 200 | } 201 | currentOpIndex++ 202 | } else { 203 | output += input[i] 204 | } 205 | } 206 | 207 | return output 208 | } 209 | 210 | export default dj 211 | --------------------------------------------------------------------------------