├── test ├── .eslintrc ├── cjs.js ├── test.mjs └── fixtures │ └── cjk_breaks.txt ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .eslintrc.yml ├── CHANGELOG.md ├── LICENSE ├── package.json ├── rollup.config.mjs ├── README.md └── index.mjs /test/.eslintrc: -------------------------------------------------------------------------------- 1 | env: 2 | node: true 3 | mocha: true 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | coverage/ 3 | dist/ 4 | *.log 5 | yarn.lock 6 | -------------------------------------------------------------------------------- /test/cjs.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | /* eslint-env mocha */ 3 | 4 | const assert = require('node:assert') 5 | const fn = require('../') 6 | 7 | describe('CJS', () => { 8 | it('require', () => { 9 | assert.ok(typeof fn === 'function') 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: daily 7 | 8 | - package-ecosystem: npm 9 | directory: / 10 | schedule: 11 | interval: daily 12 | allow: 13 | - dependency-type: production 14 | -------------------------------------------------------------------------------- /test/test.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from 'node:url' 2 | import markdownit from 'markdown-it' 3 | import generate from 'markdown-it-testgen' 4 | 5 | import cjk_breaks from '../index.mjs' 6 | 7 | describe('markdown-it-cjk-breaks', function () { 8 | const md = markdownit().use(cjk_breaks) 9 | 10 | 
generate(fileURLToPath(new URL('fixtures/cjk_breaks.txt', import.meta.url)), { header: true }, md) 11 | }) 12 | -------------------------------------------------------------------------------- /.eslintrc.yml: -------------------------------------------------------------------------------- 1 | extends: standard 2 | 3 | overrides: 4 | - 5 | files: [ '*.mjs' ] 6 | rules: 7 | no-restricted-globals: [ 2, require, __dirname ] 8 | - 9 | files: [ 'test/**' ] 10 | env: { mocha: true } 11 | - 12 | files: [ 'lib/**', 'index.mjs' ] 13 | parserOptions: { ecmaVersion: 2015 } 14 | 15 | ignorePatterns: 16 | - demo/ 17 | - dist/ 18 | - benchmark/extra/ 19 | 20 | rules: 21 | camelcase: 0 22 | no-multi-spaces: 0 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2.0.0 / 2023-12-05 2 | ------------------ 3 | 4 | - Rewrite to ESM. 5 | 6 | 7 | 1.1.3 / 2019-02-01 8 | ------------------ 9 | 10 | - Fixed browser module name, #3. 11 | 12 | 13 | 1.1.2 / 2018-11-27 14 | ------------------ 15 | 16 | - Dev deps bump. 17 | - `dist/` files rebuild, missed in prev release. 18 | 19 | 20 | 1.1.1 / 2018-11-27 21 | ------------------ 22 | 23 | - Fix crash due to incorrect softbreak handling, #2. 24 | 25 | 26 | 1.1.0 / 2018-02-26 27 | ------------------ 28 | 29 | - Switch to algorithm recommended for browsers in [css-text-3](https://www.w3.org/TR/css-text-3/#line-break-transform), #1. 30 | 31 | 32 | 1.0.0 / 2018-02-23 33 | ------------------ 34 | 35 | - First release. 
36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '0 0 * * 3' 8 | 9 | jobs: 10 | test: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [ '18' ] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Use Node.js ${{ matrix.node-version }} 22 | uses: actions/setup-node@v4 23 | with: 24 | node-version: ${{ matrix.node-version }} 25 | 26 | - run: npm install 27 | 28 | - name: Test 29 | run: npm test 30 | 31 | - name: Upload coverage report to coveralls.io 32 | uses: coverallsapp/github-action@master 33 | with: 34 | github-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Authors. 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /test/fixtures/cjk_breaks.txt: -------------------------------------------------------------------------------- 1 | Remove linebreaks near ZWSP 2 | . 3 | foo​ 4 | bar 5 | ​baz 6 | . 7 |

foo​bar​baz

8 | . 9 | 10 | Remove linebreaks between hiragana (wide) characters 11 | . 12 | あおえ 13 | いう 14 | . 15 |

あおえいう

16 | . 17 | 18 | Remove linebreaks between halfwidth katakana 19 | . 20 | アオエ 21 | イウ 22 | . 23 |

アオエイウ

24 | . 25 | 26 | Remove linebreaks between fullwidth characters 27 | . 28 | !"# 29 | $% 30 | . 31 |

!"#$%

32 | . 33 | 34 | Keep linebreaks between hangul characters 35 | . 36 | ㅏㅗㅔ 37 | ㅣㅜ 38 | ᅡᅩᅦ 39 | ᅵᅮ 40 | . 41 |

ㅏㅗㅔ 42 | ㅣㅜ 43 | ᅡᅩᅦ 44 | ᅵᅮ

45 | . 46 | 47 | Keep linebreaks between hiragana (wide) and english 48 | . 49 | あおえ 50 | aoe 51 | あおえ 52 | . 53 |

あおえ 54 | aoe 55 | あおえ

56 | . 57 | 58 | Emphasis tokens should be skipped 59 | . 60 | *あおえ* 61 | *いう* 62 | . 63 |

あおえいう

64 | . 65 | 66 | Should recognize astral characters correctly 67 | . 68 | foo🈀 69 | 🈀foo 70 | . 71 |

foo🈀🈀foo

72 | . 73 | 74 | Should process EOL correctly 75 | . 76 | ![img](image.png) 77 | text 78 | . 79 |

img 80 | text

81 | . 82 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "markdown-it-cjk-breaks", 3 | "version": "2.0.0", 4 | "description": "Suppress linebreaks between east asian characters", 5 | "keywords": [ 6 | "markdown-it-plugin", 7 | "markdown-it" 8 | ], 9 | "repository": "markdown-it/markdown-it-cjk-breaks.git", 10 | "license": "MIT", 11 | "main": "dist/index.cjs.js", 12 | "module": "index.mjs", 13 | "exports": { 14 | ".": { 15 | "require": "./dist/index.cjs.js", 16 | "import": "./index.mjs" 17 | }, 18 | "./*": { 19 | "require": "./*", 20 | "import": "./*" 21 | } 22 | }, 23 | "files": [ 24 | "index.mjs", 25 | "lib/", 26 | "dist/" 27 | ], 28 | "dependencies": { 29 | "get-east-asian-width": "^1.2.0" 30 | }, 31 | "scripts": { 32 | "lint": "eslint .", 33 | "build": "rollup -c", 34 | "test": "npm run lint && npm run build && c8 --exclude dist --exclude test -r text -r html -r lcov mocha", 35 | "prepublishOnly": "npm run lint && npm run build" 36 | }, 37 | "devDependencies": { 38 | "@rollup/plugin-babel": "^6.0.4", 39 | "@rollup/plugin-node-resolve": "^15.2.3", 40 | "@rollup/plugin-terser": "^0.4.4", 41 | "c8": "^8.0.1", 42 | "eslint": "^8.55.0", 43 | "eslint-config-standard": "^17.1.0", 44 | "markdown-it": "^13.0.2", 45 | "markdown-it-testgen": "^0.1.6", 46 | "mocha": "^10.2.0", 47 | "rollup": "^4.6.1" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /rollup.config.mjs: -------------------------------------------------------------------------------- 1 | import resolve from '@rollup/plugin-node-resolve' 2 | import terser from '@rollup/plugin-terser' 3 | import { babel } from '@rollup/plugin-babel' 4 | import { readFileSync } from 'fs' 5 | 6 | const pkg = JSON.parse(readFileSync(new URL('package.json', import.meta.url))) 7 | 8 | function globalName (name) { 9 | const parts = 
name.split('-') 10 | for (let i = 2; i < parts.length; i++) { 11 | parts[i] = parts[i][0].toUpperCase() + parts[i].slice(1) 12 | } 13 | return parts.join('') 14 | } 15 | 16 | const config_umd_full = { 17 | input: 'index.mjs', 18 | output: [ 19 | { 20 | file: `dist/${pkg.name}.js`, 21 | format: 'umd', 22 | name: globalName(pkg.name), 23 | plugins: [ 24 | // Here terser is used only to force ascii output 25 | terser({ 26 | mangle: false, 27 | compress: false, 28 | format: { comments: 'all', beautify: true, ascii_only: true, indent_level: 2 } 29 | }) 30 | ] 31 | }, 32 | { 33 | file: `dist/${pkg.name}.min.js`, 34 | format: 'umd', 35 | name: globalName(pkg.name), 36 | plugins: [ 37 | terser({ 38 | format: { ascii_only: true } 39 | }) 40 | ] 41 | } 42 | ], 43 | plugins: [ 44 | resolve(), 45 | babel({ babelHelpers: 'bundled' }), 46 | { 47 | banner () { 48 | return `/*! ${pkg.name} ${pkg.version} https://github.com/${pkg.repository} @license ${pkg.license} */` 49 | } 50 | } 51 | ] 52 | } 53 | 54 | const config_cjs_no_deps = { 55 | input: 'index.mjs', 56 | output: { 57 | file: 'dist/index.cjs.js', 58 | format: 'cjs' 59 | }, 60 | // Bundle deps, since those not support CJS 61 | // external: Object.keys(pkg.dependencies || {}), 62 | plugins: [ 63 | resolve(), 64 | babel({ babelHelpers: 'bundled' }) 65 | ] 66 | } 67 | 68 | let config = [ 69 | config_umd_full, 70 | config_cjs_no_deps 71 | ] 72 | 73 | if (process.env.CJS_ONLY) config = [config_cjs_no_deps] 74 | 75 | export default config 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # markdown-it-cjk-breaks 2 | 3 | [![CI](https://github.com/markdown-it/markdown-it-cjk-breaks/actions/workflows/ci.yml/badge.svg)](https://github.com/markdown-it/markdown-it-cjk-breaks/actions/workflows/ci.yml) 4 | [![NPM 
version](https://img.shields.io/npm/v/markdown-it-cjk-breaks.svg?style=flat)](https://www.npmjs.org/package/markdown-it-cjk-breaks) 5 | [![Coverage Status](https://coveralls.io/repos/markdown-it/markdown-it-cjk-breaks/badge.svg?branch=master&service=github)](https://coveralls.io/github/markdown-it/markdown-it-cjk-breaks?branch=master) 6 | 7 | > Plugin for [markdown-it](https://github.com/markdown-it/markdown-it) that suppresses linebreaks between East Asian characters. 8 | 9 | Normally, newlines in markdown text are rendered as newlines in the output HTML. Browsers will then usually render those newlines as whitespace (smarter behavior is specified in W3C drafts, but not actually implemented by vendors). 10 | 11 | This plugin finds and removes newlines that should not be converted to a space. The algorithm matches [CSS Text Module Level 3](https://www.w3.org/TR/css-text-3/#line-break-transform): 12 | 13 | - If the character immediately before or immediately after the segment break is the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space. 14 | - Otherwise, if the East Asian Width property [UAX11] of both the character before and after the segment break is F, W, or H (not A), and neither side is Hangul, then the segment break is removed. 15 | - Otherwise, the segment break is converted to a space (U+0020). 16 | 17 | ## Install 18 | 19 | ```bash 20 | yarn add markdown-it-cjk-breaks 21 | ``` 22 | 23 | 24 | ## Usage 25 | 26 | ```js 27 | var md = require('markdown-it')(); 28 | var cjk_breaks = require('markdown-it-cjk-breaks'); 29 | 30 | md.use(cjk_breaks); 31 | 32 | md.render(` 33 | あおえ 34 | うい 35 | aoe 36 | ui 37 | `); 38 | 39 | // returns: 40 | // 41 | //

あおえうい 42 | //aoe 43 | //ui

import { eastAsianWidthType } from 'get-east-asian-width'

// Returns true when the UTF-16 code units `c1` and `c2` form a
// high/low surrogate pair (i.e. together encode one astral character).
function is_surrogate (c1, c2) {
  return c1 >= 0xD800 && c1 <= 0xDBFF && c2 >= 0xDC00 && c2 <= 0xDFFF
}

// Returns true when the string `c` contains a character matched by the
// Hangul script character class below.
function is_hangul (c) {
  // require('unicode-10.0.0/Script/Hangul/regex')
  /* eslint-disable max-len, no-misleading-character-class */
  return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c)
  /* eslint-enable max-len, no-misleading-character-class */
}

// Returns the last character of the non-empty string `str`, keeping a
// trailing surrogate pair together.
function last_char (str) {
  const c1 = str.charCodeAt(str.length - 2)
  const c2 = str.charCodeAt(str.length - 1)

  return str.slice(is_surrogate(c1, c2) ? -2 : -1)
}

// Returns the first character of the non-empty string `str`, keeping a
// leading surrogate pair together.
function first_char (str) {
  const c1 = str.charCodeAt(0)
  const c2 = str.charCodeAt(1)

  return str.slice(0, is_surrogate(c1, c2) ? 2 : 1)
}

// Returns true when the width type reported by `eastAsianWidthType` for the
// first code point of `ch` starts with "f", "w" or "h" - fullwidth (F),
// wide (W) or halfwidth (H). `ch` must be a non-empty string.
function is_fwh (ch) {
  return /^[fwh]$/.test(eastAsianWidthType(ch.codePointAt(0))[0])
}

// Decides whether the softbreak between the characters `last` and `next`
// has to be dropped instead of being rendered as a newline.
function should_remove_break (last, next) {
  // remove newline if it's adjacent to ZWSP
  if (last === '\u200b' || next === '\u200b') return true

  // remove newline if both characters are fullwidth (F), wide (W) or
  // halfwidth (H), but not Hangul
  return is_fwh(last) && is_fwh(next) && !is_hangul(last) && !is_hangul(next)
}

// Walks the children of one inline token and, for every `softbreak`, looks at
// the nearest text character on each side. Breaks that must be suppressed are
// rewritten in place into empty `text` tokens.
function process_inlines (tokens) {
  for (let i = 0; i < tokens.length; i++) {
    if (tokens[i].type !== 'softbreak') continue

    // default last/next character to space
    let last = ' '
    let next = ' '

    for (let j = i - 1; j >= 0; j--) {
      // Empty text tokens carry no character to inspect, so skip them like
      // any non-text token. This function itself creates such tokens (see
      // the end of the loop body); without the check a later softbreak could
      // pick one up, e.g. "あ\n`x`\nい", and hand `''.codePointAt(0)`
      // (undefined) to eastAsianWidthType, which expects a code point.
      if (tokens[j].type !== 'text' || tokens[j].content.length === 0) continue

      last = last_char(tokens[j].content)
      break
    }

    for (let j = i + 1; j < tokens.length; j++) {
      // Same rule as above: only non-empty text tokens provide a neighbour.
      if (tokens[j].type !== 'text' || tokens[j].content.length === 0) continue

      next = first_char(tokens[j].content)
      break
    }

    if (should_remove_break(last, next)) {
      // Keep the token (so indexes stay stable) but make it render nothing.
      tokens[i].type = 'text'
      tokens[i].content = ''
    }
  }
}

// Core rule: applies `process_inlines` to the children of every `inline`
// token in `state.tokens`.
function cjk_breaks (state) {
  for (let blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
    if (state.tokens[blkIdx].type !== 'inline') continue

    process_inlines(state.tokens[blkIdx].children)
  }
}

// Plugin entry point: registers the `cjk_breaks` rule at the end of the
// core rule chain of the given markdown-it instance `md`.
export default function cjk_breaks_plugin (md) {
  md.core.ruler.push('cjk_breaks', cjk_breaks)
}