├── test ├── .eslintrc ├── cjs.js ├── test.mjs └── fixtures │ └── cjk_breaks.txt ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .eslintrc.yml ├── CHANGELOG.md ├── LICENSE ├── package.json ├── rollup.config.mjs ├── README.md └── index.mjs /test/.eslintrc: -------------------------------------------------------------------------------- 1 | env: 2 | node: true 3 | mocha: true 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | coverage/ 3 | dist/ 4 | *.log 5 | yarn.lock 6 | -------------------------------------------------------------------------------- /test/cjs.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | /* eslint-env mocha */ 3 | 4 | const assert = require('node:assert') 5 | const fn = require('../') 6 | 7 | describe('CJS', () => { 8 | it('require', () => { 9 | assert.ok(typeof fn === 'function') 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: daily 7 | 8 | - package-ecosystem: npm 9 | directory: / 10 | schedule: 11 | interval: daily 12 | allow: 13 | - dependency-type: production 14 | -------------------------------------------------------------------------------- /test/test.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from 'node:url' 2 | import markdownit from 'markdown-it' 3 | import generate from 'markdown-it-testgen' 4 | 5 | import cjk_breaks from '../index.mjs' 6 | 7 | describe('markdown-it-cjk-breaks', function () { 8 | const md = markdownit().use(cjk_breaks) 9 | 10 | 
generate(fileURLToPath(new URL('fixtures/cjk_breaks.txt', import.meta.url)), { header: true }, md) 11 | }) 12 | -------------------------------------------------------------------------------- /.eslintrc.yml: -------------------------------------------------------------------------------- 1 | extends: standard 2 | 3 | overrides: 4 | - 5 | files: [ '*.mjs' ] 6 | rules: 7 | no-restricted-globals: [ 2, require, __dirname ] 8 | - 9 | files: [ 'test/**' ] 10 | env: { mocha: true } 11 | - 12 | files: [ 'lib/**', 'index.mjs' ] 13 | parserOptions: { ecmaVersion: 2015 } 14 | 15 | ignorePatterns: 16 | - demo/ 17 | - dist/ 18 | - benchmark/extra/ 19 | 20 | rules: 21 | camelcase: 0 22 | no-multi-spaces: 0 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2.0.0 / 2023-12-05 2 | ------------------ 3 | 4 | - Rewrite to ESM. 5 | 6 | 7 | 1.1.3 / 2019-02-01 8 | ------------------ 9 | 10 | - Fixed browser module name, #3. 11 | 12 | 13 | 1.1.2 / 2018-11-27 14 | ------------------ 15 | 16 | - Dev deps bump. 17 | - `dist/` files rebuild, missed in prev release. 18 | 19 | 20 | 1.1.1 / 2018-11-27 21 | ------------------ 22 | 23 | - Fix crash due to incorrect softbreak handling, #2. 24 | 25 | 26 | 1.1.0 / 2018-02-26 27 | ------------------ 28 | 29 | - Switch to algorithm recommended for browsers in [css-text-3](https://www.w3.org/TR/css-text-3/#line-break-transform), #1. 30 | 31 | 32 | 1.0.0 / 2018-02-23 33 | ------------------ 34 | 35 | - First release. 
36 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '0 0 * * 3' 8 | 9 | jobs: 10 | test: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [ '18' ] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Use Node.js ${{ matrix.node-version }} 22 | uses: actions/setup-node@v4 23 | with: 24 | node-version: ${{ matrix.node-version }} 25 | 26 | - run: npm install 27 | 28 | - name: Test 29 | run: npm test 30 | 31 | - name: Upload coverage report to coveralls.io 32 | uses: coverallsapp/github-action@master 33 | with: 34 | github-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Authors. 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /test/fixtures/cjk_breaks.txt: -------------------------------------------------------------------------------- 1 | Remove linebreaks near ZWSP 2 | . 3 | foo 4 | bar 5 | baz 6 | . 7 |
foobarbaz
8 | . 9 | 10 | Remove linebreaks between hiragana (wide) characters 11 | . 12 | あおえ 13 | いう 14 | . 15 |あおえいう
16 | . 17 | 18 | Remove linebreaks between halfwidth katakana 19 | . 20 | アオエ 21 | イウ 22 | . 23 |アオエイウ
24 | . 25 | 26 | Remove linebreaks between fullwidth characters 27 | . 28 | !"# 29 | $% 30 | . 31 |!"#$%
32 | . 33 | 34 | Keep linebreaks between hangul characters 35 | . 36 | ㅏㅗㅔ 37 | ㅣㅜ 38 | ᅡᅩᅦ 39 | ᅵᅮ 40 | . 41 |ㅏㅗㅔ 42 | ㅣㅜ 43 | ᅡᅩᅦ 44 | ᅵᅮ
45 | . 46 | 47 | Keep linebreaks between hiragana (wide) and english 48 | . 49 | あおえ 50 | aoe 51 | あおえ 52 | . 53 |あおえ 54 | aoe 55 | あおえ
56 | . 57 | 58 | Emphasis tokens should be skipped 59 | . 60 | *あおえ* 61 | *いう* 62 | . 63 |あおえいう
64 | . 65 | 66 | Should recognize astral characters correctly 67 | . 68 | foo🈀 69 | 🈀foo 70 | . 71 |foo🈀🈀foo
72 | . 73 | 74 | Should process EOL correctly 75 | . 76 |  77 | text 78 | . 79 |
80 | text
あおえうい 42 | //aoe 43 | //ui
/*
 * Text that shared these source lines with the module below (the tail of
 * README.md and the dump's file header), kept here so nothing is lost:
 *
 *   ```
 *
 *   ## License
 *
 *   [MIT](https://github.com/markdown-it/markdown-it-cjk-breaks/blob/master/LICENSE)
 *
 *   /index.mjs:
 */
import { eastAsianWidthType } from 'get-east-asian-width'

/**
 * Tells whether two UTF-16 code units form a surrogate pair.
 *
 * @param {number} c1 - candidate high surrogate (0xD800..0xDBFF)
 * @param {number} c2 - candidate low surrogate (0xDC00..0xDFFF)
 * @returns {boolean} true when c1 is a high and c2 is a low surrogate;
 *   false otherwise (including when either value is NaN)
 */
function is_surrogate (c1, c2) {
  return c1 >= 0xD800 && c1 <= 0xDBFF && c2 >= 0xDC00 && c2 <= 0xDFFF
}

/**
 * Tells whether a string contains a character matched by the Hangul
 * character class below.
 *
 * @param {string} c - string to test (callers pass a single character)
 * @returns {boolean}
 */
function is_hangul (c) {
  // require('unicode-10.0.0/Script/Hangul/regex')
  /* eslint-disable max-len, no-misleading-character-class */
  return /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/.test(c)
  /* eslint-enable max-len */
}

/**
 * Tells whether a token is a text token that carries at least one character.
 *
 * Empty text tokens must not be used as the neighbour of a softbreak: this
 * plugin itself produces them (a removed softbreak becomes an empty text
 * token), and taking a character from one yields '' whose code point is
 * undefined.
 *
 * @param {{type: string, content: string}} token
 * @returns {boolean}
 */
function has_text (token) {
  return token.type === 'text' && token.content.length > 0
}

/**
 * Returns the last character of a non-empty string, keeping a trailing
 * surrogate pair together.
 *
 * @param {string} content - non-empty string
 * @returns {string} one or two UTF-16 code units
 */
function last_char (content) {
  const c1 = content.charCodeAt(content.length - 2)
  const c2 = content.charCodeAt(content.length - 1)

  return content.slice(is_surrogate(c1, c2) ? -2 : -1)
}

/**
 * Returns the first character of a non-empty string, keeping a leading
 * surrogate pair together.
 *
 * @param {string} content - non-empty string
 * @returns {string} one or two UTF-16 code units
 */
function first_char (content) {
  const c1 = content.charCodeAt(0)
  const c2 = content.charCodeAt(1)

  return content.slice(0, is_surrogate(c1, c2) ? 2 : 1)
}

/**
 * Tells whether the East Asian Width type of a character starts with
 * 'f', 'w' or 'h' (fullwidth, wide or halfwidth).
 *
 * @param {string} ch - non-empty string; only its first code point is used
 * @returns {boolean}
 */
function is_f_w_h (ch) {
  return /^[fwh]$/.test(eastAsianWidthType(ch.codePointAt(0))[0])
}

/**
 * Rewrites, in place, every softbreak that should not be rendered into an
 * empty text token.
 *
 * A softbreak is dropped when the nearest non-empty text character before or
 * after it is ZWSP (U+200B), or when both neighbouring characters are
 * fullwidth/wide/halfwidth and neither is Hangul. Tokens that are not text
 * (and text tokens with empty content) are skipped while looking for the
 * neighbours; a missing neighbour counts as a space.
 *
 * @param {Array<{type: string, content: string}>} tokens - children of an
 *   inline token; mutated in place
 * @returns {void}
 */
function process_inlines (tokens) {
  for (let i = 0; i < tokens.length; i++) {
    if (tokens[i].type !== 'softbreak') continue

    // default last/next character to space
    let last = ' '
    let next = ' '

    // nearest character before the break
    for (let j = i - 1; j >= 0; j--) {
      if (!has_text(tokens[j])) continue

      last = last_char(tokens[j].content)
      break
    }

    // nearest character after the break
    for (let j = i + 1; j < tokens.length; j++) {
      if (!has_text(tokens[j])) continue

      next = first_char(tokens[j].content)
      break
    }

    // remove newline if it's adjacent to ZWSP
    let remove_break = last === '\u200b' || next === '\u200b'

    // remove newline if both characters are fullwidth (F), wide (W) or
    // halfwidth (H), but not Hangul
    if (!remove_break && is_f_w_h(last) && is_f_w_h(next)) {
      remove_break = !is_hangul(last) && !is_hangul(next)
    }

    if (remove_break) {
      tokens[i].type = 'text'
      tokens[i].content = ''
    }
  }
}

/**
 * Core rule: runs process_inlines over the children of every inline token
 * in the parser state.
 *
 * @param {{tokens: Array<{type: string, children: Array}>}} state
 * @returns {void}
 */
function cjk_breaks (state) {
  for (let blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
    if (state.tokens[blkIdx].type !== 'inline') continue

    process_inlines(state.tokens[blkIdx].children)
  }
}

/**
 * Plugin entry point: appends the 'cjk_breaks' rule to the core ruler of the
 * given markdown-it instance.
 *
 * @param {{core: {ruler: {push: Function}}}} md - markdown-it instance
 * @returns {void}
 */
export default function cjk_breaks_plugin (md) {
  md.core.ruler.push('cjk_breaks', cjk_breaks)
}