├── .npmignore ├── .gitignore ├── src ├── cn2t.js ├── full.js ├── t2cn.js ├── data-config.js └── main.js ├── .github └── workflows │ ├── test.yml │ └── npm-publish.yml ├── test ├── node │ ├── speed.cjs │ ├── basic.cjs │ └── basic.mjs └── html │ ├── basic.html │ └── complex.html ├── LICENSE ├── package.json ├── rollup.config.js ├── README-zh-CN.md ├── README-zh-TW.md └── README.md /.npmignore: -------------------------------------------------------------------------------- 1 | /.github 2 | /build.js 3 | /buildAndTest.sh 4 | /rollup.config.js 5 | /main.js 6 | /full.js 7 | /test 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | 4 | /data*.js 5 | /full.js 6 | 7 | /test/神雕侠侣.txt 8 | /test/天龙八部.txt 9 | 10 | /bundle-browser.js 11 | /bundle-node.js 12 | -------------------------------------------------------------------------------- /src/cn2t.js: -------------------------------------------------------------------------------- 1 | import { Trie, ConverterFactory, ConverterBuilder, CustomConverter, HTMLConverter } from "./main.js"; 2 | import * as Locale from "../dist/esm-lib/preset/cn2t.js"; 3 | 4 | const Converter = ConverterBuilder(Locale); 5 | 6 | export { Trie, ConverterFactory, Converter, CustomConverter, HTMLConverter, Locale }; 7 | -------------------------------------------------------------------------------- /src/full.js: -------------------------------------------------------------------------------- 1 | import { Trie, ConverterFactory, ConverterBuilder, CustomConverter, HTMLConverter } from "./main.js"; 2 | import * as Locale from "../dist/esm-lib/preset/full.js"; 3 | 4 | const Converter = ConverterBuilder(Locale); 5 | 6 | export { Trie, ConverterFactory, Converter, CustomConverter, HTMLConverter, Locale }; 7 | -------------------------------------------------------------------------------- /src/t2cn.js: 
-------------------------------------------------------------------------------- 1 | import { Trie, ConverterFactory, ConverterBuilder, CustomConverter, HTMLConverter } from "./main.js"; 2 | import * as Locale from "../dist/esm-lib/preset/t2cn.js"; 3 | 4 | const Converter = ConverterBuilder(Locale); 5 | 6 | export { Trie, ConverterFactory, Converter, CustomConverter, HTMLConverter, Locale }; 7 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - gh-pages 7 | paths-ignore: 8 | - '*.md' 9 | pull_request: 10 | branches-ignore: 11 | - gh-pages 12 | paths-ignore: 13 | - '*.md' 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v2 20 | - uses: actions/setup-node@v1 21 | with: 22 | node-version: 14.x 23 | - name: Install bundler 24 | run: npm install -g rollup 25 | - name: Install dependencies 26 | run: npm ci 27 | - name: Build & Test 28 | run: ./buildAndTest.sh 29 | -------------------------------------------------------------------------------- /test/node/speed.cjs: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | const OpenCC = require('opencc-js'); 4 | 5 | const text = fs.readFileSync(path.join(__dirname, '..', '神雕侠侣.txt'), { encoding: 'utf8' }) 6 | + fs.readFileSync(path.join(__dirname, '..', '天龙八部.txt'), { encoding: 'utf8' }); 7 | const len = [...text].length; 8 | const loopTimes = 20; 9 | 10 | const startTime = Date.now(); 11 | 12 | const converter = OpenCC.Converter({ from: 'cn', to: 'hk' }); 13 | for (let i = 0; i < loopTimes; i += 1) { 14 | converter(text); 15 | } 16 | 17 | const endTime = Date.now(); 18 | console.log(`檔案共 ${len} 字,測試轉換 ${loopTimes} 次,共用時 ${endTime - startTime} 毫秒`); 19 | 
-------------------------------------------------------------------------------- /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | name: Node.js Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | publish-npm: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: actions/setup-node@v1 13 | with: 14 | node-version: 14 15 | registry-url: https://registry.npmjs.org/ 16 | - name: Install bundler 17 | run: npm install -g rollup 18 | - name: Install dependencies 19 | run: npm ci 20 | - name: Build & Test 21 | run: ./buildAndTest.sh 22 | - name: Publish 23 | run: npm publish 24 | env: 25 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 26 | -------------------------------------------------------------------------------- /src/data-config.js: -------------------------------------------------------------------------------- 1 | export const variants2standard = { 2 | cn: ['STCharacters', 'STPhrases'], 3 | hk: ['HKVariantsRev', 'HKVariantsRevPhrases'], 4 | tw: ['TWVariantsRev', 'TWVariantsRevPhrases'], 5 | twp: ['TWVariantsRev', 'TWVariantsRevPhrases', 'TWPhrasesRev'], 6 | jp: ['JPVariantsRev', 'JPShinjitaiCharacters', 'JPShinjitaiPhrases'], 7 | }; 8 | 9 | export const standard2variants = { 10 | cn: ['TSCharacters', 'TSPhrases'], 11 | hk: ['HKVariants'], 12 | tw: ['TWVariants'], 13 | twp: ['TWVariants', 'TWPhrasesIT', 'TWPhrasesName', 'TWPhrasesOther'], 14 | jp: ['JPVariants'], 15 | }; 16 | 17 | export const presets = [ 18 | { 19 | filename: 'full', 20 | from: Object.keys(variants2standard), 21 | to: Object.keys(standard2variants) 22 | }, 23 | { 24 | filename: 'cn2t', 25 | from: ['cn'], 26 | to: ['hk', 'tw', 'twp', 'jp'] 27 | }, 28 | { 29 | filename: 't2cn', 30 | from: ['hk', 'tw', 'twp', 'jp'], 31 | to: ['cn'] 32 | } 33 | ]; 34 | -------------------------------------------------------------------------------- /test/html/basic.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 北風和太陽 5 | 6 | 7 | 12 | 13 | 14 |

北風和太陽

15 |

有一天,北風和太陽比賽,看看誰的本事大。這時候,有一個人在路上走,他的身上穿著一件大衣。

16 |

北風對太陽說:「我們來比一比,看誰能把他的大衣脫下來。」

17 |

於是,北風就用力吹!那個人一邊走一邊說:「好冷啊!」,他把大衣拉得更緊。北風吹累了,他對太陽說:「你來試試吧!」

18 |

太陽從雲裡出來,溫和的陽光照在那個人身上,他走了一會兒,覺得有點兒熱了,沒多久,就開始流汗了。於是,他自己就把大衣脫了下來。

19 |

北風說:「太陽,還是你的本事大。」

20 |

21 | 22 | 23 |

24 | 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 The nk2028 Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/html/complex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | opencc-js 測試頁面 5 | 6 | 7 | 8 | 9 | 14 | 15 | 16 |

opencc-js 測試頁面,標題應該轉換為簡體。

17 |

這句話應該轉換為簡體,內部的 span 也應該轉換為簡體

18 |

此處應該轉換為簡體

19 |

這句話為 zh-TW,lang 不匹配,不用轉換為簡體。

20 |

這個標籤的 class list 包含 ignore-opencc,不用轉換為簡體,且 lang 的值應該不變。 21 | 內部的 span 也不需要轉換為簡體。 22 |

23 |
24 |

25 | 26 | 27 |

28 | 29 | 30 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "opencc-js", 3 | "version": "1.0.5", 4 | "description": "The JavaScript version of Open Chinese Convert (OpenCC)", 5 | "main": "./dist/umd/full.js", 6 | "type": "module", 7 | "exports": { 8 | ".": { 9 | "import": "./dist/esm/full.js", 10 | "require": "./dist/umd/full.js" 11 | }, 12 | "./cn2t": "./dist/esm/cn2t.js", 13 | "./t2cn": "./dist/esm/t2cn.js", 14 | "./preset": "./dist/esm-lib/preset/full.js", 15 | "./*": "./dist/esm-lib/*.js" 16 | }, 17 | "scripts": { 18 | "build": "node build.js && rollup -c", 19 | "test": "node test/node/basic.mjs && node test/node/basic.cjs" 20 | }, 21 | "repository": { 22 | "type": "git", 23 | "url": "git+https://github.com/nk2028/opencc-js.git" 24 | }, 25 | "author": "The nk2028 Project", 26 | "keywords": [ 27 | "opencc", 28 | "Chinese", 29 | "unicode", 30 | "Chinese Language", 31 | "Simplified Chinese", 32 | "Traditional Chinese", 33 | "chinese-language", 34 | "chinese-characters", 35 | "traditional-chinese", 36 | "simplified-chinese" 37 | ], 38 | "license": "MIT", 39 | "bugs": { 40 | "url": "https://github.com/nk2028/opencc-js/issues" 41 | }, 42 | "homepage": "https://github.com/nk2028/opencc-js#readme", 43 | "devDependencies": { 44 | "chai": "~4.3.4", 45 | "opencc-data": "~1.0.7", 46 | "rollup-plugin-generate-package-json": "^3.2.0" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import generatePackageJson from 'rollup-plugin-generate-package-json'; 2 | 3 | export default [ 4 | { 5 | input: 'src/full.js', 6 | output: [ 7 | { 8 | file: 'dist/umd/full.js', 9 | format: 'umd', 10 | name: 'OpenCC', 11 | plugins: [ 12 | generatePackageJson({ 13 | baseContents: { 14 | type: 
"commonjs" 15 | } 16 | }) 17 | ] 18 | }, 19 | { 20 | file: 'dist/esm/full.js', 21 | format: 'es', 22 | } 23 | ] 24 | }, 25 | { 26 | input: 'src/cn2t.js', 27 | output: [ 28 | { 29 | file: 'dist/umd/cn2t.js', 30 | format: 'umd', 31 | name: 'OpenCC' 32 | }, 33 | { 34 | file: 'dist/esm/cn2t.js', 35 | format: 'es', 36 | } 37 | ] 38 | }, 39 | { 40 | input: 'src/t2cn.js', 41 | output: [ 42 | { 43 | file: 'dist/umd/t2cn.js', 44 | format: 'umd', 45 | name: 'OpenCC' 46 | }, 47 | { 48 | file: 'dist/esm/t2cn.js', 49 | format: 'es', 50 | } 51 | ] 52 | } 53 | ]; 54 | -------------------------------------------------------------------------------- /test/node/basic.cjs: -------------------------------------------------------------------------------- 1 | const chai = require('chai'); 2 | const OpenCC = require('opencc-js'); 3 | 4 | chai.should(); 5 | 6 | (function test1() { 7 | const trie = new OpenCC.Trie(); 8 | trie.addWord('abc', '123'); 9 | trie.addWord('abcd', '4'); 10 | trie.convert('ab').should.equal('ab'); 11 | trie.convert('abc').should.equal('123'); 12 | trie.convert('abcd').should.equal('4'); 13 | trie.convert('abcde').should.equal('4e'); 14 | trie.convert('dabc').should.equal('d123'); 15 | trie.convert('dabcd').should.equal('d4'); 16 | }()); 17 | 18 | (function test2() { 19 | const trie = new OpenCC.Trie(); 20 | trie.addWord('𦫖𩇩', 'aaa'); 21 | trie.addWord('的𫟃', 'bbb'); 22 | trie.convert('𦫖𩇩𭞂的𫟃').should.equal('aaa𭞂bbb'); 23 | trie.convert('𦫖𭞂𩇩的𫟃').should.equal('𦫖𭞂𩇩bbb'); 24 | }()); 25 | 26 | (function test3() { 27 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 28 | converter('政府初步傾向試驗為綠色專線小巴設充電裝置').should.equal('政府初步倾向试验为绿色专线小巴设充电装置'); 29 | }()); 30 | 31 | (function test4() { 32 | const converter = OpenCC.Converter({ from: 't', to: 'cn' }); 33 | converter('漢語').should.equal('汉语'); 34 | }()); 35 | 36 | (function test5() { 37 | const converter = OpenCC.Converter({ from: 'cn', to: 'twp' }); 38 | converter('方便面').should.equal('泡麵'); 39 | }()); 40 | 41 | 
(function test6() { 42 | const converter = OpenCC.CustomConverter([ 43 | ['香蕉', '🍌️'], 44 | ['蘋果', '🍎️'], 45 | ['梨', '🍐️'], 46 | ]); 47 | converter('香蕉蘋果梨').should.equal('🍌️🍎️🍐️'); 48 | })(); 49 | -------------------------------------------------------------------------------- /test/node/basic.mjs: -------------------------------------------------------------------------------- 1 | import chai from 'chai'; 2 | import * as OpenCC from 'opencc-js/core'; 3 | import * as loc from 'opencc-js/preset'; 4 | 5 | const Converter = OpenCC.ConverterBuilder(loc); 6 | 7 | chai.should(); 8 | 9 | (function test1() { 10 | const trie = new OpenCC.Trie(); 11 | trie.addWord('abc', '123'); 12 | trie.addWord('abcd', '4'); 13 | trie.convert('ab').should.equal('ab'); 14 | trie.convert('abc').should.equal('123'); 15 | trie.convert('abcd').should.equal('4'); 16 | trie.convert('abcde').should.equal('4e'); 17 | trie.convert('dabc').should.equal('d123'); 18 | trie.convert('dabcd').should.equal('d4'); 19 | }()); 20 | 21 | (function test2() { 22 | const trie = new OpenCC.Trie(); 23 | trie.addWord('𦫖𩇩', 'aaa'); 24 | trie.addWord('的𫟃', 'bbb'); 25 | trie.convert('𦫖𩇩𭞂的𫟃').should.equal('aaa𭞂bbb'); 26 | trie.convert('𦫖𭞂𩇩的𫟃').should.equal('𦫖𭞂𩇩bbb'); 27 | }()); 28 | 29 | (function test3() { 30 | const converter = Converter({ from: 'hk', to: 'cn' }); 31 | converter('政府初步傾向試驗為綠色專線小巴設充電裝置').should.equal('政府初步倾向试验为绿色专线小巴设充电装置'); 32 | }()); 33 | 34 | (function test4() { 35 | const converter = Converter({ from: 't', to: 'cn' }); 36 | converter('漢語').should.equal('汉语'); 37 | }()); 38 | 39 | (function test5() { 40 | const converter = Converter({ from: 'cn', to: 'twp' }); 41 | converter('方便面').should.equal('泡麵'); 42 | }()); 43 | 44 | (function test6() { 45 | const converter = OpenCC.CustomConverter([ 46 | ['香蕉', '🍌️'], 47 | ['蘋果', '🍎️'], 48 | ['梨', '🍐️'], 49 | ]); 50 | converter('香蕉蘋果梨').should.equal('🍌️🍎️🍐️'); 51 | })(); 52 | -------------------------------------------------------------------------------- 
/README-zh-CN.md: -------------------------------------------------------------------------------- 1 | # opencc-js [![](https://badge.fury.io/js/opencc-js.svg)](https://www.npmjs.com/package/opencc-js) [![](https://github.com/nk2028/opencc-js/workflows/Test/badge.svg)](https://github.com/nk2028/opencc-js/actions?query=workflow%3ATest) [![](https://data.jsdelivr.com/v1/package/npm/opencc-js/badge)](https://www.jsdelivr.com/package/npm/opencc-js) 2 | 3 | 开放中文转换 JavaScript 版 4 | 5 | ## 加载 6 | 7 | **在 HTML 中加载** 8 | 9 | 加载以下 `script` 标签(择一即可): 10 | 11 | ```html 12 | 13 | 14 | 15 | ``` 16 | 17 | 自行托管的话,除了使用原先的 umd,也可以使用 es module 18 | 19 | ```html 20 | 25 | ``` 26 | 27 | **在 Node.js 中加载** 28 | 29 | ```sh 30 | npm install opencc-js 31 | ``` 32 | 33 | CommonJS 34 | 35 | ```javascript 36 | const OpenCC = require('opencc-js'); 37 | ``` 38 | 39 | ES Modules 40 | 41 | ```javascript 42 | import * as OpenCC from 'opencc-js'; 43 | ``` 44 | 45 | ## 使用 46 | 47 | **基本用法** 48 | 49 | ```javascript 50 | // 将繁体中文(香港)转换为简体中文(中国大陆) 51 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 52 | console.log(converter('漢語')); // output: 汉语 53 | ``` 54 | 55 | - `cn`: 简体中文(中国大陆) 56 | - `tw`: 繁体中文(台湾) 57 | - `twp`: 且转换词汇(例如:自行車 -> 腳踏車) 58 | - `hk`: 繁体中文(香港) 59 | - `jp`: 日本新字体 60 | - `t`: 繁体中文(OpenCC 标准。除非你知道自己在做什么,否则请勿使用) 61 | 62 | **自订转换器** 63 | 64 | ```javascript 65 | const converter = OpenCC.CustomConverter([ 66 | ['香蕉', 'banana'], 67 | ['蘋果', 'apple'], 68 | ['梨', 'pear'], 69 | ]); 70 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 71 | ``` 72 | 73 | 或以「空白」及「|」当作分隔符号 74 | 75 | ```javascript 76 | const converter = OpenCC.CustomConverter('香蕉 banana|蘋果 apple|梨 pear'); 77 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 78 | ``` 79 | 80 | **添加字词** 81 | 82 | * `ConverterFactory` 是比较底层的函数,`Converter` 及 `CustomConverter` 都是这个函数的再包装。 83 | * 透过 `Locale` 属性可以得到原本的字典,进而添加字词。 84 | 85 | ```javascript 86 | const customDict = [ 87 | ['“', '「'], 88 | ['”', '」'], 
89 | ['‘', '『'], 90 | ['’', '』'], 91 | ]; 92 | const converter = OpenCC.ConverterFactory( 93 | OpenCC.Locale.from.cn, // 中国大陆 => OpenCC 标准 94 | OpenCC.Locale.to.tw.concat([customDict]) // OpenCC 标准 => 台湾+自订 95 | ); 96 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 97 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 98 | ``` 99 | 100 | 下面的写法也会得到相同的结果,只是内部会多做一次转换 101 | 102 | ```javascript 103 | const customDict = [ 104 | ['“', '「'], 105 | ['”', '」'], 106 | ['‘', '『'], 107 | ['’', '』'], 108 | ]; 109 | const converter = OpenCC.ConverterFactory( 110 | OpenCC.Locale.from.cn, // 中国大陆 => OpenCC 标准 111 | OpenCC.Locale.to.tw, // OpenCC 标准 => 台湾 112 | [customDict] // 台湾 => 自订 113 | ); 114 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 115 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 116 | ``` 117 | 118 | **DOM 操作** 119 | 120 | HTML 属性 `lang='*'` 定义了目标。 121 | 122 | ```html 123 | 漢語 124 | ``` 125 | 126 | ```javascript 127 | // 将繁体中文(香港)转换为简体中文(中国大陆) 128 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 129 | // 设置转换起点为根节点,即转换整个页面 130 | const rootNode = document.documentElement; 131 | // 将所有 zh-HK 标签转为 zh-CN 标签 132 | const HTMLConvertHandler = OpenCC.HTMLConverter(converter, rootNode, 'zh-HK', 'zh-CN'); 133 | HTMLConvertHandler.convert(); // 开始转换 -> 汉语 134 | HTMLConvertHandler.restore(); // 复原 -> 漢語 135 | ``` 136 | 137 | class list 包含 `ignore-opencc` 的标签不会被转换(包括该标签的所有子节点)。 138 | 139 | ## 打包优化 140 | 141 | 如果使用 rollup 等工具打包程式码,以下方式能让打包工具自动移除用不到的部分,减少档案大小。 142 | 143 | ```javascript 144 | import * as OpenCC from 'opencc-js/core'; // 核心程式码 145 | import * as Locale from 'opencc-js/preset'; // 字典资料 146 | 147 | const converter = OpenCC.ConverterFactory(Locale.from.hk, Locale.to.cn); 148 | console.log(converter('漢語')); 149 | ``` 150 | 151 | 备注: 152 | 153 | * 由于这是利用 Tree Shaking,所以必须使用 ES Modules 154 | * 在这个模式之下,没有 `Converter` 函式,必须直接使用 `ConverterFactory` 155 | -------------------------------------------------------------------------------- /README-zh-TW.md: 
-------------------------------------------------------------------------------- 1 | # opencc-js [![](https://badge.fury.io/js/opencc-js.svg)](https://www.npmjs.com/package/opencc-js) [![](https://github.com/nk2028/opencc-js/workflows/Test/badge.svg)](https://github.com/nk2028/opencc-js/actions?query=workflow%3ATest) [![](https://data.jsdelivr.com/v1/package/npm/opencc-js/badge)](https://www.jsdelivr.com/package/npm/opencc-js) 2 | 3 | 開放中文轉換 JavaScript 版 4 | 5 | ## 載入 6 | 7 | **在 HTML 中載入** 8 | 9 | 載入以下 `script` 標籤(擇一即可): 10 | 11 | ```html 12 | 13 | 14 | 15 | ``` 16 | 17 | 自行託管的話,除了使用原先的 umd,也可以使用 es module 18 | 19 | ```html 20 | 25 | ``` 26 | 27 | **在 Node.js 中載入** 28 | 29 | ```sh 30 | npm install opencc-js 31 | ``` 32 | 33 | CommonJS 34 | 35 | ```javascript 36 | const OpenCC = require('opencc-js'); 37 | ``` 38 | 39 | ES Modules 40 | 41 | ```javascript 42 | import * as OpenCC from 'opencc-js'; 43 | ``` 44 | 45 | ## 使用 46 | 47 | **基本用法** 48 | 49 | ```javascript 50 | // 將繁體中文(香港)轉換為簡體中文(中國大陸) 51 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 52 | console.log(converter('漢語')); // output: 汉语 53 | ``` 54 | 55 | - `cn`: 簡體中文(中國大陸) 56 | - `tw`: 繁體中文(臺灣) 57 | - `twp`: 且轉換詞彙(例如:自行車 -> 腳踏車) 58 | - `hk`: 繁體中文(香港) 59 | - `jp`: 日本新字體 60 | - `t`: 繁體中文(OpenCC 標準。除非你知道自己在做什麼,否則請勿使用) 61 | 62 | **自訂轉換器** 63 | 64 | ```javascript 65 | const converter = OpenCC.CustomConverter([ 66 | ['香蕉', 'banana'], 67 | ['蘋果', 'apple'], 68 | ['梨', 'pear'], 69 | ]); 70 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 71 | ``` 72 | 73 | 或以「空白」及「|」當作分隔符號 74 | 75 | ```javascript 76 | const converter = OpenCC.CustomConverter('香蕉 banana|蘋果 apple|梨 pear'); 77 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 78 | ``` 79 | 80 | **添加字詞** 81 | 82 | * `ConverterFactory` 是比較底層的函數,`Converter` 及 `CustomConverter` 都是這個函數的再包裝。 83 | * 透過 `Locale` 屬性可以得到原本的字典,進而添加字詞。 84 | 85 | ```javascript 86 | const customDict = [ 87 | ['“', '「'], 88 | ['”', '」'], 89 | ['‘', '『'], 
90 | ['’', '』'], 91 | ]; 92 | const converter = OpenCC.ConverterFactory( 93 | OpenCC.Locale.from.cn, // 中國大陸 => OpenCC 標準 94 | OpenCC.Locale.to.tw.concat([customDict]) // OpenCC 標準 => 臺灣+自訂 95 | ); 96 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 97 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 98 | ``` 99 | 100 | 下面的寫法也會得到相同的結果,只是內部會多做一次轉換 101 | 102 | ```javascript 103 | const customDict = [ 104 | ['“', '「'], 105 | ['”', '」'], 106 | ['‘', '『'], 107 | ['’', '』'], 108 | ]; 109 | const converter = OpenCC.ConverterFactory( 110 | OpenCC.Locale.from.cn, // 中國大陸 => OpenCC 標準 111 | OpenCC.Locale.to.tw, // OpenCC 標準 => 臺灣 112 | [customDict] // 臺灣 => 自訂 113 | ); 114 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 115 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 116 | ``` 117 | 118 | **DOM 操作** 119 | 120 | HTML 屬性 `lang='*'` 定義了目標。 121 | 122 | ```html 123 | 漢語 124 | ``` 125 | 126 | ```javascript 127 | // 將繁體中文(香港)轉換為簡體中文(中國大陸) 128 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 129 | // 設定轉換起點為根節點,即轉換整個頁面 130 | const rootNode = document.documentElement; 131 | // 將所有 zh-HK 標籤轉為 zh-CN 標籤 132 | const HTMLConvertHandler = OpenCC.HTMLConverter(converter, rootNode, 'zh-HK', 'zh-CN'); 133 | HTMLConvertHandler.convert(); // 開始轉換 -> 汉语 134 | HTMLConvertHandler.restore(); // 復原 -> 漢語 135 | ``` 136 | 137 | class list 包含 `ignore-opencc` 的標籤不會被轉換(包括該標籤的所有子節點)。 138 | 139 | ## 打包優化 140 | 141 | 如果使用 rollup 等工具打包程式碼,以下方式能讓打包工具自動移除用不到的部分,減少檔案大小。 142 | 143 | ```javascript 144 | import * as OpenCC from 'opencc-js/core'; // 核心程式碼 145 | import * as Locale from 'opencc-js/preset'; // 字典資料 146 | 147 | const converter = OpenCC.ConverterFactory(Locale.from.hk, Locale.to.cn); 148 | console.log(converter('漢語')); 149 | ``` 150 | 151 | 備註: 152 | 153 | * 由於這是利用 Tree Shaking,所以必須使用 ES Modules 154 | * 在這個模式之下,沒有 `Converter` 函式,必須直接使用 `ConverterFactory` 155 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # opencc-js [![](https://badge.fury.io/js/opencc-js.svg)](https://www.npmjs.com/package/opencc-js) [![](https://github.com/nk2028/opencc-js/workflows/Test/badge.svg)](https://github.com/nk2028/opencc-js/actions?query=workflow%3ATest) [![](https://data.jsdelivr.com/v1/package/npm/opencc-js/badge)](https://www.jsdelivr.com/package/npm/opencc-js) 2 | 3 | The JavaScript version of Open Chinese Convert (OpenCC) 4 | 5 | [繁體版](README-zh-TW.md) - [简体版](README-zh-CN.md) 6 | 7 | ## Import 8 | 9 | **Import opencc-js in HTML page** 10 | 11 | Import in HTML pages: 12 | 13 | ```html 14 | 15 | 16 | 17 | ``` 18 | 19 | ES6 import 20 | 21 | ```html 22 | 27 | ``` 28 | 29 | **Import opencc-js in Node.js script** 30 | 31 | ```sh 32 | npm install opencc-js 33 | ``` 34 | 35 | CommonJS 36 | 37 | ```javascript 38 | const OpenCC = require('opencc-js'); 39 | ``` 40 | 41 | ES Modules 42 | 43 | ```javascript 44 | import * as OpenCC from 'opencc-js'; 45 | ``` 46 | 47 | ## Usage 48 | 49 | **Basic usage** 50 | 51 | ```javascript 52 | // Convert Traditional Chinese (Hong Kong) to Simplified Chinese (Mainland China) 53 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 54 | console.log(converter('漢語')); // output: 汉语 55 | ``` 56 | 57 | **Custom Converter** 58 | 59 | ```javascript 60 | const converter = OpenCC.CustomConverter([ 61 | ['香蕉', 'banana'], 62 | ['蘋果', 'apple'], 63 | ['梨', 'pear'], 64 | ]); 65 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 66 | ``` 67 | 68 | Or using space and vertical bar as delimiter. 69 | 70 | ```javascript 71 | const converter = OpenCC.CustomConverter('香蕉 banana|蘋果 apple|梨 pear'); 72 | console.log(converter('香蕉 蘋果 梨')); // output: banana apple pear 73 | ``` 74 | 75 | **Add words** 76 | 77 | * Use low-level function `ConverterFactory` to create converter. 78 | * Get dictionary from the property `Locale`. 
79 | 80 | ```javascript 81 | const customDict = [ 82 | ['“', '「'], 83 | ['”', '」'], 84 | ['‘', '『'], 85 | ['’', '』'], 86 | ]; 87 | const converter = OpenCC.ConverterFactory( 88 | OpenCC.Locale.from.cn, // Simplified Chinese (Mainland China) => OpenCC standard 89 | OpenCC.Locale.to.tw.concat([customDict]) // OpenCC standard => Traditional Chinese (Taiwan) with custom words 90 | ); 91 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 92 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 93 | ``` 94 | 95 | The following produces the same result, at the cost of one extra conversion pass. 96 | 97 | ```javascript 98 | const customDict = [ 99 | ['“', '「'], 100 | ['”', '」'], 101 | ['‘', '『'], 102 | ['’', '』'], 103 | ]; 104 | const converter = OpenCC.ConverterFactory( 105 | OpenCC.Locale.from.cn, // Simplified Chinese (Mainland China) => OpenCC standard 106 | OpenCC.Locale.to.tw, // OpenCC standard => Traditional Chinese (Taiwan) 107 | [customDict] // Traditional Chinese (Taiwan) => custom words 108 | ); 109 | console.log(converter('悟空道:“师父又来了。怎么叫做‘水中捞月’?”')); 110 | // output: 悟空道:「師父又來了。怎麼叫做『水中撈月』?」 111 | ``` 112 | 113 | **DOM operations** 114 | 115 | HTML attribute `lang='*'` defines the targets. 116 | 117 | ```html 118 | 漢語 119 | ``` 120 | 121 | ```javascript 122 | // Set Chinese convert from Traditional (Hong Kong) to Simplified (Mainland China) 123 | const converter = OpenCC.Converter({ from: 'hk', to: 'cn' }); 124 | // Set the conversion starting point to the root node, i.e. convert the whole page 125 | const rootNode = document.documentElement; 126 | // Convert all elements with attributes lang='zh-HK'. Change attribute value to lang='zh-CN' 127 | const HTMLConvertHandler = OpenCC.HTMLConverter(converter, rootNode, 'zh-HK', 'zh-CN'); 128 | HTMLConvertHandler.convert(); // Convert -> 汉语 129 | HTMLConvertHandler.restore(); // Restore -> 漢語 130 | ``` 131 | 132 | ## API 133 | * `.Converter({})`: declare the converter's direction via locales. 
134 | * no default: both `from` and `to` are required; an error is thrown if either one is missing 135 | * syntax : `{ from: locale1, to: locale2 }` 136 | * locales: letter codes identifying a regional writing tradition and, for some codes, its idiomatic vocabulary. 137 | * `cn`: Simplified Chinese (Mainland China) 138 | * `tw`: Traditional Chinese (Taiwan) 139 | * `twp`: with phrase conversion (e.g. 自行車 -> 腳踏車) 140 | * `hk`: Traditional Chinese (Hong Kong) 141 | * `jp`: Japanese Shinjitai 142 | * `t`: Traditional Chinese (OpenCC standard. Do not use unless you know what you are doing) 143 | * `.CustomConverter([])` : defines a converter from a custom dictionary. 144 | * no default: the dictionary argument is required 145 | * syntax : `[ ['item1','replacement1'], ['item2','replacement2'], … ]`, or the equivalent string `'item1 replacement1|item2 replacement2'` 146 | * `.HTMLConverter(converter, rootNode, langAttrInitial, langAttrNew )` : uses a previously defined converter to convert text content from a starting root node downward, into the target locale. Only elements whose `lang` attribute equals `langAttrInitial` are converted, together with their descendants that do not declare a different `lang`. The `lang` attribute of each matching element is also changed from `langAttrInitial` to `langAttrNew`. 147 | * `lang` attributes : HTML attribute that declares the language of the text content to the browser, before conversion (`langAttrInitial`) and after conversion (`langAttrNew`). 148 | * syntax convention: [IETF language tags](https://www.w3.org/International/articles/bcp47/#macro), mainly `zh-TW`, `zh-HK`, `zh-CN`, `zh-SG`,… 149 | * `ignore-opencc` : HTML class signaling that an element and its sub-nodes must not be converted. 150 | 151 | ## Bundle optimization 152 | 153 | * Tree shaking (ES modules only) can reduce the size of the bundled file. 154 | * Use `ConverterFactory` instead of `Converter`. 
155 | 156 | ```javascript 157 | import * as OpenCC from 'opencc-js/core'; // primary code 158 | import * as Locale from 'opencc-js/preset'; // dictionary 159 | 160 | const converter = OpenCC.ConverterFactory(Locale.from.hk, Locale.to.cn); 161 | console.log(converter('漢語')); 162 | ``` 163 | -------------------------------------------------------------------------------- /src/main.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 字典,範例:"a alpha|b beta" 或 [["a", "alpha"], ["b", "beta"]] 3 | * @typedef {string|string[][]} DictLike 4 | */ 5 | 6 | /** 7 | * 字典群組 8 | * @typedef {DictLike[]} DictGroup 9 | */ 10 | 11 | /** 12 | * 地區設定資料 13 | * @typedef {object} LocalePreset 14 | * @property {object.} from 15 | * @property {object.} to 16 | */ 17 | 18 | /** 19 | * Trie 樹。 20 | */ 21 | export class Trie { 22 | // 使用 Map 實作 Trie 樹 23 | // Trie 的每個節點為一個 Map 物件 24 | // key 為 code point,value 為子節點(也是一個 Map)。 25 | // 如果 Map 物件有 trie_val 屬性,則該屬性為值字串,代表替換的字詞。 26 | 27 | constructor() { 28 | this.map = new Map(); 29 | } 30 | 31 | /** 32 | * 將一項資料加入字典樹 33 | * @param {string} s 要匹配的字串 34 | * @param {string} v 若匹配成功,則替換為此字串 35 | */ 36 | addWord(s, v) { 37 | let { map } = this; 38 | for (const c of s) { 39 | const cp = c.codePointAt(0); 40 | const nextMap = map.get(cp); 41 | if (nextMap == null) { 42 | const tmp = new Map(); 43 | map.set(cp, tmp); 44 | map = tmp; 45 | } else { 46 | map = nextMap; 47 | } 48 | } 49 | map.trie_val = v; 50 | } 51 | 52 | /** 53 | * 讀取字典資料 54 | * @param {DictLike} d 字典 55 | */ 56 | loadDict(d) { 57 | if (typeof d === 'string') { 58 | d = d.split('|'); 59 | for (const line of d) { 60 | const [l, r] = line.split(' '); 61 | this.addWord(l, r); 62 | } 63 | } else { 64 | for (let arr of d) { 65 | const [l, r] = arr; 66 | this.addWord(l, r); 67 | } 68 | } 69 | } 70 | 71 | /** 72 | * 讀取多個字典資料 73 | * @param {DictLike[]} arr 字典 74 | */ 75 | loadDictGroup(arr) { 76 | arr.forEach(d => { 77 | this.loadDict(d); 78 | }); 79 | 
} 80 | 81 | /** 82 | * 根據字典樹中的資料轉換字串。 83 | * @param {string} s 要轉換的字串 84 | */ 85 | convert(s) { 86 | const t = this.map; 87 | const n = s.length, arr = []; 88 | let orig_i; 89 | for (let i = 0; i < n;) { 90 | let t_curr = t, k = 0, v; 91 | for (let j = i; j < n;) { 92 | const x = s.codePointAt(j); 93 | j += x > 0xffff ? 2 : 1; 94 | 95 | const t_next = t_curr.get(x); 96 | if (typeof t_next === 'undefined') { 97 | break; 98 | } 99 | t_curr = t_next; 100 | 101 | const v_curr = t_curr.trie_val; 102 | if (typeof v_curr !== 'undefined') { 103 | k = j; 104 | v = v_curr; 105 | } 106 | } 107 | if (k > 0) { // 有替代 108 | if (orig_i !== null) { 109 | arr.push(s.slice(orig_i, i)); 110 | orig_i = null; 111 | } 112 | arr.push(v); 113 | i = k; 114 | } else { // 無替代 115 | if (orig_i === null) { 116 | orig_i = i; 117 | } 118 | i += s.codePointAt(i) > 0xffff ? 2 : 1; 119 | } 120 | } 121 | if (orig_i !== null) { 122 | arr.push(s.slice(orig_i, n)); 123 | } 124 | return arr.join(''); 125 | } 126 | } 127 | 128 | /** 129 | * Create a OpenCC converter 130 | * @param {...DictGroup} dictGroup 131 | * @returns The converter that performs the conversion. 132 | */ 133 | export function ConverterFactory(...dictGroups) { 134 | const trieArr = dictGroups.map(grp => { 135 | const t = new Trie(); 136 | t.loadDictGroup(grp); 137 | return t; 138 | }); 139 | /** 140 | * The converter that performs the conversion. 141 | * @param {string} s The string to be converted. 142 | * @returns {string} The converted string. 
143 | */ 144 | function convert(s) { 145 | return trieArr.reduce((res, t) => { 146 | return t.convert(res); 147 | }, s); 148 | } 149 | return convert; 150 | } 151 | 152 | /** 153 | * Build Converter function with locale data 154 | * @param {LocalePreset} localePreset 155 | * @returns Converter function 156 | */ 157 | export function ConverterBuilder(localePreset) { 158 | return function Converter(options) { 159 | let dictGroups = []; 160 | ['from', 'to'].forEach(type => { 161 | if (typeof options[type] !== 'string') { 162 | throw new Error('Please provide the `' + type + '` option'); 163 | } 164 | if (options[type] !== 't') { 165 | dictGroups.push(localePreset[type][options[type]]); 166 | } 167 | }); 168 | return ConverterFactory.apply(null, dictGroups); 169 | } 170 | } 171 | 172 | /** 173 | * Create a custom converter. 174 | * @param {string[][]} dict The dictionary to be used for conversion. 175 | * @returns The converter that performs the conversion. 176 | */ 177 | export function CustomConverter(dict) { 178 | return ConverterFactory([dict]); 179 | } 180 | 181 | /** 182 | * Create a HTML page converter. 183 | * @param {(s: string) => string} converter The converter that performs the conversion. 184 | * @param {HTMLElement} rootNode The root node for recursive conversions. 185 | * @param {string} fromLangTag The lang tag to be converted. 186 | * @param {string} toLangTag The lang tag of the conversion result. 187 | * @returns The HTML page converter. 188 | */ 189 | export function HTMLConverter(converter, rootNode, fromLangTag, toLangTag) { 190 | /** 191 | * Perform the conversion on the page. 
192 | */ 193 | function convert() { 194 | function inner(currentNode, langMatched) { 195 | /* class list 包含 ignore-opencc 的元素會跳過後續的轉換 */ 196 | if (currentNode.nodeType === Node.ELEMENT_NODE && currentNode.classList.contains('ignore-opencc')) return; 197 | 198 | if (currentNode.lang === fromLangTag) { 199 | langMatched = true; 200 | currentNode.shouldChangeLang = true; // 記住 lang 屬性被修改了,以便恢復 201 | currentNode.lang = toLangTag; 202 | } else if (currentNode.lang && currentNode.lang.length) { 203 | langMatched = false; 204 | } 205 | 206 | if (langMatched) { 207 | /* Do not convert these elements */ 208 | if (currentNode.tagName === 'SCRIPT') return; 209 | if (currentNode.tagName === 'STYLE') return; 210 | 211 | /* 處理特殊屬性 */ 212 | if (currentNode.tagName === 'META' && currentNode.name === 'description') { 213 | if (currentNode.originalContent == null) { 214 | currentNode.originalContent = currentNode.content; 215 | } 216 | currentNode.content = converter(currentNode.originalContent); 217 | } else if (currentNode.tagName === 'META' && currentNode.name === 'keywords') { 218 | if (currentNode.originalContent == null) { 219 | currentNode.originalContent = currentNode.content; 220 | } 221 | currentNode.content = converter(currentNode.originalContent); 222 | } else if (currentNode.tagName === 'IMG') { 223 | if (currentNode.originalAlt == null) { 224 | currentNode.originalAlt = currentNode.alt; 225 | } 226 | currentNode.alt = converter(currentNode.originalAlt); 227 | } else if (currentNode.tagName === 'INPUT' && currentNode.type === 'button') { 228 | if (currentNode.originalValue == null) { 229 | currentNode.originalValue = currentNode.value; 230 | } 231 | currentNode.value = converter(currentNode.originalValue); 232 | } 233 | } 234 | 235 | for (const node of currentNode.childNodes) { 236 | if (node.nodeType === Node.TEXT_NODE && langMatched) { 237 | if (node.originalString == null) { 238 | node.originalString = node.nodeValue; // 存儲原始字串,以便恢復 239 | } 240 | node.nodeValue = 
converter(node.originalString); 241 | } else { 242 | inner(node, langMatched); 243 | } 244 | } 245 | } 246 | inner(rootNode, false); 247 | } 248 | 249 | /** 250 | * Restore the page to the state before the conversion. 251 | */ 252 | function restore() { 253 | function inner(currentNode) { 254 | /* class list 包含 ignore-opencc 的元素會跳過後續的轉換 */ 255 | if (currentNode.nodeType === Node.ELEMENT_NODE && currentNode.classList.contains('ignore-opencc')) return; 256 | 257 | if (currentNode.shouldChangeLang) { 258 | currentNode.lang = fromLangTag; 259 | } 260 | 261 | if (currentNode.originalString !== undefined) { 262 | currentNode.nodeValue = currentNode.originalString; 263 | } 264 | 265 | /* 處理特殊屬性 */ 266 | if (currentNode.tagName === 'META' && currentNode.name === 'description') { 267 | if (currentNode.originalContent !== undefined) { 268 | currentNode.content = currentNode.originalContent; 269 | } 270 | } else if (currentNode.tagName === 'META' && currentNode.name === 'keywords') { 271 | if (currentNode.originalContent !== undefined) { 272 | currentNode.content = currentNode.originalContent; 273 | } 274 | } else if (currentNode.tagName === 'IMG') { 275 | if (currentNode.originalAlt !== undefined) { 276 | currentNode.alt = currentNode.originalAlt; 277 | } 278 | } else if (currentNode.tagName === 'INPUT' && currentNode.type === 'button') { 279 | if (currentNode.originalValue !== undefined) { 280 | currentNode.value = currentNode.originalValue; 281 | } 282 | } 283 | 284 | for (const node of currentNode.childNodes) { 285 | inner(node); 286 | } 287 | } 288 | inner(rootNode); 289 | } 290 | 291 | return { convert, restore }; 292 | } 293 | --------------------------------------------------------------------------------