├── .editorconfig ├── .github ├── CONTRIBUTING.md ├── dependabot.yml └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── _tools └── gen_pinyin_dict.go ├── benchmark_test.go ├── cli └── pinyin │ ├── go.mod │ ├── go.sum │ └── main.go ├── doc.go ├── example_test.go ├── go.mod ├── phonetic_symbol.go ├── pinyin.go ├── pinyin_dict.go └── pinyin_test.go /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | charset = utf-8 9 | end_of_line = lf 10 | insert_final_newline = true 11 | trim_trailing_whitespace = true 12 | 13 | # Indentation 14 | [*.{py,rst}] 15 | indent_style = space 16 | indent_size = 4 17 | [{Makefile,*.go}] 18 | indent_style = tab 19 | indent_size = 4 20 | [*.{ini,yml}] 21 | indent_style = space 22 | indent_size = 2 23 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 4 | * 发送 Pull Request 时麻烦将目标分支修改为 `develop` 分支。 5 | * 如果是关于单个汉字的拼音有误的 BUG,麻烦前往 [pinyin-data](https://github.com/mozillazg/pinyin-data/issues) 进行反馈。 6 | 7 | 8 | Thanks for contributing! 
:heart: 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "21:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | branches: 10 | - master 11 | - develop 12 | 13 | jobs: 14 | 15 | build: 16 | name: Build 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | go-version: [1.17, 1.18, 1.19, '1.20'] 21 | steps: 22 | 23 | - name: Set up Go 24 | uses: actions/setup-go@v3 25 | with: 26 | go-version: ${{ matrix.go-version }} 27 | env: 28 | GOPATH: ${{ env.HOME }} 29 | id: go 30 | 31 | - name: Check out code into the Go module directory 32 | uses: actions/checkout@v3 33 | 34 | - name: Get dependencies 35 | run: | 36 | go get -v -t -d ./... 37 | 38 | - name: Build 39 | run: go build -v . 40 | 41 | - name: Test 42 | run: go test -v . 
43 | 44 | - name: Test cmd 45 | run: | 46 | cd cli/pinyin/ 47 | go run ./main.go 测试 48 | go run ./main.go -s zhao 测试 49 | echo 测试 | go run ./main.go 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | *~ 26 | *.swp 27 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "_tools/pinyin-data"] 2 | path = _tools/pinyin-data 3 | url = https://github.com/mozillazg/pinyin-data.git 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.20.0] (2023-05-14) 4 | 5 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.13.0 的拼音数据 6 | 7 | 8 | ## [0.19.0] (2021-12-11) 9 | 10 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.12.0 的拼音数据 11 | 12 | 13 | ## [0.18.0] (2020-06-13) 14 | 15 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.9.0 的拼音数据 16 | * **Bugfixed** 修复自定义的 Fallback 函数可能会导致结果乱码的问题 Fixes [#35] 17 | 18 | ## [0.17.0] (2020-04-09) 19 | 20 | * **Changed** 因为依赖的 gojieba 经常出现安装异常,撤销 v0.16.0 的修改,撤销后 v0.17.0 的代码跟 v0.15.0 基本是一样的。 21 | 如果有需要使用 v0.16.0 新增的 ``func Paragraph(p string) string`` 功能的请使用 v0.16.0 版本或者通过 v0.16.0 中相关代码实现类似的需求。 22 | 23 | ## [0.16.0] (2019-12-05) 24 | 25 | * **NEW** 增加 ``func Paragraph(p string) string`` 用于便捷处理大段文字 26 | (thanks [@huacnlee] via [#37][#37]) 27 | 28 | ## [0.15.0] 
(2019-04-06) 29 | 30 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.7.0 的拼音数据 31 | * **NEW** 添加 go.mod 文件 32 | 33 | 34 | ## [0.14.0] (2018-08-05) 35 | 36 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.6.1 的拼音数据 37 | * **Changed** 命令行工具移到 `cmd/pinyin/` 目录下,现在需要改为使用 38 | `go get -u github.com/mozillazg/go-pinyin/cmd/pinyin` 来安装命令行工具。 39 | 40 | 41 | ## [0.13.0] (2018-04-29) 42 | 43 | * **Changed** 使用 [pinyin-data][pinyin-data] v0.5.1 的拼音数据 (via [#30]) 44 | * **Changed** 修改命令行工具 `-s` 参数的值(thanks [@wdscxsj][@wdscxsj] via [#19][#19]): 45 | * `Normal` 改为 `zhao` 46 | * `Tone` 改为 `zh4ao` 47 | * `Tone2` 改为 `zha4o` 48 | * `Tone3` 改为 `zhao4` 49 | * `Initials` 改为 `zh` 50 | * `FirstLetter` 改为 `z` 51 | * `Finals` 改为 `ao` 52 | * `FinalsTone` 改为 `4ao` 53 | * `FinalsTone2` 改为 `a4o` 54 | * `FinalsTone3` 改为 `ao4` 55 | * **Changed** 严格限制命令行参数中 `-s` 选项的值(thanks [@wdscxsj][@wdscxsj] via [#20][#20]): 56 | 57 | 58 | ## [0.12.0] (2017-04-25) 59 | 60 | 61 | * **NEW** 命令行程序支持通过 -s 指定新增的 `Tone3` 和 `FinalsTone3` 拼音风格 62 | 63 | $ pinyin -s Tone3 请至少输入一个汉字 64 | qing3 zhi4 shao3 shu1 ru4 yi1 ge4 han4 zi4 65 | 66 | $ pinyin -s FinalsTone3 请至少输入一个汉字 67 | ing3 i4 ao3 u1 u4 i1 e4 an4 i4 68 | 69 | * **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.1 70 | 71 | 72 | ## [0.11.0] (2016-10-28) 73 | 74 | * **Changed** 不再使用 `0` 表示轻声(因为之前并没有正确的实现这个功能, 同时也觉得这个功能没必要)。 75 | 顺便修复了 Tone2 中 `ü` 标轻声的问题(像 `侵略 -> qi1n lv0e4` ) 76 | * **NEW** 新增 `Tone3` 和 `FinalsTone3` 拼音风格。 77 | 78 | hans := "中国人" 79 | args := pinyin.NewArgs() 80 | args.Style = pinyin.Tone3 81 | fmt.Println("Tone3:", pinyin.Pinyin(hans, args)) 82 | // Output: Tone3: [[zhong1] [guo2] [ren2]] 83 | 84 | args.Style = pinyin.FinalsTone3 85 | fmt.Println("FinalsTone3:", pinyin.Pinyin(hans, args)) 86 | // Output: FinalsTone3: [[ong1] [uo2] [en2]] 87 | 88 | 89 | 90 | ## [0.10.0] (2016-10-18) 91 | 92 | * **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.0 93 | 94 | 95 | ## [0.9.0] (2016-09-04): 96 | 97 
| * **NEW** 新增 `func Convert(s string, a *Args) [][]string` 98 | * **NEW** 新增 `func LazyConvert(s string, a *Args) []string` 99 | 100 | 之所以增加这两个函数是希望 `a` 参数支持 `nil` 101 | 102 | 103 | 104 | ## [0.8.0] (2016-08-19) 105 | 106 | * **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.3.0 107 | * Fixed [#13](https://github.com/mozillazg/go-pinyin/issues/13) . thanks [@aisq2008](https://github.com/aisq2008) 108 | * Fixed pinyin of 罗 109 | 110 | 111 | ## [0.7.0] (2016-08-02) 112 | 113 | * **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.2.0 114 | * **Improved** golint and gofmt 115 | 116 | 117 | ## [0.6.0] (2016-05-14) 118 | 119 | * **NEW** 命令行程序支持指定拼音风格: 120 | 121 | ```shell 122 | $ pinyin -s Normal 你好 123 | ni hao 124 | ``` 125 | * **Bugfixed** 解决韵母 i, u, ü 的问题:根据以下拼音方案,还原出正确的韵母 126 | [#8](https://github.com/mozillazg/go-pinyin/pull/8), [python-pinyin#26](https://github.com/mozillazg/python-pinyin/pull/26) 127 | 128 | > i 行的韵母,前面没有声母的时候,写成:yi(衣),yɑ(呀),ye(耶), 129 | > yɑo(腰),you(忧),yɑn(烟),yin(因),yɑnɡ(央),yinɡ(英),yonɡ(雍)。 130 | > 131 | > u 行的韵母,前面没有声母的时候,写成wu(乌),wɑ(蛙),wo(窝), 132 | > wɑi(歪),wei(威),wɑn(弯),wen(温),wɑnɡ(汪),wenɡ(翁)。 133 | > 134 | > ü行的韵母跟声母j,q,x拼的时候,写成ju(居),qu(区),xu(虚), 135 | > ü上两点也省略;但是跟声母l,n拼的时候,仍然写成lü(吕),nü(女)。 136 | 137 | **注意** `y` 既不是声母也不是韵母。详见 [汉语拼音方案](http://www.edu.cn/20011114/3009777.shtml) 138 | 139 | * **Bugfixed** 解决未正确处理鼻音 ḿ, ń, ň, ǹ 的问题:包含鼻音的拼音不应该有声母 140 | 141 | 142 | 143 | ## [0.5.0] (2016-03-12) 144 | 145 | * **CHANGE** 改为使用来自 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据。 146 | * **NEW** 命令行程序支持从标准输入读取数据(支持管道和重定向输入): 147 | 148 | ```shell 149 | $ echo "你好" | pinyin 150 | nǐ hǎo 151 | $ pinyin < hello.txt 152 | nǐ hǎo 153 | ``` 154 | 155 | 156 | ## [0.4.0] (2016-01-29) 157 | 158 | * **NEW** `Args` 结构体新增 field: `Fallback func(r rune, a Args) []string` 159 | 用于处理没有拼音的字符(默认忽略没有拼音的字符): 160 | ```go 161 | a := pinyin.NewArgs() 162 | a.Fallback = func(r rune, a pinyin.Args) []string { 163 | 
return []string{string(r + 1)} 164 | } 165 | fmt.Println(pinyin.Pinyin("中国人abc", a)) 166 | // Output: [[zhong] [guo] [ren] [b] [c] [d]] 167 | 168 | // or 169 | pinyin.Fallback = func(r rune, a pinyin.Args) []string { 170 | return []string{string(r)} 171 | } 172 | fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs())) 173 | // Output: [[zhong] [guo] [ren] [a] [b] [c]] 174 | ``` 175 | 176 | 177 | ## [0.3.0] (2015-12-29) 178 | 179 | * fix "当字符串中有非中文的时候,会出现下标越界的情况"(影响 `pinyin.LazyPinyin` 和 `pinyin.Slug` ([#1](https://github.com/mozillazg/go-pinyin/issues/1))) 180 | * 调整对非中文字符的处理:当遇到没有拼音的字符时,直接忽略 181 | ```go 182 | // before 183 | fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs())) 184 | [[zhong] [guo] [ren] [] [] []] 185 | 186 | // after 187 | fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs())) 188 | [[zhong] [guo] [ren]] 189 | ``` 190 | 191 | 192 | ## [0.2.1] (2015-08-26) 193 | 194 | * `yu`, `y`, `w` 不是声母 195 | 196 | 197 | ## [0.2.0] (2015-01-04) 198 | 199 | * 新增 `func NewArgs() Args` 200 | * 解决 `Args.Separator` 无法赋值为 `""` 的 BUG 201 | * 规范命名: 202 | * `NORMAL` -> `Normal` 203 | * `TONE` -> `Tone` 204 | * `TONE2` -> `Tone2` 205 | * `INITIALS` -> `Initials` 206 | * `FIRST_LETTER` -> `FirstLetter` 207 | * `FINALS` -> `Finals` 208 | * `FINALS_TONE` -> `FinalsTone` 209 | * `FINALS_TONE2` -> `FinalsTone2` 210 | 211 | ## [0.1.1] (2014-12-07) 212 | * 更新拼音库 213 | 214 | 215 | ## 0.1.0 (2014-11-23) 216 | * Initial Release 217 | 218 | 219 | [pinyin-data]: https://github.com/mozillazg/pinyin-data 220 | [@wdscxsj]: https://github.com/wdscxsj 221 | [@huacnlee]: https://github.com/huacnlee 222 | [#19]: https://github.com/mozillazg/go-pinyin/pull/19 223 | [#20]: https://github.com/mozillazg/go-pinyin/pull/20 224 | [#30]: https://github.com/mozillazg/go-pinyin/pull/30 225 | [#37]: https://github.com/mozillazg/go-pinyin/pull/37 226 | [#35]: https://github.com/mozillazg/go-pinyin/issues/35 227 | 228 | [0.1.1]: https://github.com/mozillazg/go-pinyin/compare/v0.1.0...v0.1.1 229 | 
[0.2.0]: https://github.com/mozillazg/go-pinyin/compare/v0.1.1...v0.2.0 230 | [0.2.1]: https://github.com/mozillazg/go-pinyin/compare/v0.2.0...v0.2.1 231 | [0.3.0]: https://github.com/mozillazg/go-pinyin/compare/v0.2.1...v0.3.0 232 | [0.4.0]: https://github.com/mozillazg/go-pinyin/compare/v0.3.0...v0.4.0 233 | [0.5.0]: https://github.com/mozillazg/go-pinyin/compare/v0.4.0...v0.5.0 234 | [0.6.0]: https://github.com/mozillazg/go-pinyin/compare/v0.5.0...v0.6.0 235 | [0.7.0]: https://github.com/mozillazg/go-pinyin/compare/v0.6.0...v0.7.0 236 | [0.8.0]: https://github.com/mozillazg/go-pinyin/compare/v0.7.0...v0.8.0 237 | [0.9.0]: https://github.com/mozillazg/go-pinyin/compare/v0.8.0...v0.9.0 238 | [0.10.0]: https://github.com/mozillazg/go-pinyin/compare/v0.9.0...v0.10.0 239 | [0.11.0]: https://github.com/mozillazg/go-pinyin/compare/v0.10.0...v0.11.0 240 | [0.12.0]: https://github.com/mozillazg/go-pinyin/compare/v0.11.0...v0.12.0 241 | [0.13.0]: https://github.com/mozillazg/go-pinyin/compare/v0.12.0...v0.13.0 242 | [0.14.0]: https://github.com/mozillazg/go-pinyin/compare/v0.13.0...v0.14.0 243 | [0.15.0]: https://github.com/mozillazg/go-pinyin/compare/v0.14.0...v0.15.0 244 | [0.16.0]: https://github.com/mozillazg/go-pinyin/compare/v0.15.0...v0.16.0 245 | [0.17.0]: https://github.com/mozillazg/go-pinyin/compare/v0.16.0...v0.17.0 246 | [0.18.0]: https://github.com/mozillazg/go-pinyin/compare/v0.17.0...v0.18.0 247 | [0.19.0]: https://github.com/mozillazg/go-pinyin/compare/v0.18.0...v0.19.0 248 | [0.20.0]: https://github.com/mozillazg/go-pinyin/compare/v0.19.0...v0.20.0 249 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 mozillazg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | 
# Print the list of available targets.
.PHONY: help
help:
	@echo "test run test"
	@echo "lint run lint"
	@echo "gen_pinyin_dict gen pinyin dict"

# Run the package tests with coverage.
.PHONY: test
test:
	@echo "run test"
	@go test -v -cover

# Regenerate pinyin_dict.go from the pinyin-data submodule checkout.
.PHONY: gen_pinyin_dict
gen_pinyin_dict:
	@go run _tools/gen_pinyin_dict.go _tools/pinyin-data/pinyin.txt pinyin_dict.go

# Format and lint the library, the CLI and the generator.
# The CLI lives in cli/pinyin; the former cmd/pinyin path no longer exists
# and made gofmt/golint fail.
.PHONY: lint
lint:
	gofmt -s -w . cli/pinyin _tools
	golint .
	golint cli/pinyin
	golint _tools
71 | a.Heteronym = true 72 | fmt.Println(pinyin.Pinyin(hans, a)) 73 | // [[zhong zhong] [guo] [ren]] 74 | a.Style = pinyin.Tone2 75 | fmt.Println(pinyin.Pinyin(hans, a)) 76 | // [[zho1ng zho4ng] [guo2] [re2n]] 77 | 78 | fmt.Println(pinyin.LazyPinyin(hans, pinyin.NewArgs())) 79 | // [zhong guo ren] 80 | 81 | fmt.Println(pinyin.Convert(hans, nil)) 82 | // [[zhong] [guo] [ren]] 83 | 84 | fmt.Println(pinyin.LazyConvert(hans, nil)) 85 | // [zhong guo ren] 86 | } 87 | ``` 88 | 89 | 注意: 90 | 91 | * 默认情况下会忽略没有拼音的字符(可以通过自定义 `Fallback` 参数的值来自定义如何处理没有拼音的字符, 92 | 详见 [示例](https://godoc.org/github.com/mozillazg/go-pinyin#example-Pinyin--FallbackCustom1))。 93 | * 根据 [《汉语拼音方案》](http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html) y,w,ü (yu) 都不是声母, 94 | 以及不是所有拼音都有声母,如果这不是你预期的话,你可能需要的是首字母风格 `FirstLetter` 95 | ( [详细信息](https://github.com/mozillazg/python-pinyin#%E4%B8%BA%E4%BB%80%E4%B9%88%E6%B2%A1%E6%9C%89-y-w-yu-%E5%87%A0%E4%B8%AA%E5%A3%B0%E6%AF%8D) )。 96 | 97 | 98 | Related Projects 99 | ----------------- 100 | 101 | * [hotoo/pinyin](https://github.com/hotoo/pinyin): 汉语拼音转换工具 Node.js/JavaScript 版。 102 | * [mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。 103 | * [mozillazg/rust-pinyin](https://github.com/mozillazg/rust-pinyin): 汉语拼音转换工具 Rust 版。 104 | 105 | 106 | pinyin data 107 | ----------------- 108 | 109 | * 使用 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据 110 | 111 | 112 | License 113 | --------- 114 | 115 | Under the MIT License. 
// cmdArgs holds the two positional command-line arguments of the generator.
type cmdArgs struct {
	inputFile  string // path of the pinyin-data text file to read
	outputFile string // path of the Go source file to write
}

// formatDictEntry converts one pinyin-data line such as
//
//	U+4E2D: zhōng,zhòng  # 中
//
// into one Go map entry such as
//
//	0x4E2D: "zhōng,zhòng",
//
// (prefixed with a tab). ok is false for blank lines and for lines that hold
// nothing but a `#` comment. A non-empty line without a `:` separator is a
// corrupt input file, so it panics with the offending text instead of the
// index-out-of-range the generator used to die with.
func formatDictEntry(line string) (entry string, ok bool) {
	// Everything from '#' on is a comment; this also turns a pure comment
	// line into an empty one.
	if i := strings.Index(line, "#"); i >= 0 {
		line = line[:i]
	}
	// Trimming makes the parser independent of the amount of padding before
	// the comment and of "\n" / "\r\n" line endings.
	line = strings.TrimSpace(line)
	if line == "" {
		return "", false
	}

	fields := strings.SplitN(line, ":", 2)
	if len(fields) != 2 {
		panic(fmt.Sprintf("gen_pinyin_dict: malformed line %q", line))
	}
	// U+4E2D -> 0x4E2D
	hexCode := strings.Replace(strings.TrimSpace(fields[0]), "U+", "0x", 1)
	// zhōng,zhòng
	pinyin := strings.TrimSpace(fields[1])
	return fmt.Sprintf("\t%s: \"%s\",", hexCode, pinyin), true
}

// genCode reads pinyin-data lines from inFile and writes the Go source that
// declares the PinyinDict map to outFile.
//
// Read and write failures panic: this is a one-shot developer tool and a
// partially written dictionary must not go unnoticed.
func genCode(inFile *os.File, outFile *os.File) {
	const header = `package pinyin

// PinyinDict is data map
// Warning: Auto-generated file, don't edit.
var PinyinDict = map[int]string{
`
	rd := bufio.NewReader(inFile)
	var entries []string

	for {
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			panic(err)
		}
		// ReadString returns the remaining data together with io.EOF when the
		// file does not end in a newline, so the line is processed before the
		// EOF check to keep that last record.
		if entry, ok := formatDictEntry(line); ok {
			entries = append(entries, entry)
		}
		if err == io.EOF {
			break
		}
	}

	output := header + strings.Join(entries, "\n") + "\n}\n"
	if _, err := outFile.WriteString(output); err != nil {
		panic(err)
	}
}

// parseCmdArgs parses the command line and returns the first two positional
// arguments as input and output file names (empty when absent).
func parseCmdArgs() cmdArgs {
	flag.Parse()
	return cmdArgs{
		inputFile:  flag.Arg(0),
		outputFile: flag.Arg(1),
	}
}
outputFile) 79 | panic(err) 80 | } 81 | defer inFp.Close() 82 | defer outFp.Close() 83 | 84 | genCode(inFp, outFp) 85 | } 86 | -------------------------------------------------------------------------------- /benchmark_test.go: -------------------------------------------------------------------------------- 1 | package pinyin 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | var hans500 = strings.Replace(strings.Replace(` 9 | 的、一、是、在、不、了、有、和、人、这、中、大、为、上、个、国、我、以、要、他、 10 | 时、来、用、们、生、到、作、地、于、出、就、分、对、成、会、可、主、发、年、动、 11 | 同、工、也、能、下、过、子、说、产、种、面、而、方、后、多、定、行、学、法、所、 12 | 民、得、经、十、三、之、进、着、等、部、度、家、电、力、里、如、水、化、高、自、 13 | 二、理、起、小、物、现、实、加、量、都、两、体、制、机、当、使、点、从、业、本、 14 | 去、把、性、好、应、开、它、合、还、因、由、其、些、然、前、外、天、政、四、日、 15 | 那、社、义、事、平、形、相、全、表、间、样、与、关、各、重、新、线、内、数、正、 16 | 心、反、你、明、看、原、又、么、利、比、或、但、质、气、第、向、道、命、此、变、 17 | 条、只、没、结、解、问、意、建、月、公、无、系、军、很、情、者、最、立、代、想、 18 | 已、通、并、提、直、题、党、程、展、五、果、料、象、员、革、位、入、常、文、总、 19 | 次、品、式、活、设、及、管、特、件、长、求、老、头、基、资、边、流、路、级、少、 20 | 图、山、统、接、知、较、将、组、见、计、别、她、手、角、期、根、论、运、农、指、 21 | 几、九、区、强、放、决、西、被、干、做、必、战、先、回、则、任、取、据、处、队、 22 | 南、给、色、光、门、即、保、治、北、造、百、规、热、领、七、海、口、东、导、器、 23 | 压、志、世、金、增、争、济、阶、油、思、术、极、交、受、联、什、认、六、共、权、 24 | 收、证、改、清、己、美、再、采、转、更、单、风、切、打、白、教、速、花、带、安、 25 | 场、身、车、例、真、务、具、万、每、目、至、达、走、积、示、议、声、报、斗、完、 26 | 类、八、离、华、名、确、才、科、张、信、马、节、话、米、整、空、元、况、今、集、 27 | 温、传、土、许、步、群、广、石、记、需、段、研、界、拉、林、律、叫、且、究、观、 28 | 越、织、装、影、算、低、持、音、众、书、布、复、容、儿、须、际、商、非、验、连、 29 | 断、深、难、近、矿、千、周、委、素、技、备、半、办、青、省、列、习、响、约、支、 30 | 般、史、感、劳、便、团、往、酸、历、市、克、何、除、消、构、府、称、太、准、精、 31 | 值、号、率、族、维、划、选、标、写、存、候、毛、亲、快、效、斯、院、查、江、型、 32 | 眼、王、按、格、养、易、置、派、层、片、始、却、专、状、育、厂、京、识、适、属、 33 | 圆、包、火、住、调、满、县、局、照、参、红、细、引、听、该、铁、价、严、龙、飞 34 | `, "、", "", -1), "\n", "", -1) 35 | 36 | func benchmarkPinyin(b *testing.B, s string, args Args) { 37 | b.StopTimer() 38 | b.StartTimer() 39 | 40 | for i := 0; i < b.N; i++ { 41 | Pinyin(s, args) 42 | } 43 | } 44 | 45 | func benchmarkLazyPinyin(b *testing.B, s string, args Args) { 46 | b.StopTimer() 47 | b.StartTimer() 48 | 49 | for i := 0; i < b.N; i++ { 50 | LazyPinyin(s, args) 51 | } 52 | } 53 | 54 | func 
BenchmarkPinyinOne(b *testing.B) { 55 | args := NewArgs() 56 | benchmarkPinyin(b, "中", args) 57 | } 58 | 59 | func BenchmarkPinyin500(b *testing.B) { 60 | args := NewArgs() 61 | benchmarkPinyin(b, hans500, args) 62 | } 63 | 64 | func BenchmarkLazyPinyinOne(b *testing.B) { 65 | args := NewArgs() 66 | benchmarkLazyPinyin(b, "中", args) 67 | } 68 | 69 | func BenchmarkLazyPinyin500(b *testing.B) { 70 | args := NewArgs() 71 | benchmarkLazyPinyin(b, hans500, args) 72 | } 73 | -------------------------------------------------------------------------------- /cli/pinyin/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mozillazg/go-pinyin/cli/pinyin 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/mattn/go-isatty v0.0.18 7 | github.com/mozillazg/go-pinyin v0.19.0 8 | ) 9 | 10 | require golang.org/x/sys v0.6.0 // indirect 11 | -------------------------------------------------------------------------------- /cli/pinyin/go.sum: -------------------------------------------------------------------------------- 1 | github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= 2 | github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 3 | github.com/mozillazg/go-pinyin v0.19.0 h1:p+J8/kjJ558KPvVGYLvqBhxf8jbZA2exSLCs2uUVN8c= 4 | github.com/mozillazg/go-pinyin v0.19.0/go.mod h1:iR4EnMMRXkfpFVV5FMi4FNB6wGq9NV6uDWbUuPhP4Yc= 5 | golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= 6 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 7 | -------------------------------------------------------------------------------- /cli/pinyin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "strings" 9 | 10 | "github.com/mattn/go-isatty" 11 | "github.com/mozillazg/go-pinyin" 12 | ) 13 | 14 | func main() { 15 | 
heteronym := flag.Bool("e", false, "启用多音字模式") 16 | style := flag.String("s", "zh4ao", "指定拼音风格。可选值:zhao, zh4ao, zha4o, zhao4, zh, z, ao, 4ao, a4o, ao4") 17 | flag.Parse() 18 | hans := flag.Args() 19 | stdin := []byte{} 20 | if !isatty.IsTerminal(os.Stdin.Fd()) { 21 | stdin, _ = ioutil.ReadAll(os.Stdin) 22 | } 23 | if len(stdin) > 0 { 24 | hans = append(hans, string(stdin)) 25 | } 26 | 27 | if len(hans) == 0 { 28 | fmt.Fprintln(os.Stderr, "请至少输入一个汉字: pinyin [-e] [-s STYLE] HANS [HANS ...]") 29 | os.Exit(1) 30 | } 31 | 32 | args := pinyin.NewArgs() 33 | if *heteronym { 34 | args.Heteronym = true 35 | } 36 | 37 | styleValues := map[string]int{ 38 | "zhao": pinyin.Normal, 39 | "zh4ao": pinyin.Tone, 40 | "zha4o": pinyin.Tone2, 41 | "zhao4": pinyin.Tone3, 42 | "zh": pinyin.Initials, 43 | "z": pinyin.FirstLetter, 44 | "ao": pinyin.Finals, 45 | "4ao": pinyin.FinalsTone, 46 | "a4o": pinyin.FinalsTone2, 47 | "ao4": pinyin.FinalsTone3, 48 | } 49 | if value, ok := styleValues[*style]; !ok { 50 | fmt.Fprintf(os.Stderr, "无效的拼音风格:%s\n", *style) 51 | os.Exit(1) 52 | } else { 53 | args.Style = value 54 | } 55 | 56 | pys := pinyin.Pinyin(strings.Join(hans, ""), args) 57 | for _, s := range pys { 58 | fmt.Print(strings.Join(s, ","), " ") 59 | } 60 | if len(pys) > 0 { 61 | fmt.Println() 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package pinyin : 汉语拼音转换工具. 
3 | 4 | Usage 5 | 6 | package main 7 | 8 | import ( 9 | "fmt" 10 | "github.com/mozillazg/go-pinyin" 11 | ) 12 | 13 | func main() { 14 | hans := "中国人" 15 | // 默认 16 | a := pinyin.NewArgs() 17 | fmt.Println(pinyin.Pinyin(hans, a)) 18 | // [[zhong] [guo] [ren]] 19 | 20 | // 包含声调 21 | a.Style = pinyin.Tone 22 | fmt.Println(pinyin.Pinyin(hans, a)) 23 | // [[zhōng] [guó] [rén]] 24 | 25 | // 声调用数字表示 26 | a.Style = pinyin.Tone2 27 | fmt.Println(pinyin.Pinyin(hans, a)) 28 | // [[zho1ng] [guo2] [re2n]] 29 | 30 | // 开启多音字模式 31 | a = pinyin.NewArgs() 32 | a.Heteronym = true 33 | fmt.Println(pinyin.Pinyin(hans, a)) 34 | // [[zhong zhong] [guo] [ren]] 35 | a.Style = pinyin.Tone2 36 | fmt.Println(pinyin.Pinyin(hans, a)) 37 | // [[zho1ng zho4ng] [guo2] [re2n]] 38 | } 39 | */ 40 | package pinyin 41 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package pinyin_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/mozillazg/go-pinyin" 7 | ) 8 | 9 | func ExampleConvert() { 10 | hans := "中国人" 11 | fmt.Println("default:", pinyin.Convert(hans, nil)) 12 | // Output: default: [[zhong] [guo] [ren]] 13 | } 14 | 15 | func ExamplePinyin_default() { 16 | hans := "中国人" 17 | a := pinyin.NewArgs() 18 | fmt.Println("default:", pinyin.Pinyin(hans, a)) 19 | // Output: default: [[zhong] [guo] [ren]] 20 | } 21 | 22 | func ExamplePinyin_normal() { 23 | hans := "中国人" 24 | a := pinyin.NewArgs() 25 | a.Style = pinyin.Normal 26 | fmt.Println("Normal:", pinyin.Pinyin(hans, a)) 27 | // Output: Normal: [[zhong] [guo] [ren]] 28 | } 29 | 30 | func ExamplePinyin_tone() { 31 | hans := "中国人" 32 | a := pinyin.NewArgs() 33 | a.Style = pinyin.Tone 34 | fmt.Println("Tone:", pinyin.Pinyin(hans, a)) 35 | // Output: Tone: [[zhōng] [guó] [rén]] 36 | } 37 | 38 | func ExamplePinyin_tone2() { 39 | hans := "中国人" 40 | a := pinyin.NewArgs() 41 | a.Style = pinyin.Tone2 42 | fmt.Println("Tone2:", 
pinyin.Pinyin(hans, a)) 43 | // Output: Tone2: [[zho1ng] [guo2] [re2n]] 44 | } 45 | 46 | func ExamplePinyin_initials() { 47 | hans := "中国人" 48 | a := pinyin.NewArgs() 49 | a.Style = pinyin.Initials 50 | fmt.Println("Initials:", pinyin.Pinyin(hans, a)) 51 | // Output: Initials: [[zh] [g] [r]] 52 | } 53 | 54 | func ExamplePinyin_firstLetter() { 55 | hans := "中国人" 56 | a := pinyin.NewArgs() 57 | a.Style = pinyin.FirstLetter 58 | fmt.Println(pinyin.Pinyin(hans, a)) 59 | // Output: [[z] [g] [r]] 60 | } 61 | 62 | func ExamplePinyin_finals() { 63 | hans := "中国人" 64 | a := pinyin.NewArgs() 65 | a.Style = pinyin.Finals 66 | fmt.Println(pinyin.Pinyin(hans, a)) 67 | // Output: [[ong] [uo] [en]] 68 | } 69 | 70 | func ExamplePinyin_finalsTone() { 71 | hans := "中国人" 72 | a := pinyin.NewArgs() 73 | a.Style = pinyin.FinalsTone 74 | fmt.Println(pinyin.Pinyin(hans, a)) 75 | // Output: [[ōng] [uó] [én]] 76 | } 77 | 78 | func ExamplePinyin_finalsTone2() { 79 | hans := "中国人" 80 | a := pinyin.NewArgs() 81 | a.Style = pinyin.FinalsTone2 82 | fmt.Println(pinyin.Pinyin(hans, a)) 83 | // Output: [[o1ng] [uo2] [e2n]] 84 | } 85 | 86 | func ExamplePinyin_heteronym() { 87 | hans := "中国人" 88 | a := pinyin.NewArgs() 89 | a.Heteronym = true 90 | a.Style = pinyin.Tone2 91 | fmt.Println(pinyin.Pinyin(hans, a)) 92 | // Output: [[zho1ng zho4ng] [guo2] [re2n]] 93 | } 94 | 95 | func ExamplePinyin_fallbackCustom1() { 96 | hans := "中国人abc" 97 | a := pinyin.NewArgs() 98 | a.Fallback = func(r rune, a pinyin.Args) []string { 99 | return []string{string(r)} 100 | } 101 | fmt.Println(pinyin.Pinyin(hans, a)) 102 | // Output: [[zhong] [guo] [ren] [a] [b] [c]] 103 | } 104 | 105 | func ExamplePinyin_fallbackCustom2() { 106 | hans := "中国人アイウ" 107 | a := pinyin.NewArgs() 108 | a.Fallback = func(r rune, a pinyin.Args) []string { 109 | data := map[rune][]string{ 110 | 'ア': {"a"}, 111 | 'イ': {"i"}, 112 | 'ウ': {"u"}, 113 | } 114 | s, ok := data[r] 115 | if ok { 116 | return s 117 | } else { 118 | return []string{} 119 | 
// phoneticSymbol maps every tone-marked (or otherwise non-ASCII) pinyin
// letter to its ASCII form: the base letter followed by the tone number 1-4.
// "ü" is written "v", and a toneless "ü" has no digit.
//
// The keys are also used to build the character class of rePhoneticSymbol, so
// each key must be exactly one rune.
var phoneticSymbol = map[string]string{
	"ā": "a1",
	"á": "a2",
	"ǎ": "a3",
	"à": "a4",
	"ē": "e1",
	"é": "e2",
	"ě": "e3",
	"è": "e4",
	"ō": "o1",
	"ó": "o2",
	"ǒ": "o3",
	"ò": "o4",
	"ī": "i1",
	"í": "i2",
	"ǐ": "i3",
	"ì": "i4",
	"ū": "u1",
	"ú": "u2",
	"ǔ": "u3",
	"ù": "u4",
	"ü": "v",
	// First-tone ü was missing from the table, so a "ǖ" (which
	// finalExceptionsMap can produce) was never converted to its ASCII form.
	"ǖ": "v1",
	"ǘ": "v2",
	"ǚ": "v3",
	"ǜ": "v4",
	"ń": "n2",
	"ň": "n3",
	"ǹ": "n4",
	"ḿ": "m2",
}
zho1ng guo2 21 | Tone3 = 8 // 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示。如: zhong1 guo2 22 | Initials = 3 // 声母风格,只返回各个拼音的声母部分。如: zh g 。注意:不是所有的拼音都有声母 23 | FirstLetter = 4 // 首字母风格,只返回拼音的首字母部分。如: z g 24 | Finals = 5 // 韵母风格,只返回各个拼音的韵母部分,不带声调。如: ong uo 25 | FinalsTone = 6 // 韵母风格1,带声调,声调在韵母第一个字母上。如: ōng uó 26 | FinalsTone2 = 7 // 韵母风格2,带声调,声调在各个韵母之后,用数字 [1-4] 进行表示。如: o1ng uo2 27 | FinalsTone3 = 9 // 韵母风格3,带声调,声调在各个拼音之后,用数字 [1-4] 进行表示。如: ong1 uo2 28 | ) 29 | 30 | // 拼音风格(兼容之前的版本) 31 | const ( 32 | NORMAL = Normal 33 | TONE = Tone 34 | TONE2 = Tone2 35 | INITIALS = Initials 36 | FIRST_LETTER = FirstLetter 37 | FINALS = Finals 38 | FINALS_TONE = FinalsTone 39 | FINALS_TONE2 = FinalsTone2 40 | ) 41 | 42 | // 声母表 43 | var initialArray = strings.Split( 44 | "b,p,m,f,d,t,n,l,g,k,h,j,q,x,r,zh,ch,sh,z,c,s", 45 | ",", 46 | ) 47 | 48 | // 所有带声调的字符 49 | var rePhoneticSymbolSource = func(m map[string]string) string { 50 | s := "" 51 | for k := range m { 52 | s = s + k 53 | } 54 | return s 55 | }(phoneticSymbol) 56 | 57 | // 匹配带声调字符的正则表达式 58 | var rePhoneticSymbol = regexp.MustCompile("[" + rePhoneticSymbolSource + "]") 59 | 60 | // 匹配使用数字标识声调的字符的正则表达式 61 | var reTone2 = regexp.MustCompile("([aeoiuvnm])([1-4])$") 62 | 63 | // 匹配 Tone2 中标识韵母声调的正则表达式 64 | var reTone3 = regexp.MustCompile("^([a-z]+)([1-4])([a-z]*)$") 65 | 66 | // Args 配置信息 67 | type Args struct { 68 | Style int // 拼音风格(默认: Normal) 69 | Heteronym bool // 是否启用多音字模式(默认:禁用) 70 | Separator string // Slug 中使用的分隔符(默认:-) 71 | 72 | // 处理没有拼音的字符(默认忽略没有拼音的字符) 73 | // 函数返回的 slice 的长度为0 则表示忽略这个字符 74 | Fallback func(r rune, a Args) []string 75 | } 76 | 77 | // Style 默认配置:风格 78 | var Style = Normal 79 | 80 | // Heteronym 默认配置:是否启用多音字模式 81 | var Heteronym = false 82 | 83 | // Separator 默认配置: `Slug` 中 Join 所用的分隔符 84 | var Separator = "-" 85 | 86 | // Fallback 默认配置: 如何处理没有拼音的字符(忽略这个字符) 87 | var Fallback = func(r rune, a Args) []string { 88 | return []string{} 89 | } 90 | 91 | var finalExceptionsMap = map[string]string{ 92 | "ū": "ǖ", 93 | 
"ú": "ǘ", 94 | "ǔ": "ǚ", 95 | "ù": "ǜ", 96 | } 97 | var reFinalExceptions = regexp.MustCompile("^(j|q|x)(ū|ú|ǔ|ù)$") 98 | var reFinal2Exceptions = regexp.MustCompile("^(j|q|x)u(\\d?)$") 99 | 100 | // NewArgs 返回包含默认配置的 `Args` 101 | func NewArgs() Args { 102 | return Args{Style, Heteronym, Separator, Fallback} 103 | } 104 | 105 | // 获取单个拼音中的声母 106 | func initial(p string) string { 107 | s := "" 108 | for _, v := range initialArray { 109 | if strings.HasPrefix(p, v) { 110 | s = v 111 | break 112 | } 113 | } 114 | return s 115 | } 116 | 117 | // 获取单个拼音中的韵母 118 | func final(p string) string { 119 | n := initial(p) 120 | if n == "" { 121 | return handleYW(p) 122 | } 123 | 124 | // 特例 j/q/x 125 | matches := reFinalExceptions.FindStringSubmatch(p) 126 | // jū -> jǖ 127 | if len(matches) == 3 && matches[1] != "" && matches[2] != "" { 128 | v, _ := finalExceptionsMap[matches[2]] 129 | return v 130 | } 131 | // ju -> jv, ju1 -> jv1 132 | p = reFinal2Exceptions.ReplaceAllString(p, "${1}v$2") 133 | return strings.Join(strings.SplitN(p, n, 2), "") 134 | } 135 | 136 | // 处理 y, w 137 | func handleYW(p string) string { 138 | // 特例 y/w 139 | if strings.HasPrefix(p, "yu") { 140 | p = "v" + p[2:] // yu -> v 141 | } else if strings.HasPrefix(p, "yi") { 142 | p = p[1:] // yi -> i 143 | } else if strings.HasPrefix(p, "y") { 144 | p = "i" + p[1:] // y -> i 145 | } else if strings.HasPrefix(p, "wu") { 146 | p = p[1:] // wu -> u 147 | } else if strings.HasPrefix(p, "w") { 148 | p = "u" + p[1:] // w -> u 149 | } 150 | return p 151 | } 152 | 153 | func toFixed(p string, a Args) string { 154 | if a.Style == Initials { 155 | return initial(p) 156 | } 157 | origP := p 158 | 159 | // 替换拼音中的带声调字符 160 | py := rePhoneticSymbol.ReplaceAllStringFunc(p, func(m string) string { 161 | symbol, _ := phoneticSymbol[m] 162 | switch a.Style { 163 | // 不包含声调 164 | case Normal, FirstLetter, Finals: 165 | // 去掉声调: a1 -> a 166 | m = reTone2.ReplaceAllString(symbol, "$1") 167 | case Tone2, FinalsTone2, Tone3, 
FinalsTone3: 168 | // 返回使用数字标识声调的字符 169 | m = symbol 170 | default: 171 | // 声调在头上 172 | } 173 | return m 174 | }) 175 | 176 | switch a.Style { 177 | // 将声调移动到最后 178 | case Tone3, FinalsTone3: 179 | py = reTone3.ReplaceAllString(py, "$1$3$2") 180 | } 181 | switch a.Style { 182 | // 首字母 183 | case FirstLetter: 184 | py = string([]rune(py)[0]) 185 | // 韵母 186 | case Finals, FinalsTone, FinalsTone2, FinalsTone3: 187 | // 转换为 []rune unicode 编码用于获取第一个拼音字符 188 | // 因为 string 是 utf-8 编码不方便获取第一个拼音字符 189 | rs := []rune(origP) 190 | switch string(rs[0]) { 191 | // 因为鼻音没有声母所以不需要去掉声母部分 192 | case "ḿ", "ń", "ň", "ǹ": 193 | default: 194 | py = final(py) 195 | } 196 | } 197 | return py 198 | } 199 | 200 | func applyStyle(p []string, a Args) []string { 201 | newP := []string{} 202 | for _, v := range p { 203 | newP = append(newP, toFixed(v, a)) 204 | } 205 | return newP 206 | } 207 | 208 | // SinglePinyin 把单个 `rune` 类型的汉字转换为拼音. 209 | func SinglePinyin(r rune, a Args) []string { 210 | if a.Fallback == nil { 211 | a.Fallback = Fallback 212 | } 213 | value, ok := PinyinDict[int(r)] 214 | pys := []string{} 215 | if ok { 216 | pys = strings.Split(value, ",") 217 | } else { 218 | pys = a.Fallback(r, a) 219 | } 220 | if len(pys) > 0 { 221 | if !a.Heteronym { 222 | pys = []string{pys[0]} 223 | } 224 | return applyStyle(pys, a) 225 | } 226 | return pys 227 | } 228 | 229 | // Pinyin 汉字转拼音,支持多音字模式. 230 | func Pinyin(s string, a Args) [][]string { 231 | pys := [][]string{} 232 | for _, r := range s { 233 | py := SinglePinyin(r, a) 234 | if len(py) > 0 { 235 | pys = append(pys, py) 236 | } 237 | } 238 | return pys 239 | } 240 | 241 | // LazyPinyin 汉字转拼音,与 `Pinyin` 的区别是: 242 | // 返回值类型不同,并且不支持多音字模式,每个汉字只取第一个音. 243 | func LazyPinyin(s string, a Args) []string { 244 | a.Heteronym = false 245 | pys := []string{} 246 | for _, v := range Pinyin(s, a) { 247 | pys = append(pys, v[0]) 248 | } 249 | return pys 250 | } 251 | 252 | // Slug join `LazyPinyin` 的返回值. 
253 | // 建议改用 https://github.com/mozillazg/go-slugify 254 | func Slug(s string, a Args) string { 255 | separator := a.Separator 256 | return strings.Join(LazyPinyin(s, a), separator) 257 | } 258 | 259 | // Convert 跟 Pinyin 的唯一区别就是 a 参数可以是 nil 260 | func Convert(s string, a *Args) [][]string { 261 | if a == nil { 262 | args := NewArgs() 263 | a = &args 264 | } 265 | return Pinyin(s, *a) 266 | } 267 | 268 | // LazyConvert 跟 LazyPinyin 的唯一区别就是 a 参数可以是 nil 269 | func LazyConvert(s string, a *Args) []string { 270 | if a == nil { 271 | args := NewArgs() 272 | a = &args 273 | } 274 | return LazyPinyin(s, *a) 275 | } 276 | -------------------------------------------------------------------------------- /pinyin_test.go: -------------------------------------------------------------------------------- 1 | package pinyin 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | type pinyinFunc func(string, Args) [][]string 9 | type testCase struct { 10 | args Args 11 | result [][]string 12 | } 13 | 14 | func testPinyin(t *testing.T, s string, d []testCase, f pinyinFunc) { 15 | for _, tc := range d { 16 | v := f(s, tc.args) 17 | if !reflect.DeepEqual(v, tc.result) { 18 | t.Errorf("Expected %s, got %s", tc.result, v) 19 | } 20 | } 21 | } 22 | 23 | func TestPinyin(t *testing.T) { 24 | hans := "中国人" 25 | testData := []testCase{ 26 | // default 27 | { 28 | Args{Style: Normal}, 29 | [][]string{ 30 | {"zhong"}, 31 | {"guo"}, 32 | {"ren"}, 33 | }, 34 | }, 35 | // default 36 | { 37 | NewArgs(), 38 | [][]string{ 39 | {"zhong"}, 40 | {"guo"}, 41 | {"ren"}, 42 | }, 43 | }, 44 | // Normal 45 | { 46 | Args{Style: Normal}, 47 | [][]string{ 48 | {"zhong"}, 49 | {"guo"}, 50 | {"ren"}, 51 | }, 52 | }, 53 | // Tone 54 | { 55 | Args{Style: Tone}, 56 | [][]string{ 57 | {"zhōng"}, 58 | {"guó"}, 59 | {"rén"}, 60 | }, 61 | }, 62 | // Tone2 63 | { 64 | Args{Style: Tone2}, 65 | [][]string{ 66 | {"zho1ng"}, 67 | {"guo2"}, 68 | {"re2n"}, 69 | }, 70 | }, 71 | // Tone3 72 | { 73 | Args{Style: Tone3}, 74 | 
[][]string{ 75 | {"zhong1"}, 76 | {"guo2"}, 77 | {"ren2"}, 78 | }, 79 | }, 80 | // Initials 81 | { 82 | Args{Style: Initials}, 83 | [][]string{ 84 | {"zh"}, 85 | {"g"}, 86 | {"r"}, 87 | }, 88 | }, 89 | // FirstLetter 90 | { 91 | Args{Style: FirstLetter}, 92 | [][]string{ 93 | {"z"}, 94 | {"g"}, 95 | {"r"}, 96 | }, 97 | }, 98 | // Finals 99 | { 100 | Args{Style: Finals}, 101 | [][]string{ 102 | {"ong"}, 103 | {"uo"}, 104 | {"en"}, 105 | }, 106 | }, 107 | // FinalsTone 108 | { 109 | Args{Style: FinalsTone}, 110 | [][]string{ 111 | {"ōng"}, 112 | {"uó"}, 113 | {"én"}, 114 | }, 115 | }, 116 | // FinalsTone2 117 | { 118 | Args{Style: FinalsTone2}, 119 | [][]string{ 120 | {"o1ng"}, 121 | {"uo2"}, 122 | {"e2n"}, 123 | }, 124 | }, 125 | // FinalsTone3 126 | { 127 | Args{Style: FinalsTone3}, 128 | [][]string{ 129 | {"ong1"}, 130 | {"uo2"}, 131 | {"en2"}, 132 | }, 133 | }, 134 | // Heteronym 135 | { 136 | Args{Heteronym: true}, 137 | [][]string{ 138 | {"zhong", "zhong"}, 139 | {"guo"}, 140 | {"ren"}, 141 | }, 142 | }, 143 | } 144 | 145 | testPinyin(t, hans, testData, Pinyin) 146 | 147 | // 测试不是多音字的 Heteronym 148 | hans = "你" 149 | testData = []testCase{ 150 | { 151 | Args{}, 152 | [][]string{ 153 | {"ni"}, 154 | }, 155 | }, 156 | { 157 | Args{Heteronym: true}, 158 | [][]string{ 159 | {"ni"}, 160 | }, 161 | }, 162 | } 163 | testPinyin(t, hans, testData, Pinyin) 164 | } 165 | 166 | func TestNoneHans(t *testing.T) { 167 | s := "abc" 168 | v := Pinyin(s, NewArgs()) 169 | value := [][]string{} 170 | if !reflect.DeepEqual(v, value) { 171 | t.Errorf("Expected %s, got %s", value, v) 172 | } 173 | } 174 | 175 | func TestLazyPinyin(t *testing.T) { 176 | s := "中国人" 177 | v := LazyPinyin(s, Args{}) 178 | value := []string{"zhong", "guo", "ren"} 179 | if !reflect.DeepEqual(v, value) { 180 | t.Errorf("Expected %s, got %s", value, v) 181 | } 182 | 183 | s = "中国人abc" 184 | v = LazyPinyin(s, Args{}) 185 | value = []string{"zhong", "guo", "ren"} 186 | if !reflect.DeepEqual(v, value) { 187 | 
t.Errorf("Expected %s, got %s", value, v) 188 | } 189 | } 190 | 191 | func TestSlug(t *testing.T) { 192 | s := "中国人" 193 | v := Slug(s, Args{}) 194 | value := "zhongguoren" 195 | if v != value { 196 | t.Errorf("Expected %s, got %s", value, v) 197 | } 198 | 199 | v = Slug(s, Args{Separator: ","}) 200 | value = "zhong,guo,ren" 201 | if v != value { 202 | t.Errorf("Expected %s, got %s", value, v) 203 | } 204 | 205 | a := NewArgs() 206 | v = Slug(s, a) 207 | value = "zhong-guo-ren" 208 | if v != value { 209 | t.Errorf("Expected %s, got %s", value, v) 210 | } 211 | 212 | s = "中国人abc,,中" 213 | v = Slug(s, a) 214 | value = "zhong-guo-ren-zhong" 215 | if v != value { 216 | t.Errorf("Expected %s, got %s", value, v) 217 | } 218 | } 219 | 220 | func TestFinal(t *testing.T) { 221 | value := "an" 222 | v := final("an") 223 | if v != value { 224 | t.Errorf("Expected %s, got %s", value, v) 225 | } 226 | } 227 | 228 | func TestFallback(t *testing.T) { 229 | hans := "中国人abc" 230 | testData := []testCase{ 231 | // default 232 | { 233 | NewArgs(), 234 | [][]string{ 235 | {"zhong"}, 236 | {"guo"}, 237 | {"ren"}, 238 | }, 239 | }, 240 | // custom 241 | { 242 | Args{ 243 | Fallback: func(r rune, a Args) []string { 244 | return []string{"la"} 245 | }, 246 | }, 247 | [][]string{ 248 | {"zhong"}, 249 | {"guo"}, 250 | {"ren"}, 251 | {"la"}, 252 | {"la"}, 253 | {"la"}, 254 | }, 255 | }, 256 | // custom 257 | { 258 | Args{ 259 | Heteronym: true, 260 | Fallback: func(r rune, a Args) []string { 261 | return []string{"la", "wo"} 262 | }, 263 | }, 264 | [][]string{ 265 | {"zhong", "zhong"}, 266 | {"guo"}, 267 | {"ren"}, 268 | {"la", "wo"}, 269 | {"la", "wo"}, 270 | {"la", "wo"}, 271 | }, 272 | }, 273 | } 274 | testPinyin(t, hans, testData, Pinyin) 275 | } 276 | 277 | type testItem struct { 278 | hans string 279 | args Args 280 | result [][]string 281 | } 282 | 283 | func testPinyinUpdate(t *testing.T, d []testItem, f pinyinFunc) { 284 | for _, tc := range d { 285 | v := f(tc.hans, tc.args) 286 | 
if !reflect.DeepEqual(v, tc.result) { 287 | t.Errorf("Expected %s, got %s", tc.result, v) 288 | } 289 | } 290 | } 291 | 292 | func TestUpdated(t *testing.T) { 293 | testData := []testItem{ 294 | // 误把 yu 放到声母列表了 295 | {"鱼", Args{Style: Tone2}, [][]string{{"yu2"}}}, 296 | {"鱼", Args{Style: Tone3}, [][]string{{"yu2"}}}, 297 | {"鱼", Args{Style: Finals}, [][]string{{"v"}}}, 298 | {"雨", Args{Style: Tone2}, [][]string{{"yu3"}}}, 299 | {"雨", Args{Style: Tone3}, [][]string{{"yu3"}}}, 300 | {"雨", Args{Style: Finals}, [][]string{{"v"}}}, 301 | {"元", Args{Style: Tone2}, [][]string{{"yua2n"}}}, 302 | {"元", Args{Style: Tone3}, [][]string{{"yuan2"}}}, 303 | {"元", Args{Style: Finals}, [][]string{{"van"}}}, 304 | // y, w 也不是拼音, yu的韵母是v, yi的韵母是i, wu的韵母是u 305 | {"呀", Args{Style: Initials}, [][]string{{""}}}, 306 | {"呀", Args{Style: Tone2}, [][]string{{"ya"}}}, 307 | {"呀", Args{Style: Tone3}, [][]string{{"ya"}}}, 308 | {"呀", Args{Style: Finals}, [][]string{{"ia"}}}, 309 | {"无", Args{Style: Initials}, [][]string{{""}}}, 310 | {"无", Args{Style: Tone2}, [][]string{{"wu2"}}}, 311 | {"无", Args{Style: Tone3}, [][]string{{"wu2"}}}, 312 | {"无", Args{Style: Finals}, [][]string{{"u"}}}, 313 | {"衣", Args{Style: Tone2}, [][]string{{"yi1"}}}, 314 | {"衣", Args{Style: Tone3}, [][]string{{"yi1"}}}, 315 | {"衣", Args{Style: Finals}, [][]string{{"i"}}}, 316 | {"万", Args{Style: Tone2}, [][]string{{"wa4n"}}}, 317 | {"万", Args{Style: Tone3}, [][]string{{"wan4"}}}, 318 | {"万", Args{Style: Finals}, [][]string{{"uan"}}}, 319 | // ju, qu, xu 的韵母应该是 v 320 | {"具", Args{Style: FinalsTone}, [][]string{{"ǜ"}}}, 321 | {"具", Args{Style: FinalsTone2}, [][]string{{"v4"}}}, 322 | {"具", Args{Style: FinalsTone3}, [][]string{{"v4"}}}, 323 | {"具", Args{Style: Finals}, [][]string{{"v"}}}, 324 | {"取", Args{Style: FinalsTone}, [][]string{{"ǚ"}}}, 325 | {"取", Args{Style: FinalsTone2}, [][]string{{"v3"}}}, 326 | {"取", Args{Style: FinalsTone3}, [][]string{{"v3"}}}, 327 | {"取", Args{Style: Finals}, [][]string{{"v"}}}, 328 | {"徐", 
Args{Style: FinalsTone}, [][]string{{"ǘ"}}}, 329 | {"徐", Args{Style: FinalsTone2}, [][]string{{"v2"}}}, 330 | {"徐", Args{Style: FinalsTone3}, [][]string{{"v2"}}}, 331 | {"徐", Args{Style: Finals}, [][]string{{"v"}}}, 332 | // # ń 333 | {"嗯", Args{Style: Normal}, [][]string{{"n"}}}, 334 | {"嗯", Args{Style: Tone}, [][]string{{"ń"}}}, 335 | {"嗯", Args{Style: Tone2}, [][]string{{"n2"}}}, 336 | {"嗯", Args{Style: Tone3}, [][]string{{"n2"}}}, 337 | {"嗯", Args{Style: Initials}, [][]string{{""}}}, 338 | {"嗯", Args{Style: FirstLetter}, [][]string{{"n"}}}, 339 | {"嗯", Args{Style: Finals}, [][]string{{"n"}}}, 340 | {"嗯", Args{Style: FinalsTone}, [][]string{{"ń"}}}, 341 | {"嗯", Args{Style: FinalsTone2}, [][]string{{"n2"}}}, 342 | {"嗯", Args{Style: FinalsTone3}, [][]string{{"n2"}}}, 343 | // # ḿ \u1e3f U+1E3F 344 | {"呣", Args{Style: Normal}, [][]string{{"m"}}}, 345 | {"呣", Args{Style: Tone}, [][]string{{"ḿ"}}}, 346 | {"呣", Args{Style: Tone2}, [][]string{{"m2"}}}, 347 | {"呣", Args{Style: Tone3}, [][]string{{"m2"}}}, 348 | {"呣", Args{Style: Initials}, [][]string{{""}}}, 349 | {"呣", Args{Style: FirstLetter}, [][]string{{"m"}}}, 350 | {"呣", Args{Style: Finals}, [][]string{{"m"}}}, 351 | {"呣", Args{Style: FinalsTone}, [][]string{{"ḿ"}}}, 352 | {"呣", Args{Style: FinalsTone2}, [][]string{{"m2"}}}, 353 | {"呣", Args{Style: FinalsTone3}, [][]string{{"m2"}}}, 354 | // 去除 0 355 | {"啊", Args{Style: Tone2}, [][]string{{"a"}}}, 356 | {"啊", Args{Style: Tone3}, [][]string{{"a"}}}, 357 | {"侵略", Args{Style: Tone2}, [][]string{{"qi1n"}, {"lve4"}}}, 358 | {"侵略", Args{Style: FinalsTone2}, [][]string{{"i1n"}, {"ve4"}}}, 359 | {"侵略", Args{Style: FinalsTone3}, [][]string{{"in1"}, {"ve4"}}}, 360 | } 361 | testPinyinUpdate(t, testData, Pinyin) 362 | } 363 | 364 | func TestConvert(t *testing.T) { 365 | s := "中国人" 366 | v := Convert(s, nil) 367 | value := [][]string{{"zhong"}, {"guo"}, {"ren"}} 368 | if !reflect.DeepEqual(v, value) { 369 | t.Errorf("Expected %s, got %s", value, v) 370 | } 371 | 372 | a := 
NewArgs() 373 | v = Convert(s, &a) 374 | if !reflect.DeepEqual(v, value) { 375 | t.Errorf("Expected %s, got %s", value, v) 376 | } 377 | } 378 | 379 | func TestLazyConvert(t *testing.T) { 380 | s := "中国人" 381 | v := LazyConvert(s, nil) 382 | value := []string{"zhong", "guo", "ren"} 383 | if !reflect.DeepEqual(v, value) { 384 | t.Errorf("Expected %s, got %s", value, v) 385 | } 386 | 387 | a := NewArgs() 388 | v = LazyConvert(s, &a) 389 | if !reflect.DeepEqual(v, value) { 390 | t.Errorf("Expected %s, got %s", value, v) 391 | } 392 | } 393 | 394 | func TestPinyin_fallback_issue_35(t *testing.T) { 395 | a := NewArgs() 396 | a.Separator = "" 397 | a.Style = FirstLetter 398 | a.Fallback = func(r rune, a Args) []string { 399 | return []string{string(r)} 400 | } 401 | var s = "重。,a庆" 402 | v := Pinyin(s, a) 403 | expect := [][]string{{"z"}, {"。"}, {","}, {"a"}, {"q"}} 404 | if !reflect.DeepEqual(v, expect) { 405 | t.Errorf("Expected %s, got %s", expect, v) 406 | } 407 | } 408 | --------------------------------------------------------------------------------