├── tests ├── pinyin │ ├── a.txt │ ├── a.txt-first_letter │ ├── a.txt-plain │ ├── a.txt-with_tone │ ├── a.txt-with_tone_num │ ├── a.txt-with_tone_num_end │ ├── c.txt │ ├── c.txt-first_letter │ ├── b.txt │ ├── c.txt-plain │ ├── c.txt-with_tone │ ├── c.txt-with_tone_num │ ├── c.txt-with_tone_num_end │ ├── b.txt-first_letter │ ├── b.txt-plain │ ├── b.txt-with_tone │ ├── b.txt-with_tone_num │ └── b.txt-with_tone_num_end ├── special_pinyin.rs ├── pinyin.rs └── compat.rs ├── .gitmodules ├── ci ├── install_extra.sh ├── coveralls.sh └── script.sh ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .bumpversion.cfg ├── coverage-check ├── Cargo.toml └── src │ └── main.rs ├── src ├── data.rs ├── lib.rs ├── compat.rs ├── pinyin.rs └── pinyin_multi.rs ├── DEVELOP.md ├── examples └── basic.rs ├── LICENSE ├── Cargo.toml ├── README.md ├── .gitignore └── CHANGELOG.md /tests/pinyin/a.txt: -------------------------------------------------------------------------------- 1 | 黛薄红深,约掠绿鬟云腻。 2 | 小鸳鸯,金翡翠,称人心。 3 | 锦鳞无处传幽意,海燕兰堂春又去。 4 | 隔年书,千点泪,恨难任。 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pinyin-data"] 2 | path = pinyin-data 3 | url = https://github.com/mozillazg/pinyin-data.git 4 | -------------------------------------------------------------------------------- /tests/pinyin/a.txt-first_letter: -------------------------------------------------------------------------------- 1 | d,b,h,s,-,y,l,l,h,y,n,- 2 | x,y,y,-,j,f,c,-,c,r,x,- 3 | j,l,w,c,c,y,y,-,h,y,l,t,c,y,q,- 4 | g,n,s,-,q,d,l,-,h,n,r,- 5 | -------------------------------------------------------------------------------- /ci/install_extra.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | rustup component add rustfmt clippy 6 | rustfmt --version 7 | cargo clippy --version 8 | 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "21:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | commit = True 3 | tag = True 4 | current_version = 0.10.0 5 | 6 | [bumpversion:file:Cargo.toml] 7 | search = version = "{current_version}" 8 | replace = version = "{new_version}" 9 | 10 | -------------------------------------------------------------------------------- /tests/pinyin/a.txt-plain: -------------------------------------------------------------------------------- 1 | dai,bao,hong,shen,-,yue,lüe,lü,huan,yun,ni,- 2 | xiao,yuan,yang,-,jin,fei,cui,-,cheng,ren,xin,- 3 | jin,lin,wu,chu,chuan,you,yi,-,hai,yan,lan,tang,chun,you,qu,- 4 | ge,nian,shu,-,qian,dian,lei,-,hen,nan,ren,- 5 | -------------------------------------------------------------------------------- /tests/pinyin/a.txt-with_tone: -------------------------------------------------------------------------------- 1 | dài,báo,hóng,shēn,-,yuē,lüè,lǜ,huán,yún,nì,- 2 | xiǎo,yuān,yāng,-,jīn,fěi,cuì,-,chēng,rén,xīn,- 3 | jǐn,lín,wú,chù,chuán,yōu,yì,-,hǎi,yàn,lán,táng,chūn,yòu,qù,- 4 | gé,nián,shū,-,qiān,diǎn,lèi,-,hèn,nán,rèn,- 5 | -------------------------------------------------------------------------------- /tests/pinyin/a.txt-with_tone_num: -------------------------------------------------------------------------------- 1 | da4i,ba2o,ho2ng,she1n,-,yue1,lüe4,lü4,hua2n,yu2n,ni4,- 2 | xia3o,yua1n,ya1ng,-,ji1n,fe3i,cui4,-,che1ng,re2n,xi1n,- 3 | ji3n,li2n,wu2,chu4,chua2n,yo1u,yi4,-,ha3i,ya4n,la2n,ta2ng,chu1n,yo4u,qu4,- 4 | 
ge2,nia2n,shu1,-,qia1n,dia3n,le4i,-,he4n,na2n,re4n,- 5 | -------------------------------------------------------------------------------- /tests/pinyin/a.txt-with_tone_num_end: -------------------------------------------------------------------------------- 1 | dai4,bao2,hong2,shen1,-,yue1,lüe4,lü4,huan2,yun2,ni4,- 2 | xiao3,yuan1,yang1,-,jin1,fei3,cui4,-,cheng1,ren2,xin1,- 3 | jin3,lin2,wu2,chu4,chuan2,you1,yi4,-,hai3,yan4,lan2,tang2,chun1,you4,qu4,- 4 | ge2,nian2,shu1,-,qian1,dian3,lei4,-,hen4,nan2,ren4,- 5 | -------------------------------------------------------------------------------- /tests/pinyin/c.txt: -------------------------------------------------------------------------------- 1 | 黔无驴,有好事者,船载以入;至则无可用,放之山下。 2 | 虎见之,庞然大物也,以为神。 3 | 蔽林间窥之,稍出近之,慭慭然莫相知。 4 | 他日,驴一鸣,虎大骇远遁,以为且噬已也,甚恐! 5 | 然往来视之,觉无异能者,益习其声,又近出前后,终不敢搏。 6 | 稍近益狎,荡倚冲冒。 7 | 驴不胜怒,蹄之。 8 | 虎因喜,计之曰:“技止此耳!” 9 | 因跳踉大阚,断其喉,尽其肉,乃去。 10 | 11 | 噫!形之庞也类有德;声之宏也类有能。 12 | 向不出其技,虎虽猛,疑畏,卒不敢取,今若是焉,悲夫! 13 | -------------------------------------------------------------------------------- /coverage-check/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "coverage-check" 3 | version = "0.1.0" 4 | authors = ["Xidorn Quan "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | termcolor = "1.4.1" 9 | 10 | [dependencies.pinyin] 11 | path = ".." 
/// Table of per-reading pinyin records.
///
/// The contents are spliced in at compile time from `pinyin_data.rs` under
/// `OUT_DIR` (presumably written by the crate's build script, which is not
/// shown here — confirm). `CharBlock::data` holds indices into this table.
pub(crate) static PINYIN_DATA: &[PinyinData] =
    include!(concat!(env!("OUT_DIR"), "/pinyin_data.rs"));

/// Table of heteronym entries, spliced in at compile time from
/// `heteronym_table.rs` under `OUT_DIR`.
///
/// Only compiled with the `heteronym` feature. `CharBlock::heteronym` holds
/// indices into this table. Each entry is a slice of `u16` values —
/// NOTE(review): these look like indices into `PINYIN_DATA`; confirm against
/// the generator.
#[cfg(feature = "heteronym")]
pub(crate) static HETERONYM_TABLE: &[&[u16]] =
    include!(concat!(env!("OUT_DIR"), "/heteronym_table.rs"));

/// Character blocks used to look up a character's data index, spliced in at
/// compile time from `char_blocks.rs` under `OUT_DIR`.
pub(crate) static CHAR_BLOCKS: &[CharBlock] = include!(concat!(env!("OUT_DIR"), "/char_blocks.rs"));
/tests/pinyin/b.txt: -------------------------------------------------------------------------------- 1 | 唧唧复唧唧,木兰当户织。 2 | 不闻机杼声,惟闻女叹息。 3 | 问女何所思?问女何所忆? 4 | 女亦无所思,女亦无所忆。 5 | 昨夜见军帖,可汗大点兵。 6 | 军书十二卷,卷卷有爷名。 7 | 阿爷无大儿,木兰无长兄。 8 | 愿为市鞍马,从此替爷征。 9 | 10 | 东市买骏马,西市买鞍鞯。 11 | 南市买辔头,北市买长鞭。 12 | 朝辞爷娘去,暮宿黄河边。 13 | 不闻爷娘唤女声,但闻黄河流水鸣溅溅。 14 | 旦辞黄河去,暮至黑山头。 15 | 不闻爷娘唤女声,但闻燕山胡骑声啾啾。 16 | 17 | 万里赴戎机,关山度若飞。 18 | 朔气传金柝,寒光照铁衣。 19 | 将军百战死,壮士十年归。 20 | 21 | 归来见天子,天子坐明堂。 22 | 策勋十二转,赏赐百千强。 23 | 可汗问所欲,“木兰不用尚书郎。 24 | 愿借明驼千里足,送儿还故乡。” 25 | 26 | 爷娘闻女来,出郭相扶将。 27 | 阿姊闻妹来,当户理红妆。 28 | 小弟闻姊来,磨刀霍霍向猪羊。 29 | 开我东阁门,坐我西间床。 30 | 脱我战时袍,着我旧时裳。 31 | 当窗理云鬓,对镜贴花黄。 32 | 出门看伙伴,伙伴皆惊惶: 33 | “同行十二年,不知木兰是女郎”。 34 | 35 | 雄兔脚扑朔,雌兔眼迷离。 36 | 两兔傍地走,安能辨我是雄雌? 37 | -------------------------------------------------------------------------------- /tests/special_pinyin.rs: -------------------------------------------------------------------------------- 1 | use pinyin::ToPinyinMulti; 2 | 3 | #[test] 4 | fn special_pinyin() { 5 | assert_eq!( 6 | list_all_heteronym('欸'), 7 | &["ai1", "ai3", "ê1", "ê2", "ê3", "ê4", "xie4", "ei2", "ei3", "ei4", "ei1"], 8 | ); 9 | assert_eq!(list_all_heteronym('嘸'), &["fu3", "wu3", "m1", "m2"]); 10 | assert_eq!(list_all_heteronym('呣'), &["m2", "m4", "mou2"]); 11 | } 12 | 13 | fn list_all_heteronym(ch: char) -> Vec<&'static str> { 14 | ch.to_pinyin_multi() 15 | .unwrap() 16 | .into_iter() 17 | .map(|pinyin| pinyin.with_tone_num_end()) 18 | .collect::>() 19 | } 20 | -------------------------------------------------------------------------------- /ci/script.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | cargo fmt --all -- --check 6 | cargo clippy --all-targets --all-features -- -D warnings 7 | 8 | cargo build 9 | cargo test 10 | 11 | cargo test --no-default-features --features=plain 12 | cargo test --no-default-features --features=with_tone 13 | cargo test --no-default-features --features=with_tone_num 14 | cargo test 
--no-default-features --features=with_tone_num_end 15 | 16 | cargo test --no-default-features --features=plain,heteronym 17 | cargo test --no-default-features --features=with_tone,heteronym 18 | cargo test --no-default-features --features=with_tone_num,heteronym 19 | cargo test --no-default-features --features=with_tone_num_end,heteronym 20 | -------------------------------------------------------------------------------- /tests/pinyin/c.txt-plain: -------------------------------------------------------------------------------- 1 | qian,wu,lü,-,you,hao,shi,zhe,-,chuan,zai,yi,ru,-,zhi,ze,wu,ke,yong,-,fang,zhi,shan,xia,- 2 | hu,jian,zhi,-,pang,ran,da,wu,ye,-,yi,wei,shen,- 3 | bi,lin,jian,kui,zhi,-,shao,chu,jin,zhi,-,yin,yin,ran,mo,xiang,zhi,- 4 | ta,ri,-,lü,yi,ming,-,hu,da,hai,yuan,dun,-,yi,wei,qie,shi,yi,ye,-,shen,kong,- 5 | ran,wang,lai,shi,zhi,-,jue,wu,yi,neng,zhe,-,yi,xi,qi,sheng,-,you,jin,chu,qian,hou,-,zhong,bu,gan,bo,- 6 | shao,jin,yi,xia,-,dang,yi,chong,mao,- 7 | lü,bu,sheng,nu,-,ti,zhi,- 8 | hu,yin,xi,-,ji,zhi,yue,-,-,ji,zhi,ci,er,-,- 9 | yin,tiao,liang,da,han,-,duan,qi,hou,-,jin,qi,rou,-,nai,qu,- 10 | 11 | yi,-,xing,zhi,pang,ye,lei,you,de,-,sheng,zhi,hong,ye,lei,you,neng,- 12 | xiang,bu,chu,qi,ji,-,hu,sui,meng,-,yi,wei,-,zu,bu,gan,qu,-,jin,ruo,shi,yan,-,bei,fu,- 13 | -------------------------------------------------------------------------------- /tests/pinyin/c.txt-with_tone: -------------------------------------------------------------------------------- 1 | qián,wú,lǘ,-,yǒu,hǎo,shì,zhě,-,chuán,zài,yǐ,rù,-,zhì,zé,wú,kě,yòng,-,fàng,zhī,shān,xià,- 2 | hǔ,jiàn,zhī,-,páng,rán,dà,wù,yě,-,yǐ,wèi,shén,- 3 | bì,lín,jiān,kuī,zhī,-,shāo,chū,jìn,zhī,-,yìn,yìn,rán,mò,xiāng,zhī,- 4 | tā,rì,-,lǘ,yī,míng,-,hǔ,dà,hài,yuǎn,dùn,-,yǐ,wèi,qiě,shì,yǐ,yě,-,shèn,kǒng,- 5 | rán,wǎng,lái,shì,zhī,-,jué,wú,yì,néng,zhě,-,yì,xí,qí,shēng,-,yòu,jìn,chū,qián,hòu,-,zhōng,bù,gǎn,bó,- 6 | shāo,jìn,yì,xiá,-,dàng,yǐ,chōng,mào,- 7 | lǘ,bù,shèng,nù,-,tí,zhī,- 8 | 
hǔ,yīn,xǐ,-,jì,zhī,yuē,-,-,jì,zhǐ,cǐ,ěr,-,- 9 | yīn,tiào,liáng,dà,hǎn,-,duàn,qí,hóu,-,jǐn,qí,ròu,-,nǎi,qù,- 10 | 11 | yī,-,xíng,zhī,páng,yě,lèi,yǒu,dé,-,shēng,zhī,hóng,yě,lèi,yǒu,néng,- 12 | xiàng,bù,chū,qí,jì,-,hǔ,suī,měng,-,yí,wèi,-,zú,bù,gǎn,qǔ,-,jīn,ruò,shì,yān,-,bēi,fū,- 13 | -------------------------------------------------------------------------------- /DEVELOP.md: -------------------------------------------------------------------------------- 1 | 开发文档 2 | ========== 3 | 4 | 5 | 发布新版本 6 | ---------- 7 | 8 | 发布新版本步骤如下: 9 | 10 | 1. 首先确保 master 分支是最新的准备发布的代码。 11 | 2. 确保 master 分支的单元测试和 examples 正常通过(可以通过把 master 代码推送到远程然后触发 CI 服务确认也可以在本地跑测试确认)。 12 | 3. 更新 `CHANGELOG.md`:增加变更记录、更新版本发布日期。 13 | 4. 更新 `README.md`:更新 Installation 中的版本信息。 14 | 5. 通过 [bumpversion](https://pypi.org/project/bumpversion/) 更新本地版本号以及增加 git tag(三选一)(如果不想使用 15 | bumpversion 命令的话,需要手动编辑 `Cargo.toml` 文件更新版本号以及通过 git tag 命令增加一个新 tag): 16 | * 如果是大版本(1.x.y -> 2.0.0)更新,执行 `bumpversion --verbose major` 命令。 17 | * 如果是不兼容/新功能版本(1.2.y -> 1.3.0),执行 `bumpversion --verbose minor` 命令。 18 | * 如果是 bugfix 之类的小版本(1.2.3 -> 1.2.4),执行 `bumpversion --verbose patch` 命令。 19 | 6. 准备发布新版本: 20 | * `cargo publish` 21 | 7. 检查发布结果是否符合预期: 22 | * https://crates.io/crates/pinyin 23 | * https://docs.rs/pinyin/ 24 | 8. 
推送变更: 25 | * `git push && git push --tags` 26 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master, develop ] 6 | pull_request: 7 | branches: [ master, develop ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | name: Build 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | # os: [ubuntu-latest, macos-latest, windows-latest] 20 | os: [ ubuntu-latest ] 21 | 22 | steps: 23 | 24 | - uses: actions/checkout@v4 25 | - name: Install minimal stable with clippy and rustfmt 26 | uses: dtolnay/rust-toolchain@stable 27 | with: 28 | toolchain: stable 29 | components: rustfmt, clippy 30 | 31 | - name: init 32 | run: git submodule update --init --recursive 33 | 34 | - name: Test 35 | run: bash ci/script.sh 36 | 37 | # - name: coveralls 38 | # run: bash ci/coveralls.sh 39 | -------------------------------------------------------------------------------- /tests/pinyin/c.txt-with_tone_num: -------------------------------------------------------------------------------- 1 | qia2n,wu2,lü2,-,yo3u,ha3o,shi4,zhe3,-,chua2n,za4i,yi3,ru4,-,zhi4,ze2,wu2,ke3,yo4ng,-,fa4ng,zhi1,sha1n,xia4,- 2 | hu3,jia4n,zhi1,-,pa2ng,ra2n,da4,wu4,ye3,-,yi3,we4i,she2n,- 3 | bi4,li2n,jia1n,kui1,zhi1,-,sha1o,chu1,ji4n,zhi1,-,yi4n,yi4n,ra2n,mo4,xia1ng,zhi1,- 4 | ta1,ri4,-,lü2,yi1,mi2ng,-,hu3,da4,ha4i,yua3n,du4n,-,yi3,we4i,qie3,shi4,yi3,ye3,-,she4n,ko3ng,- 5 | ra2n,wa3ng,la2i,shi4,zhi1,-,jue2,wu2,yi4,ne2ng,zhe3,-,yi4,xi2,qi2,she1ng,-,yo4u,ji4n,chu1,qia2n,ho4u,-,zho1ng,bu4,ga3n,bo2,- 6 | sha1o,ji4n,yi4,xia2,-,da4ng,yi3,cho1ng,ma4o,- 7 | lü2,bu4,she4ng,nu4,-,ti2,zhi1,- 8 | hu3,yi1n,xi3,-,ji4,zhi1,yue1,-,-,ji4,zhi3,ci3,e3r,-,- 9 | yi1n,tia4o,lia2ng,da4,ha3n,-,dua4n,qi2,ho2u,-,ji3n,qi2,ro4u,-,na3i,qu4,- 10 | 11 | 
yi1,-,xi2ng,zhi1,pa2ng,ye3,le4i,yo3u,de2,-,she1ng,zhi1,ho2ng,ye3,le4i,yo3u,ne2ng,- 12 | xia4ng,bu4,chu1,qi2,ji4,-,hu3,sui1,me3ng,-,yi2,we4i,-,zu2,bu4,ga3n,qu3,-,ji1n,ruo4,shi4,ya1n,-,be1i,fu1,- 13 | -------------------------------------------------------------------------------- /tests/pinyin/c.txt-with_tone_num_end: -------------------------------------------------------------------------------- 1 | qian2,wu2,lü2,-,you3,hao3,shi4,zhe3,-,chuan2,zai4,yi3,ru4,-,zhi4,ze2,wu2,ke3,yong4,-,fang4,zhi1,shan1,xia4,- 2 | hu3,jian4,zhi1,-,pang2,ran2,da4,wu4,ye3,-,yi3,wei4,shen2,- 3 | bi4,lin2,jian1,kui1,zhi1,-,shao1,chu1,jin4,zhi1,-,yin4,yin4,ran2,mo4,xiang1,zhi1,- 4 | ta1,ri4,-,lü2,yi1,ming2,-,hu3,da4,hai4,yuan3,dun4,-,yi3,wei4,qie3,shi4,yi3,ye3,-,shen4,kong3,- 5 | ran2,wang3,lai2,shi4,zhi1,-,jue2,wu2,yi4,neng2,zhe3,-,yi4,xi2,qi2,sheng1,-,you4,jin4,chu1,qian2,hou4,-,zhong1,bu4,gan3,bo2,- 6 | shao1,jin4,yi4,xia2,-,dang4,yi3,chong1,mao4,- 7 | lü2,bu4,sheng4,nu4,-,ti2,zhi1,- 8 | hu3,yin1,xi3,-,ji4,zhi1,yue1,-,-,ji4,zhi3,ci3,er3,-,- 9 | yin1,tiao4,liang2,da4,han3,-,duan4,qi2,hou2,-,jin3,qi2,rou4,-,nai3,qu4,- 10 | 11 | yi1,-,xing2,zhi1,pang2,ye3,lei4,you3,de2,-,sheng1,zhi1,hong2,ye3,lei4,you3,neng2,- 12 | xiang4,bu4,chu1,qi2,ji4,-,hu3,sui1,meng3,-,yi2,wei4,-,zu2,bu4,gan3,qu3,-,jin1,ruo4,shi4,yan1,-,bei1,fu1,- 13 | -------------------------------------------------------------------------------- /tests/pinyin/b.txt-first_letter: -------------------------------------------------------------------------------- 1 | j,j,f,j,j,-,m,l,d,h,z,- 2 | b,w,j,z,s,-,w,w,n,t,x,- 3 | w,n,h,s,s,-,w,n,h,s,y,- 4 | n,y,w,s,s,-,n,y,w,s,y,- 5 | z,y,j,j,t,-,k,h,d,d,b,- 6 | j,s,s,e,j,-,j,j,y,y,m,- 7 | a,y,w,d,e,-,m,l,w,z,x,- 8 | y,w,s,a,m,-,c,c,t,y,z,- 9 | 10 | d,s,m,j,m,-,x,s,m,a,j,- 11 | n,s,m,p,t,-,b,s,m,z,b,- 12 | c,c,y,n,q,-,m,s,h,h,b,- 13 | b,w,y,n,h,n,s,-,d,w,h,h,l,s,m,j,j,- 14 | d,c,h,h,q,-,m,z,h,s,t,- 15 | b,w,y,n,h,n,s,-,d,w,y,s,h,q,s,j,j,- 16 | 17 | w,l,f,r,j,-,g,s,d,r,f,- 18 | 
s,q,c,j,t,-,h,g,z,t,y,- 19 | j,j,b,z,s,-,z,s,s,n,g,- 20 | 21 | g,l,j,t,z,-,t,z,z,m,t,- 22 | c,x,s,e,z,-,s,c,b,q,q,- 23 | k,h,w,s,y,-,-,m,l,b,y,s,s,l,- 24 | y,j,m,t,q,l,z,-,s,e,h,g,x,-,- 25 | 26 | y,n,w,n,l,-,c,g,x,f,j,- 27 | a,z,w,m,l,-,d,h,l,h,z,- 28 | x,d,w,z,l,-,m,d,h,h,x,z,y,- 29 | k,w,d,g,m,-,z,w,x,j,c,- 30 | t,w,z,s,p,-,z,w,j,s,s,- 31 | d,c,l,y,b,-,d,j,t,h,h,- 32 | c,m,k,h,b,-,h,b,j,j,h,- 33 | -,t,x,s,e,n,-,b,z,m,l,s,n,l,-,- 34 | 35 | x,t,j,p,s,-,c,t,y,m,l,- 36 | l,t,b,d,z,-,a,n,b,w,s,x,c,- 37 | -------------------------------------------------------------------------------- /examples/basic.rs: -------------------------------------------------------------------------------- 1 | use pinyin::{ToPinyin, ToPinyinMulti}; 2 | 3 | fn main() { 4 | let hans = "中国人"; 5 | 6 | // 无声调,输出 zhong guo ren 7 | for pinyin in hans.to_pinyin().flatten() { 8 | print!("{} ", pinyin.plain()); 9 | } 10 | println!(); 11 | 12 | // 包含声调,输出 zhōng guó rén 13 | for pinyin in hans.to_pinyin().flatten() { 14 | print!("{} ", pinyin.with_tone()); 15 | } 16 | println!(); 17 | 18 | // 声调用数字表示,输出 zho1ng guo2 re2n 19 | for pinyin in hans.to_pinyin().flatten() { 20 | print!("{} ", pinyin.with_tone_num()); 21 | } 22 | println!(); 23 | 24 | // 声调用数字在末尾表示,输出 zhong1 guo2 ren2 25 | for pinyin in hans.to_pinyin().flatten() { 26 | print!("{} ", pinyin.with_tone_num_end()); 27 | } 28 | println!(); 29 | 30 | // 多音字,输出 31 | // zho1ng zho4ng 32 | // guo2 33 | // re2n 34 | for multi in hans.to_pinyin_multi().flatten() { 35 | for pinyin in multi { 36 | print!("{} ", pinyin.with_tone_num()); 37 | } 38 | println!(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 mozillazg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /coverage-check/src/main.rs: -------------------------------------------------------------------------------- 1 | use pinyin::ToPinyin; 2 | use std::collections::HashSet; 3 | use std::io::{self, BufRead, Write}; 4 | use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; 5 | 6 | const CHARS: &[&[char]] = &[ 7 | &['a', 'ā', 'á', 'ǎ', 'à'], 8 | &['o', 'ō', 'ó', 'ǒ', 'ò'], 9 | &['e', 'ē', 'é', 'ě', 'è'], 10 | &['i', 'ī', 'í', 'ǐ', 'ì'], 11 | &['u', 'ū', 'ú', 'ǔ', 'ù'], 12 | &['ü', 'ǖ', 'ǘ', 'ǚ', 'ǜ'], 13 | ]; 14 | 15 | fn main() -> io::Result<()> { 16 | let mut chars = HashSet::new(); 17 | let stdin = io::stdin(); 18 | for line in stdin.lock().lines() { 19 | let line = line?; 20 | for opt_pinyin in line.as_str().to_pinyin() { 21 | if let Some(pinyin) = opt_pinyin { 22 | chars.extend(pinyin.with_tone().chars()); 23 | } 24 | } 25 | } 26 | let mut stdout = StandardStream::stdout(ColorChoice::Auto); 27 | for line in 
CHARS.iter() { 28 | for ch in line.iter() { 29 | let color = if chars.contains(ch) { 30 | Color::Green 31 | } else { 32 | Color::Red 33 | }; 34 | stdout.set_color(ColorSpec::new().set_fg(Some(color)).set_bold(true))?; 35 | write!(&mut stdout, "{}\t", ch)?; 36 | } 37 | writeln!(&mut stdout)?; 38 | } 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /tests/pinyin/b.txt-plain: -------------------------------------------------------------------------------- 1 | ji,ji,fu,ji,ji,-,mu,lan,dang,hu,zhi,- 2 | bu,wen,ji,zhu,sheng,-,wei,wen,nü,tan,xi,- 3 | wen,nü,he,suo,si,-,wen,nü,he,suo,yi,- 4 | nü,yi,wu,suo,si,-,nü,yi,wu,suo,yi,- 5 | zuo,ye,jian,jun,tie,-,ke,han,da,dian,bing,- 6 | jun,shu,shi,er,juan,-,juan,juan,you,ye,ming,- 7 | a,ye,wu,da,er,-,mu,lan,wu,zhang,xiong,- 8 | yuan,wei,shi,an,ma,-,cong,ci,ti,ye,zheng,- 9 | 10 | dong,shi,mai,jun,ma,-,xi,shi,mai,an,jian,- 11 | nan,shi,mai,pei,tou,-,bei,shi,mai,zhang,bian,- 12 | chao,ci,ye,niang,qu,-,mu,su,huang,he,bian,- 13 | bu,wen,ye,niang,huan,nü,sheng,-,dan,wen,huang,he,liu,shui,ming,jian,jian,- 14 | dan,ci,huang,he,qu,-,mu,zhi,hei,shan,tou,- 15 | bu,wen,ye,niang,huan,nü,sheng,-,dan,wen,yan,shan,hu,qi,sheng,jiu,jiu,- 16 | 17 | wan,li,fu,rong,ji,-,guan,shan,du,ruo,fei,- 18 | shuo,qi,chuan,jin,tuo,-,han,guang,zhao,tie,yi,- 19 | jiang,jun,bai,zhan,si,-,zhuang,shi,shi,nian,gui,- 20 | 21 | gui,lai,jian,tian,zi,-,tian,zi,zuo,ming,tang,- 22 | ce,xun,shi,er,zhuan,-,shang,ci,bai,qian,qiang,- 23 | ke,han,wen,suo,yu,-,-,mu,lan,bu,yong,shang,shu,lang,- 24 | yuan,jie,ming,tuo,qian,li,zu,-,song,er,hai,gu,xiang,-,- 25 | 26 | ye,niang,wen,nü,lai,-,chu,guo,xiang,fu,jiang,- 27 | a,zi,wen,mei,lai,-,dang,hu,li,hong,zhuang,- 28 | xiao,di,wen,zi,lai,-,mo,dao,huo,huo,xiang,zhu,yang,- 29 | kai,wo,dong,ge,men,-,zuo,wo,xi,jian,chuang,- 30 | tuo,wo,zhan,shi,pao,-,zhe,wo,jiu,shi,shang,- 31 | dang,chuang,li,yun,bin,-,dui,jing,tie,hua,huang,- 32 | chu,men,kan,huo,ban,-,huo,ban,jie,jing,huang,- 33 | 
-,tong,xing,shi,er,nian,-,bu,zhi,mu,lan,shi,nü,lang,-,- 34 | 35 | xiong,tu,jiao,pu,shuo,-,ci,tu,yan,mi,li,- 36 | liang,tu,bang,di,zou,-,an,neng,bian,wo,shi,xiong,ci,- 37 | -------------------------------------------------------------------------------- /tests/pinyin/b.txt-with_tone: -------------------------------------------------------------------------------- 1 | jī,jī,fù,jī,jī,-,mù,lán,dāng,hù,zhī,- 2 | bù,wén,jī,zhù,shēng,-,wéi,wén,nǚ,tàn,xī,- 3 | wèn,nǚ,hé,suǒ,sī,-,wèn,nǚ,hé,suǒ,yì,- 4 | nǚ,yì,wú,suǒ,sī,-,nǚ,yì,wú,suǒ,yì,- 5 | zuó,yè,jiàn,jūn,tiē,-,kě,hàn,dà,diǎn,bīng,- 6 | jūn,shū,shí,èr,juǎn,-,juǎn,juǎn,yǒu,yé,míng,- 7 | ā,yé,wú,dà,ér,-,mù,lán,wú,zhǎng,xiōng,- 8 | yuàn,wèi,shì,ān,mǎ,-,cóng,cǐ,tì,yé,zhēng,- 9 | 10 | dōng,shì,mǎi,jùn,mǎ,-,xī,shì,mǎi,ān,jiān,- 11 | nán,shì,mǎi,pèi,tóu,-,běi,shì,mǎi,zhǎng,biān,- 12 | cháo,cí,yé,niáng,qù,-,mù,sù,huáng,hé,biān,- 13 | bù,wén,yé,niáng,huàn,nǚ,shēng,-,dàn,wén,huáng,hé,liú,shuǐ,míng,jiàn,jiàn,- 14 | dàn,cí,huáng,hé,qù,-,mù,zhì,hēi,shān,tóu,- 15 | bù,wén,yé,niáng,huàn,nǚ,shēng,-,dàn,wén,yàn,shān,hú,qí,shēng,jiū,jiū,- 16 | 17 | wàn,lǐ,fù,róng,jī,-,guān,shān,dù,ruò,fēi,- 18 | shuò,qì,chuán,jīn,tuò,-,hán,guāng,zhào,tiě,yī,- 19 | jiāng,jūn,bǎi,zhàn,sǐ,-,zhuàng,shì,shí,nián,guī,- 20 | 21 | guī,lái,jiàn,tiān,zi,-,tiān,zi,zuò,míng,táng,- 22 | cè,xūn,shí,èr,zhuǎn,-,shǎng,cì,bǎi,qiān,qiáng,- 23 | kě,hàn,wèn,suǒ,yù,-,-,mù,lán,bù,yòng,shàng,shū,láng,- 24 | yuàn,jiè,míng,tuó,qiān,lǐ,zú,-,sòng,ér,hái,gù,xiāng,-,- 25 | 26 | yé,niáng,wén,nǚ,lái,-,chū,guō,xiāng,fú,jiāng,- 27 | ā,zǐ,wén,mèi,lái,-,dāng,hù,lǐ,hóng,zhuāng,- 28 | xiǎo,dì,wén,zǐ,lái,-,mó,dāo,huò,huò,xiàng,zhū,yáng,- 29 | kāi,wǒ,dōng,gé,mén,-,zuò,wǒ,xī,jiān,chuáng,- 30 | tuō,wǒ,zhàn,shí,páo,-,zhe,wǒ,jiù,shí,shang,- 31 | dāng,chuāng,lǐ,yún,bìn,-,duì,jìng,tiē,huā,huáng,- 32 | chū,mén,kàn,huǒ,bàn,-,huǒ,bàn,jiē,jīng,huáng,- 33 | -,tóng,xíng,shí,èr,nián,-,bù,zhī,mù,lán,shì,nǚ,láng,-,- 34 | 35 | xióng,tù,jiǎo,pū,shuò,-,cí,tù,yǎn,mí,lí,- 36 | 
liǎng,tù,bàng,dì,zǒu,-,ān,néng,biàn,wǒ,shì,xióng,cí,- 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pinyin" 3 | version = "0.11.0" 4 | authors = ["mozillazg ", "Xidorn Quan "] 5 | license = "MIT" 6 | readme = "README.md" 7 | homepage = "https://github.com/mozillazg/rust-pinyin" 8 | repository = "https://github.com/mozillazg/rust-pinyin" 9 | description = "Convert Chinese to pinyin" 10 | documentation = "https://docs.rs/pinyin/" 11 | keywords = ["pinyin", "hanzi", "Chinese"] 12 | include = [ 13 | "/build.rs", 14 | "/examples/*", 15 | "/pinyin-data/pinyin.txt", 16 | "/src/**/*", 17 | "/tests/**/*", 18 | "/Cargo.toml", 19 | "/LICENSE", 20 | "/README.md", 21 | ] 22 | edition = "2018" 23 | 24 | [workspace] 25 | members = ["coverage-check"] 26 | 27 | [badges] 28 | travis-ci = { repository = "mozillazg/rust-pinyin", branch = "master" } 29 | appveyor = { repository = "mozillazg/rust-pinyin", branch = "master", service = "github" } 30 | coveralls = { repository = "mozillazg/rust-pinyin", branch = "master" } 31 | 32 | [features] 33 | default = [ 34 | "compat", 35 | "plain", 36 | "with_tone", 37 | "with_tone_num", 38 | "with_tone_num_end", 39 | "heteronym", 40 | ] 41 | compat = [ 42 | "plain", 43 | "with_tone", 44 | "with_tone_num", 45 | "heteronym", 46 | ] 47 | plain = [] 48 | with_tone = [] 49 | with_tone_num = [] 50 | with_tone_num_end = [] 51 | heteronym = [] 52 | 53 | [[example]] 54 | name = "basic" 55 | required-features = ["plain", "with_tone", "with_tone_num", "with_tone_num_end", "heteronym"] 56 | 57 | [[test]] 58 | name = "compat" 59 | required-features = ["compat"] 60 | 61 | [[test]] 62 | name = "special_pinyin" 63 | required-features = ["with_tone_num_end", "heteronym"] 64 | -------------------------------------------------------------------------------- /tests/pinyin/b.txt-with_tone_num: 
-------------------------------------------------------------------------------- 1 | ji1,ji1,fu4,ji1,ji1,-,mu4,la2n,da1ng,hu4,zhi1,- 2 | bu4,we2n,ji1,zhu4,she1ng,-,we2i,we2n,nü3,ta4n,xi1,- 3 | we4n,nü3,he2,suo3,si1,-,we4n,nü3,he2,suo3,yi4,- 4 | nü3,yi4,wu2,suo3,si1,-,nü3,yi4,wu2,suo3,yi4,- 5 | zuo2,ye4,jia4n,ju1n,tie1,-,ke3,ha4n,da4,dia3n,bi1ng,- 6 | ju1n,shu1,shi2,e4r,jua3n,-,jua3n,jua3n,yo3u,ye2,mi2ng,- 7 | a1,ye2,wu2,da4,e2r,-,mu4,la2n,wu2,zha3ng,xio1ng,- 8 | yua4n,we4i,shi4,a1n,ma3,-,co2ng,ci3,ti4,ye2,zhe1ng,- 9 | 10 | do1ng,shi4,ma3i,ju4n,ma3,-,xi1,shi4,ma3i,a1n,jia1n,- 11 | na2n,shi4,ma3i,pe4i,to2u,-,be3i,shi4,ma3i,zha3ng,bia1n,- 12 | cha2o,ci2,ye2,nia2ng,qu4,-,mu4,su4,hua2ng,he2,bia1n,- 13 | bu4,we2n,ye2,nia2ng,hua4n,nü3,she1ng,-,da4n,we2n,hua2ng,he2,liu2,shui3,mi2ng,jia4n,jia4n,- 14 | da4n,ci2,hua2ng,he2,qu4,-,mu4,zhi4,he1i,sha1n,to2u,- 15 | bu4,we2n,ye2,nia2ng,hua4n,nü3,she1ng,-,da4n,we2n,ya4n,sha1n,hu2,qi2,she1ng,jiu1,jiu1,- 16 | 17 | wa4n,li3,fu4,ro2ng,ji1,-,gua1n,sha1n,du4,ruo4,fe1i,- 18 | shuo4,qi4,chua2n,ji1n,tuo4,-,ha2n,gua1ng,zha4o,tie3,yi1,- 19 | jia1ng,ju1n,ba3i,zha4n,si3,-,zhua4ng,shi4,shi2,nia2n,gui1,- 20 | 21 | gui1,la2i,jia4n,tia1n,zi,-,tia1n,zi,zuo4,mi2ng,ta2ng,- 22 | ce4,xu1n,shi2,e4r,zhua3n,-,sha3ng,ci4,ba3i,qia1n,qia2ng,- 23 | ke3,ha4n,we4n,suo3,yu4,-,-,mu4,la2n,bu4,yo4ng,sha4ng,shu1,la2ng,- 24 | yua4n,jie4,mi2ng,tuo2,qia1n,li3,zu2,-,so4ng,e2r,ha2i,gu4,xia1ng,-,- 25 | 26 | ye2,nia2ng,we2n,nü3,la2i,-,chu1,guo1,xia1ng,fu2,jia1ng,- 27 | a1,zi3,we2n,me4i,la2i,-,da1ng,hu4,li3,ho2ng,zhua1ng,- 28 | xia3o,di4,we2n,zi3,la2i,-,mo2,da1o,huo4,huo4,xia4ng,zhu1,ya2ng,- 29 | ka1i,wo3,do1ng,ge2,me2n,-,zuo4,wo3,xi1,jia1n,chua2ng,- 30 | tuo1,wo3,zha4n,shi2,pa2o,-,zhe,wo3,jiu4,shi2,shang,- 31 | da1ng,chua1ng,li3,yu2n,bi4n,-,dui4,ji4ng,tie1,hua1,hua2ng,- 32 | chu1,me2n,ka4n,huo3,ba4n,-,huo3,ba4n,jie1,ji1ng,hua2ng,- 33 | -,to2ng,xi2ng,shi2,e4r,nia2n,-,bu4,zhi1,mu4,la2n,shi4,nü3,la2ng,-,- 34 | 35 | xio2ng,tu4,jia3o,pu1,shuo4,-,ci2,tu4,ya3n,mi2,li2,- 36 | 
lia3ng,tu4,ba4ng,di4,zo3u,-,a1n,ne2ng,bia4n,wo3,shi4,xio2ng,ci2,- 37 | -------------------------------------------------------------------------------- /tests/pinyin/b.txt-with_tone_num_end: -------------------------------------------------------------------------------- 1 | ji1,ji1,fu4,ji1,ji1,-,mu4,lan2,dang1,hu4,zhi1,- 2 | bu4,wen2,ji1,zhu4,sheng1,-,wei2,wen2,nü3,tan4,xi1,- 3 | wen4,nü3,he2,suo3,si1,-,wen4,nü3,he2,suo3,yi4,- 4 | nü3,yi4,wu2,suo3,si1,-,nü3,yi4,wu2,suo3,yi4,- 5 | zuo2,ye4,jian4,jun1,tie1,-,ke3,han4,da4,dian3,bing1,- 6 | jun1,shu1,shi2,er4,juan3,-,juan3,juan3,you3,ye2,ming2,- 7 | a1,ye2,wu2,da4,er2,-,mu4,lan2,wu2,zhang3,xiong1,- 8 | yuan4,wei4,shi4,an1,ma3,-,cong2,ci3,ti4,ye2,zheng1,- 9 | 10 | dong1,shi4,mai3,jun4,ma3,-,xi1,shi4,mai3,an1,jian1,- 11 | nan2,shi4,mai3,pei4,tou2,-,bei3,shi4,mai3,zhang3,bian1,- 12 | chao2,ci2,ye2,niang2,qu4,-,mu4,su4,huang2,he2,bian1,- 13 | bu4,wen2,ye2,niang2,huan4,nü3,sheng1,-,dan4,wen2,huang2,he2,liu2,shui3,ming2,jian4,jian4,- 14 | dan4,ci2,huang2,he2,qu4,-,mu4,zhi4,hei1,shan1,tou2,- 15 | bu4,wen2,ye2,niang2,huan4,nü3,sheng1,-,dan4,wen2,yan4,shan1,hu2,qi2,sheng1,jiu1,jiu1,- 16 | 17 | wan4,li3,fu4,rong2,ji1,-,guan1,shan1,du4,ruo4,fei1,- 18 | shuo4,qi4,chuan2,jin1,tuo4,-,han2,guang1,zhao4,tie3,yi1,- 19 | jiang1,jun1,bai3,zhan4,si3,-,zhuang4,shi4,shi2,nian2,gui1,- 20 | 21 | gui1,lai2,jian4,tian1,zi,-,tian1,zi,zuo4,ming2,tang2,- 22 | ce4,xun1,shi2,er4,zhuan3,-,shang3,ci4,bai3,qian1,qiang2,- 23 | ke3,han4,wen4,suo3,yu4,-,-,mu4,lan2,bu4,yong4,shang4,shu1,lang2,- 24 | yuan4,jie4,ming2,tuo2,qian1,li3,zu2,-,song4,er2,hai2,gu4,xiang1,-,- 25 | 26 | ye2,niang2,wen2,nü3,lai2,-,chu1,guo1,xiang1,fu2,jiang1,- 27 | a1,zi3,wen2,mei4,lai2,-,dang1,hu4,li3,hong2,zhuang1,- 28 | xiao3,di4,wen2,zi3,lai2,-,mo2,dao1,huo4,huo4,xiang4,zhu1,yang2,- 29 | kai1,wo3,dong1,ge2,men2,-,zuo4,wo3,xi1,jian1,chuang2,- 30 | tuo1,wo3,zhan4,shi2,pao2,-,zhe,wo3,jiu4,shi2,shang,- 31 | dang1,chuang1,li3,yun2,bin4,-,dui4,jing4,tie1,hua1,huang2,- 32 | 
chu1,men2,kan4,huo3,ban4,-,huo3,ban4,jie1,jing1,huang2,- 33 | -,tong2,xing2,shi2,er4,nian2,-,bu4,zhi1,mu4,lan2,shi4,nü3,lang2,-,- 34 | 35 | xiong2,tu4,jiao3,pu1,shuo4,-,ci2,tu4,yan3,mi2,li2,- 36 | liang3,tu4,bang4,di4,zou3,-,an1,neng2,bian4,wo3,shi4,xiong2,ci2,- 37 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use crate::data::CHAR_BLOCKS; 2 | use std::convert::TryFrom; 3 | 4 | #[cfg(feature = "compat")] 5 | mod compat; 6 | mod data; 7 | mod pinyin; 8 | #[cfg(feature = "heteronym")] 9 | mod pinyin_multi; 10 | 11 | #[cfg(feature = "compat")] 12 | pub use crate::compat::*; 13 | pub use crate::pinyin::{Pinyin, PinyinStrIter, ToPinyin}; 14 | #[cfg(feature = "heteronym")] 15 | pub use crate::pinyin_multi::{PinyinMulti, PinyinMultiIter, PinyinMultiStrIter, ToPinyinMulti}; 16 | 17 | /// 将给定输入字符串的拼音通过给定映射函数后存入 `Vec` 中 18 | /// 19 | /// 这个函数会跳过任何没有拼音的字符。本函数主要用于测试目的。 20 | pub fn to_pinyin_vec(input: &str, f: F) -> Vec<&'static str> 21 | where 22 | F: Fn(Pinyin) -> &'static str, 23 | { 24 | input.to_pinyin().flatten().map(f).collect() 25 | } 26 | 27 | /// 单个字符的拼音数据 28 | struct PinyinData { 29 | #[cfg(feature = "plain")] 30 | plain: &'static str, 31 | #[cfg(feature = "with_tone")] 32 | with_tone: &'static str, 33 | #[cfg(feature = "with_tone_num")] 34 | with_tone_num: &'static str, 35 | #[cfg(feature = "with_tone_num_end")] 36 | with_tone_num_end: &'static str, 37 | #[cfg(feature = "compat")] 38 | split: usize, 39 | } 40 | 41 | /// 在 [start, end) 之间字符的数据块 42 | struct CharBlock { 43 | /// 本块的第一个字符 44 | start_code: u32, 45 | /// 本块字符的数据索引 46 | /// 零值表示对应字符没有拼音数据,非零值表示对应的拼音数据为 `PINYIN_DATA[i]`。 47 | data: &'static [u16], 48 | /// 本块字符对应的多音字数据索引 49 | /// 对应的多音字数据为 `HETERONYM_TABLE[i]`。 50 | #[cfg(feature = "heteronym")] 51 | heteronym: &'static [u16], 52 | } 53 | 54 | #[inline] 55 | fn get_block_and_index(ch: char) -> Option<(&'static CharBlock, 
usize)> { 56 | let code = u32::from(ch); 57 | for block in CHAR_BLOCKS.iter() { 58 | if code < block.start_code { 59 | return None; 60 | } 61 | let idx = usize::try_from(code - block.start_code).unwrap(); 62 | if idx < block.data.len() { 63 | return Some((block, idx)); 64 | } 65 | } 66 | None 67 | } 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rust-pinyin 2 | 3 | [![Build Status](https://github.com/mozillazg/rust-pinyin/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/mozillazg/rust-pinyin/actions/workflows/ci.yml) 4 | [![Crates.io Version](https://img.shields.io/crates/v/pinyin.svg)](https://crates.io/crates/pinyin) 5 | [![Doc](https://img.shields.io/badge/doc-reference-blue.svg)](https://docs.rs/pinyin/) 6 | 7 | 8 | 汉语拼音转换工具 Rust 版 9 | 10 | 11 | Installation 12 | ------------ 13 | 14 | Add this to your `Cargo.toml`: 15 | 16 | ``` 17 | [dependencies] 18 | pinyin = "0.10" 19 | ``` 20 | 21 | Documentation 22 | -------------- 23 | 24 | API documentation can be found here: https://docs.rs/pinyin/ 25 | 26 | 27 | Usage 28 | ------ 29 | 30 | ```rust 31 | use pinyin::{ToPinyin, ToPinyinMulti}; 32 | 33 | fn main() { 34 | let hans = "中国人"; 35 | 36 | // 无声调,输出 zhong guo ren 37 | for pinyin in hans.to_pinyin().flatten() { 38 | print!("{} ", pinyin.plain()); 39 | } 40 | println!(); 41 | 42 | // 包含声调,输出 zhōng guó rén 43 | for pinyin in hans.to_pinyin().flatten() { 44 | print!("{} ", pinyin.with_tone()); 45 | } 46 | println!(); 47 | 48 | // 声调用数字表示,输出 zho1ng guo2 re2n 49 | for pinyin in hans.to_pinyin().flatten() { 50 | print!("{} ", pinyin.with_tone_num()); 51 | } 52 | println!(); 53 | 54 | // 声调用数字在末尾表示,输出 zhong1 guo2 ren2 55 | for pinyin in hans.to_pinyin().flatten() { 56 | print!("{} ", pinyin.with_tone_num_end()); 57 | } 58 | println!(); 59 | 60 | // 多音字,输出 61 | // zho1ng zho4ng 62 | // guo2 63 | // re2n 64 | for 
multi in hans.to_pinyin_multi().flatten() { 65 | for pinyin in multi { 66 | print!("{} ", pinyin.with_tone_num()); 67 | } 68 | println!(); 69 | } 70 | } 71 | ``` 72 | 73 | Build 74 | ------------ 75 | 76 | ``` 77 | $ cargo build 78 | ``` 79 | 80 | Test 81 | ------------ 82 | 83 | ``` 84 | $ cargo test 85 | ``` 86 | 87 | Data 88 | ----- 89 | 90 | 使用来自 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据。 91 | 92 | 93 | Related Projects 94 | ----------------- 95 | 96 | * [hotoo/pinyin](https://github.com/hotoo/pinyin): 汉语拼音转换工具 Node.js/JavaScript 版。 97 | * [mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。 98 | * [mozillazg/go-pinyin](https://github.com/mozillazg/go-pinyin): 汉语拼音转换工具 Go 版。 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/rust,code,intellij+all 3 | # Edit at https://www.gitignore.io/?templates=rust,code,intellij+all 4 | 5 | ### Code ### 6 | .vscode/* 7 | !.vscode/settings.json 8 | !.vscode/tasks.json 9 | !.vscode/launch.json 10 | !.vscode/extensions.json 11 | 12 | ### Intellij+all ### 13 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 14 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 15 | 16 | # User-specific stuff 17 | .idea/**/workspace.xml 18 | .idea/**/tasks.xml 19 | .idea/**/usage.statistics.xml 20 | .idea/**/dictionaries 21 | .idea/**/shelf 22 | 23 | # Generated files 24 | .idea/**/contentModel.xml 25 | 26 | # Sensitive or high-churn files 27 | .idea/**/dataSources/ 28 | .idea/**/dataSources.ids 29 | .idea/**/dataSources.local.xml 30 | .idea/**/sqlDataSources.xml 31 | .idea/**/dynamic.xml 32 | .idea/**/uiDesigner.xml 33 | .idea/**/dbnavigator.xml 34 | 35 | # Gradle 36 | .idea/**/gradle.xml 37 | .idea/**/libraries 38 | 39 | # 
Gradle and Maven with auto-import 40 | # When using Gradle or Maven with auto-import, you should exclude module files, 41 | # since they will be recreated, and may cause churn. Uncomment if using 42 | # auto-import. 43 | # .idea/modules.xml 44 | # .idea/*.iml 45 | # .idea/modules 46 | 47 | # CMake 48 | cmake-build-*/ 49 | 50 | # Mongo Explorer plugin 51 | .idea/**/mongoSettings.xml 52 | 53 | # File-based project format 54 | *.iws 55 | 56 | # IntelliJ 57 | out/ 58 | 59 | # mpeltonen/sbt-idea plugin 60 | .idea_modules/ 61 | 62 | # JIRA plugin 63 | atlassian-ide-plugin.xml 64 | 65 | # Cursive Clojure plugin 66 | .idea/replstate.xml 67 | 68 | # Crashlytics plugin (for Android Studio and IntelliJ) 69 | com_crashlytics_export_strings.xml 70 | crashlytics.properties 71 | crashlytics-build.properties 72 | fabric.properties 73 | 74 | # Editor-based Rest Client 75 | .idea/httpRequests 76 | 77 | # Android studio 3.1+ serialized cache file 78 | .idea/caches/build_file_checksums.ser 79 | 80 | # JetBrains templates 81 | **___jb_tmp___ 82 | 83 | ### Intellij+all Patch ### 84 | # Ignores the whole .idea folder and all .iml files 85 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 86 | 87 | .idea/ 88 | 89 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 90 | 91 | *.iml 92 | modules.xml 93 | .idea/misc.xml 94 | *.ipr 95 | 96 | # Sonarlint plugin 97 | .idea/sonarlint 98 | 99 | ### Rust ### 100 | # Generated by Cargo 101 | # will have compiled files and executables 102 | /target/ 103 | 104 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 105 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 106 | Cargo.lock 107 | 108 | # These are backup files generated by rustfmt 109 | **/*.rs.bk 110 | 111 | # End of https://www.gitignore.io/api/rust,code,intellij+all 112 | 
-------------------------------------------------------------------------------- /tests/pinyin.rs: -------------------------------------------------------------------------------- 1 | use pinyin::{Pinyin, ToPinyin}; 2 | use std::ffi::OsStr; 3 | use std::fs::{self, File}; 4 | use std::io::{self, BufRead, BufReader, Write}; 5 | use std::path::PathBuf; 6 | 7 | const DATA_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/pinyin"); 8 | 9 | #[test] 10 | #[cfg(feature = "plain")] 11 | fn pinyin_plain() -> io::Result<()> { 12 | run_test_cases("plain", Pinyin::plain) 13 | } 14 | 15 | #[test] 16 | #[cfg(feature = "with_tone")] 17 | fn pinyin_with_tone() -> io::Result<()> { 18 | run_test_cases("with_tone", Pinyin::with_tone) 19 | } 20 | 21 | #[test] 22 | #[cfg(feature = "with_tone_num")] 23 | fn pinyin_with_tone_num() -> io::Result<()> { 24 | run_test_cases("with_tone_num", Pinyin::with_tone_num) 25 | } 26 | 27 | #[test] 28 | #[cfg(feature = "with_tone_num_end")] 29 | fn pinyin_with_tone_num_end() -> io::Result<()> { 30 | run_test_cases("with_tone_num_end", Pinyin::with_tone_num_end) 31 | } 32 | 33 | #[test] 34 | #[cfg(feature = "plain")] 35 | fn pinyin_first_letter() -> io::Result<()> { 36 | run_test_cases("first_letter", Pinyin::first_letter) 37 | } 38 | 39 | fn run_test_cases(suffix: &str, converter: fn(Pinyin) -> &'static str) -> io::Result<()> { 40 | let test_cases = list_test_cases()?; 41 | for input_path in test_cases.iter() { 42 | let input_file = File::open(input_path)?; 43 | let input = BufReader::new(input_file) 44 | .lines() 45 | .map(|line| { 46 | let result = line? 
47 | .as_str() 48 | .to_pinyin() 49 | .map(|pinyin| pinyin.map_or("-", converter)) 50 | .collect::>(); 51 | Ok(result) 52 | }) 53 | .collect::>>()?; 54 | 55 | let expected_path = input_path.with_extension(format!("txt-{suffix}")); 56 | if !expected_path.exists() { 57 | let mut expected_file = File::create(expected_path)?; 58 | for line in input.iter() { 59 | writeln!(expected_file, "{}", line.join(","))?; 60 | } 61 | } else { 62 | let expected_file = File::open(expected_path)?; 63 | let mut expected_iter = BufReader::new(expected_file).lines(); 64 | for (i, input_line) in input.iter().enumerate() { 65 | let expected_line = expected_iter 66 | .next() 67 | .expect("unexpected end of expected file")?; 68 | let expected = expected_line.split_terminator(',').collect::>(); 69 | assert_eq!( 70 | input_line, 71 | &expected, 72 | "unmatched result on line {} in {}", 73 | i, 74 | input_path.file_name().and_then(OsStr::to_str).unwrap(), 75 | ); 76 | } 77 | } 78 | } 79 | Ok(()) 80 | } 81 | 82 | fn list_test_cases() -> io::Result> { 83 | fs::read_dir(DATA_PATH)? 
84 | .filter_map(|entry| { 85 | let path = match entry { 86 | Ok(entry) => entry.path(), 87 | Err(e) => return Some(Err(e)), 88 | }; 89 | let is_input = path.extension().is_some_and(|ext| ext == "txt"); 90 | let result = if is_input { Some(path) } else { None }; 91 | Ok(result).transpose() 92 | }) 93 | .collect() 94 | } 95 | -------------------------------------------------------------------------------- /src/compat.rs: -------------------------------------------------------------------------------- 1 | #![allow(deprecated)] 2 | 3 | use crate::{Pinyin, ToPinyin, ToPinyinMulti}; 4 | use std::collections::HashSet; 5 | 6 | /// 拼音风格 7 | #[deprecated = "请使用 `Pinyin` 的方法代替"] 8 | #[derive(Debug, PartialEq, Eq, Hash)] 9 | pub enum Style { 10 | /// 普通风格,不带声调(默认风格)。如: `pin yin` 11 | Normal, 12 | /// 声调风格 1,拼音声调在韵母第一个字母上。如: `pīn yīn` 13 | Tone, 14 | /// 声调风格 2,即拼音声调在各个拼音之后,用数字 [0-4] 进行表示。如: `pi1n yi1n` 15 | Tone2, 16 | /// 声母风格,只返回各个拼音的声母部分。如:中国 的拼音 `zh g` 17 | Initials, 18 | /// 首字母风格,只返回拼音的首字母部分。如: `p y` 19 | FirstLetter, 20 | /// 韵母风格 1,只返回各个拼音的韵母部分,不带声调。如: `ong uo` 21 | Finals, 22 | /// 韵母风格 2,带声调,声调在韵母第一个字母上。如: `ōng uó` 23 | FinalsTone, 24 | /// 韵母风格 2,带声调,声调在各个拼音之后,用数字 [0-4] 进行表示。如: `o1ng uo2` 25 | FinalsTone2, 26 | } 27 | 28 | /// 参数 29 | #[deprecated] 30 | #[derive(Debug, PartialEq, Eq, Hash)] 31 | pub struct Args { 32 | /// 拼音风格 33 | pub style: Style, 34 | /// 是否启用多音字模式 35 | pub heteronym: bool, 36 | } 37 | 38 | impl Args { 39 | /// 返回一个默认参数 40 | /// 41 | /// ```ignore 42 | /// Args { 43 | /// style: Style::Normal, 44 | /// heteronym: false, 45 | /// } 46 | /// ``` 47 | pub fn new() -> Args { 48 | Args { 49 | style: Style::Normal, 50 | heteronym: false, 51 | } 52 | } 53 | } 54 | 55 | impl Default for Args { 56 | fn default() -> Self { 57 | Self::new() 58 | } 59 | } 60 | 61 | fn apply_style(py: Pinyin, style: &Style) -> &'static str { 62 | match style { 63 | Style::Normal => py.plain(), 64 | Style::Tone => py.with_tone(), 65 | Style::Tone2 => py.with_tone_num(), 66 | 
Style::Initials => py.initials(), 67 | Style::FirstLetter => py.first_letter(), 68 | Style::Finals => py.finals_plain(), 69 | Style::FinalsTone => py.finals_with_tone(), 70 | Style::FinalsTone2 => py.finals_with_tone_num(), 71 | } 72 | } 73 | 74 | /// 汉字转拼音 75 | /// 76 | /// ``` 77 | /// let hans = "中国人"; 78 | /// let args = pinyin::Args::new(); 79 | /// 80 | /// // 默认输出 [["zhong"] ["guo"] ["ren"]] 81 | /// println!("{:?}", pinyin::pinyin(hans, &args)); 82 | /// ``` 83 | #[deprecated = "请使用 `ToPinyin` 或 `ToPinyinMulti` 代替"] 84 | pub fn pinyin(s: &str, a: &Args) -> Vec> { 85 | if a.heteronym { 86 | s.to_pinyin_multi() 87 | .map(|multi| match multi { 88 | Some(multi) => { 89 | let mut set = HashSet::new(); 90 | multi 91 | .into_iter() 92 | .map(|pinyin| apply_style(pinyin, &a.style)) 93 | .filter(|s| set.insert(*s)) 94 | .map(str::to_string) 95 | .collect() 96 | } 97 | None => vec![], 98 | }) 99 | .collect() 100 | } else { 101 | s.to_pinyin() 102 | .map(|pinyin| match pinyin { 103 | Some(pinyin) => vec![apply_style(pinyin, &a.style).to_string()], 104 | None => vec![], 105 | }) 106 | .collect() 107 | } 108 | } 109 | 110 | /// 汉字转拼音,与 ``pinyin`` 的区别是返回值不同,每个汉字只取一个音 111 | /// 112 | /// ``` 113 | /// let hans = "中国人"; 114 | /// let args = pinyin::Args::new(); 115 | /// 116 | /// // 默认输出 ["zhong", "guo", "ren"] 117 | /// println!("{:?}", pinyin::lazy_pinyin(hans, &args)); 118 | /// ``` 119 | #[deprecated = "请使用 `ToPinyin` 代替"] 120 | pub fn lazy_pinyin(s: &str, a: &Args) -> Vec { 121 | s.to_pinyin() 122 | .flatten() 123 | .map(|pinyin| apply_style(pinyin, &a.style).to_string()) 124 | .collect() 125 | } 126 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.10.0] (2023-05-23) 4 | 5 | * 使用 [pinyin-data] v0.13.0 的拼音数据 6 | 7 | ## [0.9.0] (2021-12-12) 8 | 9 | * 使用 [pinyin-data] v0.12.0 的拼音数据 10 | 11 | ## [0.8.0] 
(2020-06-13) 12 | 13 | * 使用 [pinyin-data] v0.9.0 的拼音数据 14 | 15 | ## [0.7.0] (2019-06-12) 16 | 17 | * 重新设计 API 使用迭代器及静态数据,实现无运行时开销 (via [#36]) 18 | * 修正部分非常见拼音在数字调号时依然使用音调符号的问题 (via [#51]) 19 | * 当音节以数字声调标记法输出时,字母`ü`不再被改写为字母`v` (via [#52]) 20 | * 添加声调数字在拼音末尾的表示形式 (via [#44]) 21 | 22 | ## [0.6.0] (2019-05-01) 23 | 24 | * Use hashmap instead of binary search (via [#27]. Thanks [@hanabi1224]) 25 | * 无声调相关风格下对结果去重,Fixed [#25] (via [#28]. Thanks [@hanabi1224]) 26 | * 增加 Windows CI (via [#29]. Thanks [@hanabi1224]) 27 | 28 | ## [0.5.0] (2019-04-07) 29 | 30 | * 使用 [pinyin-data] v0.7.0 的拼音数据 31 | 32 | ## [0.4.0] (2018-09-01) 33 | 34 | * 移除依赖、增强编译性能和速度 (via [#20]. Thanks [@LuoZijun]) 35 | * 使用 [pinyin-data] v0.6.1 的拼音数据 36 | 37 | ## [0.3.0] (2018-04-30) 38 | 39 | * 使用 [pinyin-data] v0.5.1 的拼音数据 40 | * 使用 clippy 优化代码 41 | * 最低支持 1.17 版本的 Rust 42 | * 增加 examples 43 | * 更新依赖包版本: 44 | * `regex`: `~0.1.8` -> `0.2` 45 | * `phf`: `~0.7.3` -> `0.7` 46 | 47 | ## [0.2.0] (2017-10-05) 48 | 49 | * 修改 ``pinyin`` 函数,由 50 | ``pub fn pinyin<'a>(s: &'a str, a: &'a Args) -> Vec<Vec<&'a str>>`` 51 | 改为 52 | ``pub fn pinyin(s: &str, a: &Args) -> Vec<Vec<String>>`` 53 | * 增加 ``lazy_pinyin``函数:: 54 | ``pub fn lazy_pinyin(s: &str, a: &Args) -> Vec<String>`` 55 | 56 | ## [0.1.0] (2017-09-26) 57 | 58 | * 改为使用 [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.1 的拼音数据 59 | 60 | ## [0.0.6] (2016-12-29) 61 | 62 | * Use `env::var_os` instead of `env!` in build script 63 | (via [#5](https://github.com/mozillazg/rust-pinyin/pull/5). Thanks 64 | [@alexcrichton](https://github.com/alexcrichton)) 65 | 66 | * Drop support for Rust < 1.3.0 67 | 68 | ## [0.0.5] (2015-11-21) 69 | 70 | * Fixed a regression that caused the crate to stop compiling on 71 | current rust nightly and beta versions. 72 | (via [#1](https://github.com/mozillazg/rust-pinyin/pull/1). 
Thanks 73 | [@bluss](https://github.com/bluss)) 74 | * Drop support for Rust < 1.2.0 75 | 76 | ## [0.0.4] (2015-09-20) 77 | 78 | * test on Rust 1.3 79 | * fix can't run test on Rust nightly 80 | 81 | ## [0.0.3] (2015-09-18) 82 | 83 | * move build.rs and data out of src directory. 84 | 85 | ## [0.0.2] (2015-08-30) 86 | 87 | * 清理代码 88 | * 更新文档 89 | 90 | ## 0.0.1 (2015-08-27) 91 | 92 | * Initial Release 93 | 94 | [pinyin-data]: https://github.com/mozillazg/pinyin-data 95 | 96 | [#20]: https://github.com/mozillazg/rust-pinyin/pull/20 97 | 98 | [#25]: https://github.com/mozillazg/rust-pinyin/issues/25 99 | 100 | [#27]: https://github.com/mozillazg/rust-pinyin/pull/27 101 | 102 | [#28]: https://github.com/mozillazg/rust-pinyin/pull/28 103 | 104 | [#29]: https://github.com/mozillazg/rust-pinyin/pull/29 105 | 106 | [@LuoZijun]: https://github.com/LuoZijun 107 | 108 | [@hanabi1224]: https://github.com/hanabi1224 109 | 110 | [#36]: https://github.com/mozillazg/rust-pinyin/pull/36 111 | 112 | [#44]: https://github.com/mozillazg/rust-pinyin/pull/44 113 | 114 | [#51]: https://github.com/mozillazg/rust-pinyin/pull/51 115 | 116 | [#52]: https://github.com/mozillazg/rust-pinyin/pull/52 117 | 118 | [0.0.2]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.1...v0.0.2 119 | 120 | [0.0.3]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.2...v0.0.3 121 | 122 | [0.0.4]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.3...v0.0.4 123 | 124 | [0.0.5]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.4...v0.0.5 125 | 126 | [0.0.6]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.5...v0.0.6 127 | 128 | [0.1.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.0.6...v0.1.0 129 | 130 | [0.2.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.1.0...v0.2.0 131 | 132 | [0.3.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.2.0...v0.3.0 133 | 134 | [0.4.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.3.0...v0.4.0 135 | 136 | [0.5.0]: 
https://github.com/mozillazg/rust-pinyin/compare/v0.4.0...v0.5.0 137 | 138 | [0.6.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.5.0...v0.6.0 139 | 140 | [0.7.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.6.0...v0.7.0 141 | 142 | [0.8.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.7.0...v0.8.0 143 | 144 | [0.9.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.8.0...v0.9.0 145 | 146 | [0.10.0]: https://github.com/mozillazg/rust-pinyin/compare/v0.9.0...v0.10.0 147 | 148 | -------------------------------------------------------------------------------- /src/pinyin.rs: -------------------------------------------------------------------------------- 1 | use crate::data::PINYIN_DATA; 2 | use crate::{get_block_and_index, PinyinData}; 3 | use std::str::Chars; 4 | 5 | /// 单个字符的拼音信息 6 | #[derive(Copy, Clone)] 7 | pub struct Pinyin(pub(crate) &'static PinyinData); 8 | 9 | impl Pinyin { 10 | /// 普通风格,不带声调 11 | /// 12 | /// *仅在启用 `plain` 特性时可用* 13 | /// ``` 14 | /// # use pinyin::*; 15 | /// assert_eq!(to_pinyin_vec("拼音", Pinyin::plain), vec!["pin", "yin"]); 16 | /// ``` 17 | #[cfg(feature = "plain")] 18 | pub fn plain(self) -> &'static str { 19 | self.0.plain 20 | } 21 | 22 | /// 带声调的风格 23 | /// 24 | /// *仅在启用 `with_tone` 特性时可用* 25 | /// ``` 26 | /// # use pinyin::*; 27 | /// assert_eq!(to_pinyin_vec("拼音", Pinyin::with_tone), vec!["pīn", "yīn"]); 28 | /// ``` 29 | #[cfg(feature = "with_tone")] 30 | pub fn with_tone(self) -> &'static str { 31 | self.0.with_tone 32 | } 33 | 34 | /// 声调在各个拼音之后,使用数字 1-4 表示的风格 35 | /// 36 | /// *仅在启用 `with_tone_num` 特性时可用* 37 | /// ``` 38 | /// # use pinyin::*; 39 | /// assert_eq!(to_pinyin_vec("拼音", Pinyin::with_tone_num), vec!["pi1n", "yi1n"]); 40 | /// ``` 41 | #[cfg(feature = "with_tone_num")] 42 | pub fn with_tone_num(self) -> &'static str { 43 | self.0.with_tone_num 44 | } 45 | 46 | /// 声调在拼音最后,使用数字 1-4 表示的风格 47 | /// 48 | /// *仅在启用 `with_tone_num_end` 特性时可用* 49 | /// ``` 50 | /// # use pinyin::*; 51 | /// 
assert_eq!(to_pinyin_vec("拼音", Pinyin::with_tone_num_end), vec!["pin1", "yin1"]); 52 | /// ``` 53 | #[cfg(feature = "with_tone_num_end")] 54 | pub fn with_tone_num_end(self) -> &'static str { 55 | self.0.with_tone_num_end 56 | } 57 | 58 | /// 首字母风格 59 | /// 60 | /// *仅在启用 `plain` 特性时可用* 61 | /// ``` 62 | /// # use pinyin::*; 63 | /// assert_eq!(to_pinyin_vec("拼音", Pinyin::first_letter), vec!["p", "y"]); 64 | /// assert_eq!(to_pinyin_vec("中国", Pinyin::first_letter), vec!["z", "g"]); 65 | /// assert_eq!(to_pinyin_vec("安心", Pinyin::first_letter), vec!["a", "x"]); 66 | /// ``` 67 | #[cfg(feature = "plain")] 68 | pub fn first_letter(self) -> &'static str { 69 | let ch = self.0.plain.chars().next().unwrap(); 70 | &self.0.plain[..ch.len_utf8()] 71 | } 72 | 73 | #[cfg(feature = "compat")] 74 | pub(crate) fn initials(self) -> &'static str { 75 | &self.0.plain[..self.0.split] 76 | } 77 | 78 | #[cfg(feature = "compat")] 79 | pub(crate) fn finals_plain(self) -> &'static str { 80 | &self.0.plain[self.0.split..] 81 | } 82 | 83 | #[cfg(feature = "compat")] 84 | pub(crate) fn finals_with_tone(self) -> &'static str { 85 | &self.0.with_tone[self.0.split..] 86 | } 87 | 88 | #[cfg(feature = "compat")] 89 | pub(crate) fn finals_with_tone_num(self) -> &'static str { 90 | &self.0.with_tone_num[self.0.split..] 
91 | } 92 | } 93 | 94 | /// 用于获取拼音信息的 trait 95 | pub trait ToPinyin { 96 | type Output; 97 | fn to_pinyin(&self) -> Self::Output; 98 | } 99 | 100 | /// ``` 101 | /// # #[cfg(feature = "plain")] { 102 | /// use pinyin::ToPinyin; 103 | /// assert_eq!('拼'.to_pinyin().unwrap().plain(), "pin"); 104 | /// # } 105 | /// ``` 106 | impl ToPinyin for char { 107 | type Output = Option; 108 | 109 | fn to_pinyin(&self) -> Option { 110 | get_block_and_index(*self).and_then(|(block, index)| match usize::from(block.data[index]) { 111 | 0 => None, 112 | idx => Some(Pinyin(&PINYIN_DATA[idx])), 113 | }) 114 | } 115 | } 116 | 117 | /// ``` 118 | /// # #[cfg(feature = "plain")] { 119 | /// use pinyin::{ToPinyin, Pinyin}; 120 | /// let mut iter = "拼音".to_pinyin(); 121 | /// let mut next_plain = || iter.next().and_then(|p| p).map(Pinyin::plain); 122 | /// assert_eq!(next_plain(), Some("pin")); 123 | /// assert_eq!(next_plain(), Some("yin")); 124 | /// assert_eq!(next_plain(), None); 125 | /// # } 126 | /// ``` 127 | impl<'a> ToPinyin for &'a str { 128 | type Output = PinyinStrIter<'a>; 129 | 130 | #[inline] 131 | fn to_pinyin(&self) -> Self::Output { 132 | PinyinStrIter(self.chars()) 133 | } 134 | } 135 | 136 | /// *辅助迭代器*,用于获取字符串的拼音信息 137 | pub struct PinyinStrIter<'a>(Chars<'a>); 138 | 139 | impl<'a> Iterator for PinyinStrIter<'a> { 140 | type Item = Option; 141 | 142 | #[inline] 143 | fn next(&mut self) -> Option { 144 | self.0.next().map(|c| c.to_pinyin()) 145 | } 146 | } 147 | 148 | #[cfg(test)] 149 | mod tests { 150 | use crate::ToPinyin; 151 | 152 | #[test] 153 | fn special_code_point() { 154 | assert!('\u{10FFFF}'.to_pinyin().is_none()); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/pinyin_multi.rs: -------------------------------------------------------------------------------- 1 | use crate::data::{HETERONYM_TABLE, PINYIN_DATA}; 2 | use crate::{get_block_and_index, Pinyin, PinyinData}; 3 | use std::str::Chars; 4 | 5 
| /// 单个字符的多音字信息 6 | /// 7 | /// *仅在启用 `heteronym` 特性时可用* 8 | #[derive(Copy, Clone)] 9 | pub struct PinyinMulti { 10 | first: &'static PinyinData, 11 | other_indexes: &'static [u16], 12 | } 13 | 14 | impl PinyinMulti { 15 | /// 对应字符不同发音的数量 16 | pub fn count(self) -> usize { 17 | self.other_indexes.len() + 1 18 | } 19 | 20 | /// 获取指定序号的拼音,如果序号超过总数则 panic 21 | pub fn get(self, idx: usize) -> Pinyin { 22 | self.get_opt(idx).unwrap() 23 | } 24 | 25 | /// 获取指定序号的拼音,如果序号超过总数则返回 `None` 26 | pub fn get_opt(self, idx: usize) -> Option { 27 | if idx == 0 { 28 | return Some(Pinyin(self.first)); 29 | } 30 | self.other_indexes 31 | .get(idx - 1) 32 | .map(|i| Pinyin(&PINYIN_DATA[usize::from(*i)])) 33 | } 34 | } 35 | 36 | impl IntoIterator for PinyinMulti { 37 | type Item = Pinyin; 38 | type IntoIter = PinyinMultiIter; 39 | 40 | fn into_iter(self) -> PinyinMultiIter { 41 | PinyinMultiIter { 42 | inner: self, 43 | index: 0, 44 | } 45 | } 46 | } 47 | 48 | /// *辅助迭代器*,用于迭代一个字的多个拼音 49 | pub struct PinyinMultiIter { 50 | inner: PinyinMulti, 51 | index: usize, 52 | } 53 | 54 | impl Iterator for PinyinMultiIter { 55 | type Item = Pinyin; 56 | 57 | fn next(&mut self) -> Option { 58 | self.inner.get_opt(self.index).inspect(|_pinyin| { 59 | self.index += 1; 60 | }) 61 | } 62 | } 63 | 64 | /// 用于获取多音字信息的 trait 65 | /// 66 | /// *仅在启用 `heteronym` 特性时可用* 67 | pub trait ToPinyinMulti { 68 | type Output; 69 | fn to_pinyin_multi(&self) -> Self::Output; 70 | } 71 | 72 | /// ``` 73 | /// # #[cfg(feature = "with_tone")] { 74 | /// use pinyin::{Pinyin, ToPinyinMulti}; 75 | /// let mut iter = '还'.to_pinyin_multi().unwrap().into_iter(); 76 | /// let mut next_pinyin = || iter.next().map(Pinyin::with_tone); 77 | /// assert_eq!(next_pinyin(), Some("hái")); 78 | /// assert_eq!(next_pinyin(), Some("huán")); 79 | /// assert_eq!(next_pinyin(), Some("fú")); 80 | /// assert_eq!(next_pinyin(), None); 81 | /// # } 82 | /// ``` 83 | impl ToPinyinMulti for char { 84 | type Output = Option; 85 | 86 | fn 
to_pinyin_multi(&self) -> Option { 87 | get_block_and_index(*self).and_then(|(block, index)| { 88 | let first = match usize::from(block.data[index]) { 89 | 0 => return None, 90 | idx => &PINYIN_DATA[idx], 91 | }; 92 | let idx = usize::from(block.heteronym[index]); 93 | let other_indexes = HETERONYM_TABLE[idx]; 94 | Some(PinyinMulti { 95 | first, 96 | other_indexes, 97 | }) 98 | }) 99 | } 100 | } 101 | 102 | /// ``` 103 | /// # #[cfg(feature = "with_tone")] { 104 | /// use pinyin::{Pinyin, ToPinyinMulti}; 105 | /// let mut iter = "还没".to_pinyin_multi(); 106 | /// let mut next_heteronym = || { 107 | /// iter.next() 108 | /// .and_then(|m| m) 109 | /// .map(|m| m.into_iter().map(Pinyin::with_tone).collect::>()) 110 | /// }; 111 | /// assert_eq!(next_heteronym(), Some(vec!["hái", "huán", "fú"])); 112 | /// assert_eq!(next_heteronym(), Some(vec!["méi", "mò", "me"])); 113 | /// assert_eq!(next_heteronym(), None); 114 | /// # } 115 | /// ``` 116 | impl<'a> ToPinyinMulti for &'a str { 117 | type Output = PinyinMultiStrIter<'a>; 118 | 119 | #[inline] 120 | fn to_pinyin_multi(&self) -> Self::Output { 121 | PinyinMultiStrIter(self.chars()) 122 | } 123 | } 124 | 125 | /// *辅助迭代器*,用于获取字符串的多音字信息 126 | pub struct PinyinMultiStrIter<'a>(Chars<'a>); 127 | 128 | impl<'a> Iterator for PinyinMultiStrIter<'a> { 129 | type Item = Option; 130 | 131 | #[inline] 132 | fn next(&mut self) -> Option { 133 | self.0.next().map(|c| c.to_pinyin_multi()) 134 | } 135 | } 136 | 137 | #[cfg(test)] 138 | mod tests { 139 | #[cfg(feature = "with_tone")] 140 | use crate::Pinyin; 141 | use crate::{PinyinMulti, ToPinyinMulti}; 142 | 143 | fn zi() -> PinyinMulti { 144 | '子'.to_pinyin_multi().expect("no pinyin?") 145 | } 146 | 147 | #[test] 148 | fn pinyin_multi_count() { 149 | assert_eq!(zi().count(), 2); 150 | } 151 | 152 | #[test] 153 | #[cfg(feature = "with_tone")] 154 | fn pinyin_multi_get_opt() { 155 | assert_eq!(zi().get_opt(0).map(Pinyin::with_tone), Some("zi")); 156 | 
assert_eq!(zi().get_opt(1).map(Pinyin::with_tone), Some("zǐ")); 157 | assert_eq!(zi().get_opt(2).map(Pinyin::with_tone), None); 158 | } 159 | 160 | #[test] 161 | #[cfg(feature = "with_tone")] 162 | fn pinyin_multi_get() { 163 | assert_eq!(zi().get(0).with_tone(), "zi"); 164 | assert_eq!(zi().get(1).with_tone(), "zǐ"); 165 | } 166 | 167 | #[test] 168 | #[should_panic] 169 | fn pinyin_multi_get_panic() { 170 | zi().get(2); 171 | } 172 | 173 | #[test] 174 | #[cfg(feature = "with_tone")] 175 | fn pinyin_multi_iter() { 176 | let mut iter = zi().into_iter(); 177 | assert_eq!(iter.next().map(Pinyin::with_tone), Some("zi")); 178 | assert_eq!(iter.next().map(Pinyin::with_tone), Some("zǐ")); 179 | assert_eq!(iter.next().map(Pinyin::with_tone), None); 180 | } 181 | 182 | #[test] 183 | #[cfg(feature = "with_tone")] 184 | fn str_to_pinyin_multi() { 185 | let actual = "还没" 186 | .to_pinyin_multi() 187 | .map(|multi| { 188 | multi 189 | .unwrap() 190 | .into_iter() 191 | .map(Pinyin::with_tone) 192 | .collect::>() 193 | }) 194 | .collect::>(); 195 | let expected = vec![vec!["hái", "huán", "fú"], vec!["méi", "mò", "me"]]; 196 | assert_eq!(actual, expected); 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /tests/compat.rs: -------------------------------------------------------------------------------- 1 | #![allow(deprecated)] 2 | 3 | extern crate pinyin; 4 | 5 | pub struct TestCase { 6 | pub hans: String, 7 | pub args: pinyin::Args, 8 | pub result: Vec>, 9 | pub lazy_result: Vec, 10 | } 11 | 12 | impl TestCase { 13 | pub fn new( 14 | hans: String, 15 | args: pinyin::Args, 16 | result: Vec>, 17 | lazy_result: Vec, 18 | ) -> TestCase { 19 | TestCase { 20 | hans, 21 | args, 22 | result, 23 | lazy_result, 24 | } 25 | } 26 | 27 | pub fn new2( 28 | hans: String, 29 | args: pinyin::Args, 30 | result: Vec>, 31 | lazy_result: Vec<&str>, 32 | ) -> TestCase { 33 | TestCase { 34 | hans, 35 | args, 36 | result: result 37 | .into_iter() 38 
| .map(|vec| vec.into_iter().map(String::from).collect()) 39 | .collect(), 40 | lazy_result: lazy_result.into_iter().map(String::from).collect(), 41 | } 42 | } 43 | } 44 | 45 | #[test] 46 | fn test_pinyin() { 47 | let test_data = vec![ 48 | TestCase::new( 49 | "中国人".to_string(), 50 | pinyin::Args::new(), 51 | vec![ 52 | vec!["zhong".to_string()], 53 | vec!["guo".to_string()], 54 | vec!["ren".to_string()], 55 | ], 56 | vec!["zhong".to_string(), "guo".to_string(), "ren".to_string()], 57 | ), 58 | TestCase::new( 59 | "中国人".to_string(), 60 | pinyin::Args { 61 | style: pinyin::Style::Normal, 62 | heteronym: false, 63 | }, 64 | vec![ 65 | vec!["zhong".to_string()], 66 | vec!["guo".to_string()], 67 | vec!["ren".to_string()], 68 | ], 69 | vec!["zhong".to_string(), "guo".to_string(), "ren".to_string()], 70 | ), 71 | TestCase::new( 72 | "中国人".to_string(), 73 | pinyin::Args { 74 | style: pinyin::Style::Tone, 75 | heteronym: false, 76 | }, 77 | vec![ 78 | vec!["zhōng".to_string()], 79 | vec!["guó".to_string()], 80 | vec!["rén".to_string()], 81 | ], 82 | vec!["zhōng".to_string(), "guó".to_string(), "rén".to_string()], 83 | ), 84 | TestCase::new( 85 | "中国人".to_string(), 86 | pinyin::Args { 87 | style: pinyin::Style::Tone2, 88 | heteronym: false, 89 | }, 90 | vec![ 91 | vec!["zho1ng".to_string()], 92 | vec!["guo2".to_string()], 93 | vec!["re2n".to_string()], 94 | ], 95 | vec!["zho1ng".to_string(), "guo2".to_string(), "re2n".to_string()], 96 | ), 97 | TestCase::new( 98 | "中国人".to_string(), 99 | pinyin::Args { 100 | style: pinyin::Style::Initials, 101 | heteronym: false, 102 | }, 103 | vec![ 104 | vec!["zh".to_string()], 105 | vec!["g".to_string()], 106 | vec!["r".to_string()], 107 | ], 108 | vec!["zh".to_string(), "g".to_string(), "r".to_string()], 109 | ), 110 | TestCase::new( 111 | "中国人".to_string(), 112 | pinyin::Args { 113 | style: pinyin::Style::FirstLetter, 114 | heteronym: false, 115 | }, 116 | vec![ 117 | vec!["z".to_string()], 118 | vec!["g".to_string()], 119 | 
vec!["r".to_string()], 120 | ], 121 | vec!["z".to_string(), "g".to_string(), "r".to_string()], 122 | ), 123 | TestCase::new( 124 | "中国人".to_string(), 125 | pinyin::Args { 126 | style: pinyin::Style::Finals, 127 | heteronym: false, 128 | }, 129 | vec![ 130 | vec!["ong".to_string()], 131 | vec!["uo".to_string()], 132 | vec!["en".to_string()], 133 | ], 134 | vec!["ong".to_string(), "uo".to_string(), "en".to_string()], 135 | ), 136 | TestCase::new( 137 | "中国人".to_string(), 138 | pinyin::Args { 139 | style: pinyin::Style::FinalsTone, 140 | heteronym: false, 141 | }, 142 | vec![ 143 | vec!["ōng".to_string()], 144 | vec!["uó".to_string()], 145 | vec!["én".to_string()], 146 | ], 147 | vec!["ōng".to_string(), "uó".to_string(), "én".to_string()], 148 | ), 149 | TestCase::new( 150 | "中国人".to_string(), 151 | pinyin::Args { 152 | style: pinyin::Style::FinalsTone2, 153 | heteronym: false, 154 | }, 155 | vec![ 156 | vec!["o1ng".to_string()], 157 | vec!["uo2".to_string()], 158 | vec!["e2n".to_string()], 159 | ], 160 | vec!["o1ng".to_string(), "uo2".to_string(), "e2n".to_string()], 161 | ), 162 | TestCase::new2( 163 | "中国人".to_string(), 164 | pinyin::Args { 165 | style: pinyin::Style::Normal, 166 | heteronym: true, 167 | }, 168 | vec![vec!["zhong"], vec!["guo"], vec!["ren"]], 169 | vec!["zhong", "guo", "ren"], 170 | ), 171 | TestCase::new2( 172 | "阿拉巴".to_string(), 173 | pinyin::Args { 174 | style: pinyin::Style::Normal, 175 | heteronym: true, 176 | }, 177 | vec![vec!["a", "e"], vec!["la"], vec!["ba"]], 178 | vec!["a", "la", "ba"], 179 | ), 180 | ]; 181 | for data in &test_data { 182 | assert_eq!(data.result, pinyin::pinyin(&data.hans, &data.args)); 183 | assert_eq!( 184 | data.lazy_result, 185 | pinyin::lazy_pinyin(&data.hans, &data.args) 186 | ); 187 | } 188 | } 189 | 190 | #[test] 191 | fn test_non_chinese_pinyin() { 192 | let hans = "中国人abc你好"; 193 | let expect = vec![ 194 | vec!["zhong".to_string()], 195 | vec!["guo".to_string()], 196 | vec!["ren".to_string()], 197 | vec![], 
198 | vec![], 199 | vec![], 200 | vec!["ni".to_string()], 201 | vec!["hao".to_string()], 202 | ]; 203 | let result = pinyin::pinyin(hans, &pinyin::Args::new()); 204 | assert_eq!(expect, result); 205 | } 206 | 207 | #[test] 208 | fn test_non_chinese_lazy_pinyin() { 209 | let hans = "中国人abc你好"; 210 | let expect = vec![ 211 | "zhong".to_string(), 212 | "guo".to_string(), 213 | "ren".to_string(), 214 | "ni".to_string(), 215 | "hao".to_string(), 216 | ]; 217 | let result = pinyin::lazy_pinyin(hans, &pinyin::Args::new()); 218 | assert_eq!(expect, result); 219 | } 220 | 221 | #[test] 222 | fn test_new_args() { 223 | let args = pinyin::Args::new(); 224 | assert_eq!(pinyin::Style::Normal, args.style); 225 | assert!(!args.heteronym); 226 | 227 | let expected = pinyin::Args { 228 | style: pinyin::Style::Normal, 229 | heteronym: false, 230 | }; 231 | assert_eq!(expected, args); 232 | } 233 | 234 | #[test] 235 | fn test_default_args() { 236 | let args: pinyin::Args = Default::default(); 237 | assert_eq!(pinyin::Style::Normal, args.style); 238 | assert!(!args.heteronym); 239 | 240 | let args = pinyin::Args::default(); 241 | assert_eq!(pinyin::Style::Normal, args.style); 242 | assert!(!args.heteronym); 243 | 244 | let expected = pinyin::Args { 245 | style: pinyin::Style::Normal, 246 | heteronym: false, 247 | }; 248 | assert_eq!(expected, args); 249 | } 250 | 251 | #[test] 252 | fn test_no_initial() { 253 | let hans = "安"; 254 | let mut expect = vec!["an".to_string()]; 255 | let mut result = pinyin::lazy_pinyin(hans, &pinyin::Args::new()); 256 | assert_eq!(expect, result); 257 | 258 | expect = vec!["an".to_string()]; 259 | result = pinyin::lazy_pinyin( 260 | hans, 261 | &pinyin::Args { 262 | style: pinyin::Style::Finals, 263 | heteronym: false, 264 | }, 265 | ); 266 | assert_eq!(expect, result); 267 | } 268 | 269 | #[test] 270 | fn test_no_phonetic_symbol() { 271 | let hans = "啊"; 272 | let mut expect = vec!["a".to_string()]; 273 | let mut result = pinyin::lazy_pinyin(hans, 
&pinyin::Args::new()); 274 | assert_eq!(expect, result); 275 | 276 | expect = vec!["a".to_string()]; 277 | result = pinyin::lazy_pinyin( 278 | hans, 279 | &pinyin::Args { 280 | style: pinyin::Style::Finals, 281 | heteronym: false, 282 | }, 283 | ); 284 | assert_eq!(expect, result); 285 | } 286 | --------------------------------------------------------------------------------