├── .gitignore ├── test ├── Words.dat ├── test_helper.rb └── chinese_pinyin_test.rb ├── lib ├── chinese_pinyin │ └── version.rb └── chinese_pinyin.rb ├── Gemfile ├── CHANGELOG.md ├── .github └── workflows │ └── chinese_pinyin.yml ├── chinese_pinyin.gemspec ├── MIT-LICENSE ├── Rakefile ├── bin └── ch2py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .rvmrc 3 | Gemfile.lock 4 | -------------------------------------------------------------------------------- /test/Words.dat: -------------------------------------------------------------------------------- 1 | 广州|guang zhou 2 | 上海|ShANg4 hAi3 3 | -------------------------------------------------------------------------------- /lib/chinese_pinyin/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module ChinesePinyin 4 | VERSION = "1.1.0" 5 | end 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem 'rake' 6 | gem 'test-unit' # ruby 2.1.5以后版本的标准库中不包含'test-unit' 7 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding : utf-8 -*- 2 | require 'rubygems' 3 | require 'test/unit' 4 | require File.join(File.dirname(__FILE__) + '/../lib/chinese_pinyin') 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Next Release 2 | 3 | ## 1.1.0 (04/19/2021) 4 | 5 | * support ruby 3 6 | 7 | ## 1.0.2 (08/19/2019) 8 | 9 | * add 嗯 to pinyin-utf8.dat 10 | 11 | ## 1.0.1 12 | 13 | * force utf8 14 | 15 | ## 1.0.0 (06/23/2015) 16 | 17 | * add 
executable cmd ch2py 18 | * fix tone marks 19 | -------------------------------------------------------------------------------- /.github/workflows/chinese_pinyin.yml: -------------------------------------------------------------------------------- 1 | name: Chinese Pinyin 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - 'master' 7 | 8 | push: 9 | branches: 10 | - 'ruby3-support' 11 | - 'master' 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | ruby: ['2.1', '2.5', '2.6', '2.7', '3.0'] 19 | steps: 20 | - uses: actions/checkout@v1 21 | - uses: ruby/setup-ruby@v1 22 | with: 23 | ruby-version: ${{ matrix.ruby }} 24 | - name: Build and test with Rake 25 | run: | 26 | rake test 27 | -------------------------------------------------------------------------------- /chinese_pinyin.gemspec: -------------------------------------------------------------------------------- 1 | lib = File.expand_path('../lib/', __FILE__) 2 | $:.unshift lib unless $:.include?(lib) 3 | 4 | require "chinese_pinyin/version" 5 | 6 | Gem::Specification.new do |s| 7 | s.name = "chinese_pinyin" 8 | s.version = ChinesePinyin::VERSION 9 | s.license = "MIT" 10 | s.platform = Gem::Platform::RUBY 11 | s.authors = ["Richard Huang", "Hong, Liang"] 12 | s.email = ["flyerhzm@gmail.com", "hongliang@bamajia.com"] 13 | s.homepage = "http://github.com/flyerhzm/chinese_pinyin" 14 | s.summary = "translate chinese hanzi to pinyin." 15 | s.description = "translate chinese hanzi to pinyin." 
16 | 17 | s.required_rubygems_version = ">= 1.3.6" 18 | 19 | s.files = `git ls-files`.split("\n") 20 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 21 | s.require_paths = ["lib"] 22 | 23 | s.executables = ["ch2py"] 24 | end 25 | -------------------------------------------------------------------------------- /MIT-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 - 2013 Richard Huang (flyerhzm@gmail.com) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../lib", __FILE__) 2 | 3 | require 'bundler/setup' 4 | require 'rake/testtask' 5 | require "rdoc/task" 6 | 7 | require "chinese_pinyin/version" 8 | 9 | task :build do 10 | system "gem build chinese_pinyin.gemspec" 11 | end 12 | 13 | task :install => :build do 14 | system "sudo gem install chinese_pinyin-#{ChinesePinyin::VERSION}.gem" 15 | end 16 | 17 | task :release => :build do 18 | puts "Tagging #{ChinesePinyin::VERSION}..." 19 | system "git tag -a #{ChinesePinyin::VERSION} -m 'Tagging #{ChinesePinyin::VERSION}'" 20 | puts "Pushing to Github..." 21 | system "git push --tags" 22 | puts "Pushing to rubygems.org..." 23 | system "gem push chinese_pinyin-#{ChinesePinyin::VERSION}.gem" 24 | end 25 | 26 | desc 'Default: run unit tests.' 27 | task :default => :test 28 | 29 | desc 'Test the chinese_pinyin plugin.' 30 | Rake::TestTask.new(:test) do |t| 31 | t.libs << 'lib' 32 | t.libs << 'test' 33 | t.pattern = 'test/**/*_test.rb' 34 | t.verbose = true 35 | end 36 | 37 | desc 'Generate documentation for the chinese_pinyin plugin.' 
38 | Rake::RDocTask.new(:rdoc) do |rdoc| 39 | rdoc.rdoc_dir = 'rdoc' 40 | rdoc.title = 'ChinesePinyin' 41 | rdoc.options << '--line-numbers' << '--inline-source' 42 | rdoc.rdoc_files.include('README') 43 | rdoc.rdoc_files.include('lib/**/*.rb') 44 | end 45 | -------------------------------------------------------------------------------- /bin/ch2py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'optparse' 4 | require File.expand_path('../../lib/chinese_pinyin', __FILE__) 5 | 6 | opts = {} 7 | 8 | parser = OptParse.new do |opt| 9 | opt.banner = "Usage: ch2py [opts]" 10 | 11 | opt.on('-c', '--camelcase', 'Camelcase of each word') do 12 | opts[:camelcase] = true 13 | end 14 | 15 | opt.on('-i', '--stdin', 'Read from stdard input') do 16 | opts[:stdin] = true 17 | end 18 | 19 | opt.on('-t', '--tone', 'Show tone at end of word') do 20 | opts[:tone] = true 21 | end 22 | 23 | opt.on('-m', '--tonemarks', 'Show tone at top of letter, this would cover -t option') do 24 | opts[:tonemarks] = true 25 | end 26 | 27 | opt.on('-s', '--splitter ', 'Splitter of each word, use a space by default') do |sp| 28 | opts[:splitter] = sp 29 | end 30 | 31 | opt.on('-v', '--version', 'Show version') do 32 | puts "ch2py: Version #{ChinesePinyin::VERSION}" 33 | exit 34 | end 35 | 36 | opt.on('-h', '--help', 'Show this help') do 37 | puts parser 38 | exit 39 | end 40 | end 41 | 42 | parser.parse! 43 | 44 | args = parser.default_argv 45 | 46 | if opts.fetch(:stdin, false) 47 | chars = STDIN.readline 48 | elsif args.empty? 
49 | raise OptParse::MissingArgument 50 | else 51 | chars = args.join('') 52 | end 53 | 54 | STDOUT.puts Pinyin.t(chars, opts) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChinesePinyin 2 | 3 | [![Chinese Pinyin](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml/badge.svg)](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml) 4 | 5 | Translate Chinese hanzi to pinyin. 6 | 7 | The dict is borrowed from 8 | 9 | ## Install 10 | 11 | ``` 12 | gem install chinese_pinyin 13 | ``` 14 | 15 | or add it to your Gemfile: 16 | 17 | ``` 18 | gem 'chinese_pinyin' 19 | ``` 20 | 21 | ## Usage 22 | 23 | By CLI 24 | 25 | 26 | ``` 27 | $ ch2py -h 28 | Usage: ch2py [opts] 29 | -c, --camelcase Camelcase of each word 30 | -i, --stdin Read from stdard input 31 | -t, --tone Show tone at end of word 32 | -m, --tonemarks Show tone at top of letter, this 33 | would cover -t option 34 | -s, --splitter Splitter of each word, use a space 35 | by default 36 | -v, --version Show version 37 | -h, --help Show this help 38 | 39 | $ ch2py 中文 40 | zhong wen 41 | ``` 42 | 43 | By code 44 | 45 | ``` 46 | require 'chinese_pinyin' 47 | 48 | Pinyin.t('中国') => "zhong guo" 49 | Pinyin.t('你好world') => "ni hao world" 50 | Pinyin.t('中国', splitter: '-') => "zhong-guo" 51 | Pinyin.t('中国', splitter: '') => "zhongguo" 52 | Pinyin.t('中国', tone: true) => "zhong1 guo2" 53 | Pinyin.t('中国', tonemarks: true) => "zhōng guó" 54 | Pinyin.t('北京') { |letters| letters[0].upcase } => 'BJ' 55 | Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } => 'B' 56 | ``` 57 | 58 | ## Polyphone Issue 59 | 60 | Use Words.dat to override the default behavior. 
61 | 62 | by default 63 | 64 | ``` 65 | Pinyin.t('广州') => "yan zhou" 66 | ``` 67 | 68 | add file Words.dat 69 | 70 | ``` 71 | 广州|guang3 zhou1 72 | ``` 73 | 74 | set `ENV['WORDS_FILE']` for Words.dat 75 | 76 | ``` 77 | ENV['WORDS_FILE'] = "Words.dat path" 78 | Pinyin.t('广州') => "guang zhou" 79 | ``` 80 | -------------------------------------------------------------------------------- /test/chinese_pinyin_test.rb: -------------------------------------------------------------------------------- 1 | # -*- encoding : utf-8 -*- 2 | 3 | require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper')) 4 | 5 | ENV["WORDS_FILE"] = File.dirname(__FILE__) + '/Words.dat' 6 | 7 | class PinyinTest < Test::Unit::TestCase 8 | def test_t 9 | assert_equal("zhong guo", Pinyin.t('中国')) 10 | assert_equal("zhong guo english ri", Pinyin.t('中国english日')) 11 | assert_equal("shang hai very good o ye", Pinyin.t('上海very good哦耶')) 12 | end 13 | 14 | def test_t_with_frozen_string 15 | assert_equal("zhong guo", Pinyin.t('中国'.freeze)) 16 | assert_equal("shen zhen", Pinyin.t('深圳'.freeze)) 17 | end 18 | 19 | def test_t_with_splitter 20 | assert_equal("zhong-guo", Pinyin.t('中国', splitter: '-')) 21 | assert_equal("huangzhimin", Pinyin.t('黄志敏', splitter: '')) 22 | assert_equal("guang-zhou", Pinyin.t('广州', splitter: '-')) 23 | assert_equal("shang-hai", Pinyin.t('上海', splitter: '-')) 24 | end 25 | 26 | def test_t_with_tone 27 | assert_equal("zhong1 guo2", Pinyin.t('中国', tone: true)) 28 | assert_equal("huang2 zhi4 min3", Pinyin.t('黄志敏', tone: true)) 29 | assert_equal("shang4 hai3", Pinyin.t('上海', tone: true)) 30 | end 31 | 32 | def test_t_with_camelcase 33 | assert_equal("Zhong Guo", Pinyin.t('中国', camelcase: true)) 34 | assert_equal("Huang Zhi Min", Pinyin.t('黄志敏', camelcase: true)) 35 | assert_equal("Zhong1 Guo2", Pinyin.t('中国', camelcase: true, tone: true)) 36 | assert_equal("Huang2 Zhi4 Min3", Pinyin.t('黄志敏', camelcase: true, tone: true)) 37 | assert_equal("Zhong-Guo", Pinyin.t('中国', 
camelcase: true, splitter: '-')) 38 | assert_equal("HuangZhiMin", Pinyin.t('黄志敏', camelcase: true, splitter: '')) 39 | assert_equal("Guang-Zhou", Pinyin.t('广州', camelcase: true, splitter: '-')) 40 | assert_equal("Shang-Hai", Pinyin.t('上海', camelcase: true, splitter: '-')) 41 | assert_equal("Shang4-Hai3", Pinyin.t('上海', camelcase: true, tone:true, splitter: '-')) 42 | end 43 | 44 | def test_t_with_chinese_punctuation 45 | assert_equal("ce-shi-yi-xia-Think-diff", Pinyin.t('测试一下,Think diff', splitter: '-')) 46 | end 47 | 48 | def test_t_with_tonemarks 49 | assert_equal('zhōng guó', Pinyin.t('中国', tonemarks: true)) 50 | assert_equal('běi jīng', Pinyin.t('北京', tonemarks: true)) 51 | end 52 | 53 | def test_t_with_custom 54 | assert_equal('BJ', Pinyin.t('北京') { |letters| letters[0].upcase } ) 55 | assert_equal('B', Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } ) 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /lib/chinese_pinyin.rb: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | $KCODE = 'u' if RUBY_VERSION =~ /1\.8/ 3 | 4 | require 'chinese_pinyin/version' 5 | 6 | class Pinyin 7 | 8 | TONE_MARK = { 9 | a: %w(ā á ǎ à a), 10 | o: %w(ō ó ǒ ò o), 11 | e: %w(ē é ě è e), 12 | i: %w(ī í ǐ ì i), 13 | u: %w(ū ú ǔ ù u), 14 | v: %w(ǖ ǘ ǚ ǜ ü) 15 | } 16 | 17 | class <