├── setup.cfg ├── bopomofo ├── __version__.py ├── dictionrary.py └── __init__.py ├── MANIFEST.in ├── .travis.yml ├── LICENSE ├── .gitignore ├── setup.py ├── README.rst └── tests └── test_main.py /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /bopomofo/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.2' 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.8" 5 | - "pypy3" 6 | install: 7 | - pip install . 8 | - pip install codecov 9 | script: coverage run --source bopomofo -m py.test 10 | after_success: 11 | - codecov 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Anthony Fu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bat 2 | *.ipynb 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
def read(fname):
    """Return the UTF-8 decoded text of *fname*, or '' if it cannot be read."""
    text = ''
    try:
        with codecs.open(fname, mode='r', encoding='utf-8') as source:
            text = source.read()
    except IOError:
        # Best effort: an unreadable or missing file counts as empty.
        pass
    return text
Developers', 33 | 'Operating System :: OS Independent', 34 | 'Topic :: Software Development :: Libraries :: Python Modules', 35 | 36 | 'License :: OSI Approved :: MIT License', 37 | 38 | 'Programming Language :: Python :: 2', 39 | 'Programming Language :: Python :: 2.7', 40 | 'Programming Language :: Python :: 3', 41 | 'Programming Language :: Python :: 3.4', 42 | 'Programming Language :: Python :: 3.5' 43 | ],) 44 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Bopomofo 2 | ========== 3 | .. image:: https://img.shields.io/travis/antfu/bopomofo.svg?style=flat-square 4 | :target: https://travis-ci.org/antfu/bopomofo 5 | 6 | .. image:: https://img.shields.io/codecov/c/github/antfu/bopomofo.svg?style=flat-square 7 | :target: https://codecov.io/gh/antfu/bopomofo 8 | 9 | .. image:: https://img.shields.io/codacy/grade/c5ae3c7ed15f4b388218f83cda6782f9.svg?style=flat-square 10 | :target: https://www.codacy.com/app/anthonyfu117/bopomofo 11 | 12 | .. image:: https://img.shields.io/pypi/v/bopomofo.svg?style=flat-square 13 | :target: https://pypi.python.org/pypi/bopomofo 14 | 15 | .. image:: https://img.shields.io/pypi/pyversions/bopomofo.svg?style=flat-square 16 | :target: https://pypi.python.org/pypi/bopomofo 17 | 18 | .. image:: https://img.shields.io/pypi/status/bopomofo.svg?style=flat-square 19 | :target: https://pypi.python.org/pypi/bopomofo 20 | 21 | .. image:: https://img.shields.io/pypi/l/bopomofo.svg?style=flat-square 22 | :target: https://github.com/antfu/bopomofo/blob/master/LICENSE 23 | 24 | 25 | Translate 漢字 to ㄅㄆㄇㄈ, based on `lxneng/xpinyin `_. 26 | 27 | Install 28 | ---------- 29 | 30 | :: 31 | 32 | pip install bopomofo 33 | 34 | 35 | Usage 36 | ---------- 37 | 38 | .. 
# Lookup tables for converting between pinyin and bopomofo (zhuyin).
#
# Every table is an ordered list of (pinyin, bopomofo) pairs.  Order matters
# for 'consonants': two-letter initials ('zh', 'ch', 'sh') come before their
# one-letter prefixes ('z', 'c', 's') so that prefix matching picks them first.
pinyin_bopomofo = {
    # All bopomofo letters.  The key is misspelled, but it is the name other
    # modules look up, so it must stay as is.
    'vacabulary': u'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ',
    # Syllable initials.
    'consonants': [
        ('b', u'ㄅ'),
        ('p', u'ㄆ'),
        ('m', u'ㄇ'),
        ('f', u'ㄈ'),
        ('d', u'ㄉ'),
        ('t', u'ㄊ'),
        ('n', u'ㄋ'),
        ('l', u'ㄌ'),
        ('g', u'ㄍ'),
        ('k', u'ㄎ'),
        ('h', u'ㄏ'),
        ('j', u'ㄐ'),
        ('q', u'ㄑ'),
        ('x', u'ㄒ'),
        ('zh', u'ㄓ'),
        ('ch', u'ㄔ'),
        ('sh', u'ㄕ'),
        ('r', u'ㄖ'),
        ('z', u'ㄗ'),
        ('c', u'ㄘ'),
        ('s', u'ㄙ')
    ],
    # Syllable finals, as written after an initial ('v' stands for 'ü').
    'vowels': [
        ('a', u'ㄚ'),
        ('o', u'ㄛ'),
        ('e', u'ㄜ'),
        ('ê', u'ㄝ'),
        ('ai', u'ㄞ'),
        ('ei', u'ㄟ'),
        ('ao', u'ㄠ'),
        ('ou', u'ㄡ'),
        ('an', u'ㄢ'),
        ('en', u'ㄣ'),
        ('ang', u'ㄤ'),
        ('eng', u'ㄥ'),
        ('er', u'ㄦ'),
        ('i', u'ㄧ'),
        ('ia', u'ㄧㄚ'),
        ('ie', u'ㄧㄝ'),
        ('iao', u'ㄧㄠ'),
        ('iu', u'ㄧㄡ'),
        ('ian', u'ㄧㄢ'),
        ('in', u'ㄧㄣ'),
        ('iang', u'ㄧㄤ'),
        ('ing', u'ㄧㄥ'),
        ('u', u'ㄨ'),
        ('ua', u'ㄨㄚ'),
        ('uo', u'ㄨㄛ'),
        ('uai', u'ㄨㄞ'),
        ('ui', u'ㄨㄟ'),
        ('uan', u'ㄨㄢ'),
        ('un', u'ㄨㄣ'),
        ('uang', u'ㄨㄤ'),
        ('ong', u'ㄨㄥ'),
        ('v', u'ㄩ'),
        ('ve', u'ㄩㄝ'),
        ('van', u'ㄩㄢ'),
        ('vn', u'ㄩㄣ'),
        ('iong', u'ㄩㄥ'),
    ],
    # Whole syllables that are matched by equality before any
    # initial + final decomposition is attempted.
    'special': [
        # Zero-initial syllables: a bare final is a complete syllable.
        # These mirror the 'vowels' table ('e' is ㄜ; ㄟ is 'ei').
        ('a', u'ㄚ'),
        ('o', u'ㄛ'),
        ('e', u'ㄜ'),
        ('ai', u'ㄞ'),
        ('ei', u'ㄟ'),
        ('ao', u'ㄠ'),
        ('ou', u'ㄡ'),
        ('an', u'ㄢ'),
        ('en', u'ㄣ'),
        ('ang', u'ㄤ'),
        ('eng', u'ㄥ'),
        ('er', u'ㄦ'),
        # Initials whose written 'i' has no bopomofo counterpart.
        ('zhi', u'ㄓ'),
        ('chi', u'ㄔ'),
        ('shi', u'ㄕ'),
        ('ri', u'ㄖ'),
        ('zi', u'ㄗ'),
        ('ci', u'ㄘ'),
        ('si', u'ㄙ'),
        # 'y' spellings of the ㄧ finals.
        ('yi', u'ㄧ'),
        ('ya', u'ㄧㄚ'),
        ('ye', u'ㄧㄝ'),
        ('yao', u'ㄧㄠ'),
        ('you', u'ㄧㄡ'),
        ('yan', u'ㄧㄢ'),
        ('yin', u'ㄧㄣ'),
        ('yang', u'ㄧㄤ'),
        ('ying', u'ㄧㄥ'),
        # 'w' spellings of the ㄨ finals.
        ('wu', u'ㄨ'),
        ('wa', u'ㄨㄚ'),
        ('wo', u'ㄨㄛ'),
        ('wai', u'ㄨㄞ'),
        ('wei', u'ㄨㄟ'),
        ('wan', u'ㄨㄢ'),
        ('wen', u'ㄨㄣ'),
        ('wang', u'ㄨㄤ'),
        ('weng', u'ㄨㄥ'),
        # 'yu' / 'yo' spellings of the ㄩ finals.
        ('yu', u'ㄩ'),
        ('yue', u'ㄩㄝ'),
        ('yuan', u'ㄩㄢ'),
        ('yun', u'ㄩㄣ'),
        ('yong', u'ㄩㄥ')
    ],
    'tones': {
        # Row 0 holds the plain vowel letters; rows 1-4 hold the same
        # letters carrying that tone's mark, column for column.  Both 'v'
        # and 'ü' (the last two columns) share the marked 'ü' forms.
        'pinyin': {
            0: u'aoeiuvü',
            1: u'āōēīūǖǖ',
            2: u'áóéíúǘǘ',
            3: u'ǎǒěǐǔǚǚ',
            4: u'àòèìùǜǜ'
        },
        # Tone number -> bopomofo tone symbol (0 is the neutral tone).
        'bopomofo': {
            0: u'˙',
            1: u'ˉ',
            2: u'ˊ',
            3: u'ˇ',
            4: u'ˋ'
        }
    }
}
def test_bopomofo_to_pinyin():
    """Check bopomofo -> pinyin conversion for single syllables and full text."""
    sentence = u'GitHubㄕˋㄧˉㄍㄜˋㄊㄡˋㄍㄨㄛˋGitㄐㄧㄣˋㄒㄧㄥˊㄅㄢˇㄅㄣˇㄎㄨㄥˋㄓˋㄉㄜ˙ㄖㄨㄢˇㄊㄧˇㄩㄢˊㄕˇㄇㄚˇㄉㄞˋㄍㄨㄢˇㄈㄨˊㄨˋ'
    expectations = (
        (u'ㄈㄨˊ', u'fú'),
        (u'ㄨˋ', u'wù'),
        (u'ㄕˋ', u'shì'),
        (u'ㄧ', u'yī'),
        (u'ㄊㄡˋ', u'tòu'),
    )

    for zhuyin, expected in expectations:
        converted = bopomofo._single_bopomofo_to_pinyin(zhuyin, tones=True)
        assert converted == expected

    # Malformed syllables must be rejected when warnings are not ignored.
    for malformed in (u'ㄕㄕ', u'ㄜㄜ'):
        with pytest.raises(bopomofo.PinyinParsingError):
            bopomofo._single_bopomofo_to_pinyin(malformed)

    with_tones = bopomofo.bopomofo_to_pinyin(sentence)
    assert with_tones == u'GitHub shì yī gè tòu guò Git jìn xíng bǎn běn kòng zhì de ruǎn tǐ yuán shǐ mǎ dài guǎn fú wù'

    without_tones = bopomofo.bopomofo_to_pinyin(sentence, tones=False)
    assert without_tones == u'GitHub shi yi ge tou guo Git jin xing ban ben kong zhi de ruan ti yuan shi ma dai guan fu wu'
class PinyinParsingError(Exception):
    """Raised when a pinyin or bopomofo syllable cannot be parsed."""
def _bopomofo_split(bopomofo, splitter=u' '):
    '''Split a bopomofo string into syllables and non-bopomofo runs.

    A segment ends at a tone symbol (kept in the segment), at a splitter
    character (discarded), or where a run of non-bopomofo characters meets
    a bopomofo character.

    :param bopomofo: The text to split; may mix bopomofo with other text
    :param splitter: Characters that separate syllables in the input
    :return: List of non-empty segments in their original order
    '''

    tone_marks = u''.join(_dict['tones']['bopomofo'].values())
    vocabulary = _dict['vacabulary'] + tone_marks
    segments = []
    start = 0
    in_foreign = False
    for index, char in enumerate(bopomofo):
        if char in splitter:
            # A splitter only separates segments.  Keeping it would double
            # the separator once the caller joins the converted segments.
            if index > start:
                segments.append(bopomofo[start:index])
            start = index + 1
            in_foreign = False
        elif char not in vocabulary:
            in_foreign = True
        else:
            if in_foreign:
                # A non-bopomofo run ends where bopomofo begins.
                segments.append(bopomofo[start:index])
                start = index
                in_foreign = False
            if char in tone_marks:
                segments.append(bopomofo[start:index + 1])
                start = index + 1
    # Flush what follows the last boundary: a final syllable without a tone
    # symbol (first tone is usually unmarked) or trailing non-bopomofo text.
    if start < len(bopomofo):
        segments.append(bopomofo[start:])
    return segments
def _single_bopomofo_to_pinyin(bopomofo, tones=False, default_tone=1, ignore_warning=False):
    '''Translate a single bopomofo syllable to pinyin

    :param bopomofo: One bopomofo syllable, optionally ending in a tone symbol
    :param tones: Put tone marks on the resulting pinyin or not
    :param default_tone: Tone to assume when the syllable has no tone symbol
    :param ignore_warning: Return the input unchanged instead of raising
        when the syllable cannot be parsed
    :raises PinyinParsingError: If no consonant or vowel matches and
        ignore_warning is False
    '''

    raw = bopomofo
    tone_index = default_tone
    normalized = raw.strip()
    for index, tone_symbol in _dict['tones']['bopomofo'].items():
        if normalized.endswith(tone_symbol):
            tone_index = index
            normalized = normalized.rstrip(tone_symbol)

    if not tones:
        # Tone 0 makes _single_pinyin_append_tone leave the pinyin unmarked.
        tone_index = 0

    # Whole-syllable spellings are matched before initial + final parsing.
    for pin, bopo in _dict['special']:
        if normalized == bopo:
            return _single_pinyin_append_tone(pin, tone_index)

    for pin, bopo in _dict['consonants']:
        if normalized.startswith(bopo):
            result = pin
            vowel = normalized[len(bopo):]
            break
    else:
        if ignore_warning:
            return raw
        raise PinyinParsingError('Can not find consonant for bopomofo "%s".' % bopomofo)

    for pin, bopo in _dict['vowels']:
        if vowel == bopo:
            result += pin
            break
    else:
        if ignore_warning:  # pragma: no cover
            return raw
        raise PinyinParsingError('Can not find vowel for bopomofo "%s".' % bopomofo)

    return _single_pinyin_append_tone(result, tone_index)