├── .idea ├── encodings.xml ├── inspectionProfiles │ └── Project_Default.xml ├── misc.xml ├── modules.xml ├── python-wubi.iml └── vcs.xml ├── LICENSE ├── README.md ├── pywubi ├── __init__.py ├── constants.py ├── core.py ├── utlis.py └── wubi_dict.py ├── resource └── 86版_全码.txt └── setup.py /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/python-wubi.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Thunder Bouble 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 汉字五笔转换工具(Python 版) 2 | ============================= 3 | [![pypi](https://img.shields.io/badge/pypi-0.0.2-yellow.svg)](https://pypi.org/project/pywubi) 4 | ![python_version](https://img.shields.io/badge/python-%3E3-green.svg) 5 | 6 | 7 | 将汉字转为五笔码。现只支持 86 版编码。(ps:因为目前只找到整理好的 86 版五笔编码) 8 | 9 | ## 关于 10 | 11 | * GitHub: https://github.com/sfyc23/python-wubi 12 | * License: MIT license 13 | * PyPI: https://pypi.org/project/pywubi 14 | * Python version: 3 15 | 16 | ## 特性 17 | 18 | 1. 将词组转成五笔编码。比如词语:生死有命。换成五笔码为:'tgdw'; 19 | 2. 返回汉字的所有可能的编码。如汉字:为。换成五笔码为: 'ylyi', 'yly', 'yl', 'o'; 20 | 3. 将一段句子,转成五笔码。如:天气不错,我们去散步吧!:五笔码为:'gdi', 'rnb', 'gii', 'qajg', ',', 'trnt', 'wun', 'fcu', 'aety', 'hir', 'kcn', '!' 
 21 | 22 | ## 安装 23 | 24 | $ pip install pywubi 25 | 26 | ## 使用示例 27 | 28 | >>> from pywubi import wubi 29 | >>> wubi('我爱你') 30 | ['trnt', 'epdc', 'wqiy'] 31 | >>> wubi('我爱你',multicode=True) # 返回汉字的所有可能的五笔编码 32 | [['trnt', 'trn', 'q'], ['epdc', 'epd', 'ep'], ['wqiy', 'wqi', 'wq']] 33 | >>> wubi('我爱你', single=False) # 以词组的方法处理这些汉字 34 | ['tewq'] 35 | 36 | ## License 37 | 38 | MIT License 39 | 40 | Copyright (c) 2019 Thunder Bouble 41 | 42 | Permission is hereby granted, free of charge, to any person obtaining a copy 43 | of this software and associated documentation files (the "Software"), to deal 44 | in the Software without restriction, including without limitation the rights 45 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 46 | copies of the Software, and to permit persons to whom the Software is 47 | furnished to do so, subject to the following conditions: 48 | 49 | The above copyright notice and this permission notice shall be included in all 50 | copies or substantial portions of the Software. 51 | 52 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 53 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 54 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 55 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 56 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 57 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 58 | SOFTWARE. 
# NOTE: the sections below are separate files of the repository dump; each
# banner gives the path of the file that the following code belongs to.

# --------------------------------------------------------------------------
# /pywubi/__init__.py
# --------------------------------------------------------------------------
# from .core import *

from pywubi.core import (
    wubi, single_wubi, conbin_wubi
)

__title__ = 'pywubi'
__version__ = '0.0.2'
__author__ = 'sfyc23'
__license__ = 'MIT'
__copyright__ = '''
MIT License

Copyright (c) 2019 Thunder Bouble

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

# --------------------------------------------------------------------------
# /pywubi/constants.py
# --------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from enum import IntEnum, unique
import os
import re

from pywubi import wubi_dict

# Per-character wubi code table. It is looked up by code point (``ord(ch)``)
# and each value is a comma-separated string of codes.
WUBI_86_DICT = wubi_dict.wubi_86_dict

# Setting the PYWUBI_NO_DICT_COPY environment variable skips the copy below
# to reduce memory use.
if not os.environ.get('PYWUBI_NO_DICT_COPY'):
    WUBI_86_DICT = WUBI_86_DICT.copy()

# Matches a non-empty string made up only of U+3007 or characters in the
# range U+4E00..U+FA29 (what this package treats as Chinese characters).
RE_HANS = re.compile(
    r'^(?:['
    r'\u3007'  # 〇
    r'\u4e00-\ufa29'
    r'])+$'
)


@unique
class Style(IntEnum):
    """Wubi encoding edition."""
    # 86-edition encoding
    WUBI_86 = 1
    # 96-edition encoding
    WUBI_96 = 2


STYLE_WUBI_86 = Style.WUBI_86
STYLE_WUBI_96 = Style.WUBI_96

# --------------------------------------------------------------------------
# /pywubi/core.py
# --------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pywubi.utlis import (
    single_seg, combin_seg
)

from pywubi.constants import (
    WUBI_86_DICT,
    RE_HANS
)


def single_wubi(han, multicode=False):
    '''
    Convert a single Chinese character to its wubi code.

    :param han: str, one character. A string longer than one character is
        delegated to ``conbin_wubi`` and encoded as a phrase (``multicode``
        is ignored in that case). An empty string is returned unchanged
        instead of crashing in ``ord()``.
    :param multicode: bool, whether to return every code of the character.
        If True, return all codes listed for the character.
        If False, return only the first listed code (presumably the
        full-length one; the table is not visible here).
        Defaults to False.
    :return: str or list.
        A list of codes when ``multicode`` is True and the character is in
        the table; a single code string when ``multicode`` is False.
        A character missing from the table is returned unchanged as a str.
    '''
    if not han:
        return han
    if len(han) > 1:
        return conbin_wubi(han)
    try:
        codes = WUBI_86_DICT[ord(han)].split(',')  # all codes of the character
    except KeyError:
        # Unknown character: pass it through untouched.
        return han
    return codes if multicode else codes[0]


def conbin_wubi(hans):
    '''
    Convert a phrase to its wubi phrase code.

    The code is assembled from the leading letters of each character's
    first code:

    * 1 character: the character's own code;
    * 2 characters: first two letters of each character;
    * 3 characters: first letter of the first two characters plus the first
      two letters of the third;
    * 4 or more: first letter of the first, second, third and last
      characters.

    :param hans: str, the phrase.
    :return: str, the wubi code; ``''`` for an empty phrase (previously
        ``None`` was returned implicitly).
    '''
    count = len(hans)
    if count == 0:
        return ''
    if count == 1:
        return single_wubi(hans)
    if count == 2:
        pieces = (
            single_wubi(hans[0])[:2],
            single_wubi(hans[1])[:2],
        )
    elif count == 3:
        pieces = (
            single_wubi(hans[0])[0],
            single_wubi(hans[1])[0],
            single_wubi(hans[2])[:2],
        )
    else:
        pieces = (
            single_wubi(hans[0])[0],
            single_wubi(hans[1])[0],
            single_wubi(hans[2])[0],
            single_wubi(hans[-1])[0],
        )
    return ''.join(pieces)


# style=Style.WUBI_86,errors='default'
def wubi(hans, multicode=False, single=True):
    '''
    Convert Chinese text to wubi codes.

    :param hans: str, a string of Chinese characters ('我爱你') or a single
        character ('滚'). Runs of non-Chinese characters are kept as-is in
        the result.
    :param multicode: bool, whether to return every code of a character.
        If True, each Chinese character yields the list of all its codes.
        If False, each Chinese character yields its first listed code.
        Defaults to False. Only used when ``single`` is True.
    :param single: bool, whether to encode character by character.
        True: every Chinese character is encoded on its own.
        False: every run of consecutive Chinese characters is encoded as
        one phrase.
        Defaults to True.
    :return: list, the wubi codes (and untouched non-Chinese segments) in
        input order.

    Usage::
      >>> from pywubi import wubi
      >>> wubi('我爱你')
      ['trnt', 'epdc', 'wqiy']
      >>> wubi('我爱你',multicode=True)
      [['trnt', 'trn', 'q'], ['epdc', 'epd', 'ep'], ['wqiy', 'wqi', 'wq']]
      >>> wubi('我爱你', single=False)
      ['tewq']
    '''
    if single:
        # One entry per Chinese character, one entry per non-Chinese run.
        return [
            single_wubi(seg, multicode) if RE_HANS.match(seg) else seg
            for seg in single_seg(hans)
        ]
    # One entry per run of Chinese characters, one per non-Chinese run.
    return [
        conbin_wubi(seg) if RE_HANS.match(seg) else seg
        for seg in combin_seg(hans)
    ]


# if __name__ == '__main__':
#
#     ret = wubi('我爱你')
#     print(ret)
#     ret = wubi('为',multicode=True)
#     print(ret)
#     ret = wubi('天气不错,我们去散步吧', single=True)
#
#     print(ret)

# --------------------------------------------------------------------------
# /pywubi/utlis.py
# --------------------------------------------------------------------------
# from __future__ import unicode_literals

from itertools import groupby

from pywubi.constants import RE_HANS


def _is_han(char):
    '''Return True when ``char`` matches RE_HANS (is treated as Chinese).'''
    return bool(RE_HANS.match(char))


def combin_seg(chars):
    '''
    Split text into maximal runs of Chinese / non-Chinese characters.

    :param chars: str, the text to split.
    :return: list of str; consecutive characters of the same kind stay
        together. An empty input yields ``[]`` (previously ``['']``).
    '''
    return [''.join(run) for _, run in groupby(chars, key=_is_han)]


def single_seg(chars):
    '''
    Split text so that every Chinese character is its own item.

    :param chars: str, the text to split.
    :return: list of str; each Chinese character is a separate item and
        each run of non-Chinese characters is kept together as one item.
        A trailing non-Chinese run is included (it used to be dropped).
    '''
    res = []
    for is_han, run in groupby(chars, key=_is_han):
        if is_han:
            res.extend(run)           # one item per Chinese character
        else:
            res.append(''.join(run))  # the whole non-Chinese run as one item
    return res


# --------------------------------------------------------------------------
# /setup.py
# --------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Note: To use the 'upload' functionality of this file, you must:
#   $ pip install twine

import io
import os
import re
from setuptools import find_packages, setup, Command


#
# What packages are required for this module to be executed?
REQUIRED = [
    # 'requests', 'maya', 'records',
]

# What packages are optional?
EXTRAS = {
    # 'fancy feature': ['django'],
}

# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

current_dir = os.path.abspath(os.path.dirname(__file__))


def get_meta():
    """Collect the dunder metadata assigned in ``pywubi/__init__.py``.

    Scans the file text for assignments of the form ``__name__ = 'value'``
    (single-quoted, non-empty, no embedded single quote) without importing
    the package. Triple-quoted values are not matched because the character
    after the opening quote is itself a quote.

    :return: dict mapping each dunder name (e.g. ``'__version__'``) to its
        string value.
    :raises OSError: if ``pywubi/__init__.py`` cannot be opened.
    """
    # The named groups are required: the loop below reads them by name.
    meta_re = re.compile(r"(?P<name>__\w+__) = '(?P<value>[^']+)'")
    init_path = os.path.join(current_dir, 'pywubi', '__init__.py')
    with open(init_path, encoding='utf8') as fp:
        return {
            match.group('name'): match.group('value')
            for match in meta_re.finditer(fp.read())
        }


# Use the README as the long description; fall back to a one-line summary
# when the README is not present.
try:
    with io.open(os.path.join(current_dir, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = '汉字五笔转换模块/工具.'
# Packages shipped in the distribution, listed explicitly
# (alternative: find_packages(exclude=('tests',))).
packages = ['pywubi']

# Dunder metadata returned by get_meta().
meta_d = get_meta()

# Trove classifiers.
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
TROVE_CLASSIFIERS = [
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: Implementation :: CPython',
    'Programming Language :: Python :: Implementation :: PyPy',
]

# Every keyword handed to setuptools, gathered in one mapping so the call
# itself stays a one-liner.
setup_options = {
    'name': meta_d['__title__'],
    'version': meta_d['__version__'],
    'description': '汉字五笔转换模块/工具.',
    'long_description': long_description,
    'long_description_content_type': 'text/markdown',
    'author': meta_d['__author__'],
    'author_email': 'sfyc23@gmail.com',
    'license': meta_d['__license__'],
    'python_requires': '>=3.6.0',
    'url': 'https://github.com/sfyc23/python-wubi',
    'packages': packages,
    # For a single-module distribution use 'py_modules' instead of
    # 'packages'; add 'entry_points' here to expose console scripts.
    'install_requires': REQUIRED,
    'extras_require': EXTRAS,
    'include_package_data': True,
    'classifiers': TROVE_CLASSIFIERS,
    'keywords': 'wubi, 五笔',
}

# Where the magic happens:
setup(**setup_options)