├── .idea
│   ├── encodings.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── python-wubi.iml
│   └── vcs.xml
├── LICENSE
├── README.md
├── pywubi
│   ├── __init__.py
│   ├── constants.py
│   ├── core.py
│   ├── utlis.py
│   └── wubi_dict.py
├── resource
│   └── 86版_全码.txt
└── setup.py
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/python-wubi.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Thunder Bouble
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 汉字五笔转换工具(Python 版)
2 | =============================
3 | [PyPI](https://pypi.org/project/pywubi)
4 | 
5 |
6 |
7 | 将汉字转为五笔码。目前只支持 86 版编码(因为目前只找到并整理出了 86 版五笔编码)。
8 |
9 | ## 关于
10 |
11 | * GitHub: https://github.com/sfyc23/python-wubi
12 | * License: MIT license
13 | * PyPI: https://pypi.org/project/pywubi
14 | * Python version: 3
15 |
16 | ## 特性
17 |
18 | 1. 将词组转成五笔编码。比如词语:生死有命。换成五笔码为:'tgdw';
19 | 2. 返回汉字的所有可能的编码。如汉字:为 。换成五笔码为: 'ylyi', 'yly', 'yl', 'o';
20 | 3. 将一段句子转成五笔码。如:天气不错,我们去散步吧!转成五笔码为:'gdi', 'rnb', 'gii', 'qajg', ',', 'trnt', 'wun', 'fcu', 'aety', 'hir', 'kcn', '!'
21 |
22 | ## 安装
23 |
24 | $ pip install pywubi
25 |
26 | ## 使用示例
27 |
28 | >>> from pywubi import wubi
29 | >>> wubi('我爱你')
30 | ['trnt', 'epdc', 'wqiy']
31 | >>> wubi('我爱你',multicode=True) # 返回汉字的所有可能的五笔编码
32 | [['trnt', 'trn', 'q'], ['epdc', 'epd', 'ep'], ['wqiy', 'wqi', 'wq']]
33 | >>> wubi('我爱你', single=False) # 以词组的方法处理这些汉字
34 | ['tewq']
35 |
36 | ## License
37 |
38 | MIT License
39 |
40 | Copyright (c) 2019 Thunder Bouble
41 |
42 | Permission is hereby granted, free of charge, to any person obtaining a copy
43 | of this software and associated documentation files (the "Software"), to deal
44 | in the Software without restriction, including without limitation the rights
45 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
46 | copies of the Software, and to permit persons to whom the Software is
47 | furnished to do so, subject to the following conditions:
48 |
49 | The above copyright notice and this permission notice shall be included in all
50 | copies or substantial portions of the Software.
51 |
52 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
53 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
54 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
55 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
56 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
57 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
58 | SOFTWARE.
59 |
--------------------------------------------------------------------------------
/pywubi/__init__.py:
--------------------------------------------------------------------------------
1 | # from .core import *
2 |
3 | from pywubi.core import (
4 | wubi, single_wubi, conbin_wubi
5 | )
6 |
# Package metadata. setup.py reads these assignments as plain text with a
# regular expression that expects the form  __name__ = 'value'  (single
# quotes), so keep each one on its own line in exactly that form.
__title__ = 'pywubi'
__version__ = '0.0.2'
__author__ = 'sfyc23'
__license__ = 'MIT'
11 | __copyright__ = '''
12 | MIT License
13 |
14 | Copyright (c) 2019 Thunder Bouble
15 |
16 | Permission is hereby granted, free of charge, to any person obtaining a copy
17 | of this software and associated documentation files (the "Software"), to deal
18 | in the Software without restriction, including without limitation the rights
19 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 | copies of the Software, and to permit persons to whom the Software is
21 | furnished to do so, subject to the following conditions:
22 |
23 | The above copyright notice and this permission notice shall be included in all
24 | copies or substantial portions of the Software.
25 |
26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 | SOFTWARE.
33 | '''
--------------------------------------------------------------------------------
/pywubi/constants.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from enum import IntEnum, unique
5 | import os
6 | import re
7 |
8 | from pywubi import wubi_dict
9 |
# Pattern matching a string made up entirely of characters treated as Chinese:
# U+3007 (〇) plus the range U+4E00..U+FA29.
RE_HANS = re.compile(r'^(?:[\u3007\u4e00-\ufa29])+$')

# Single-character Wubi (86 edition) code table.
# By default the table is copied; setting the PYWUBI_NO_DICT_COPY environment
# variable skips the copy to reduce memory use.
WUBI_86_DICT = (
    wubi_dict.wubi_86_dict
    if os.environ.get('PYWUBI_NO_DICT_COPY')
    else wubi_dict.wubi_86_dict.copy()
)
24 |
@unique
class Style(IntEnum):
    """Wubi code-table editions."""

    WUBI_86 = 1  # 86 edition
    WUBI_96 = 2  # 96 edition


# Module-level aliases for the enum members.
STYLE_WUBI_86, STYLE_WUBI_96 = Style.WUBI_86, Style.WUBI_96
34 |
--------------------------------------------------------------------------------
/pywubi/core.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from pywubi.utlis import (
5 | single_seg, combin_seg
6 | )
7 |
8 | from pywubi.constants import (
9 | WUBI_86_DICT,
10 | RE_HANS
11 | )
12 |
def single_wubi(han, multicode=False):
    """Convert a single Chinese character to its Wubi code.

    :param han: str, a single character. A string longer than one character
        is treated as a phrase and handed to ``conbin_wubi``; an empty
        string is returned unchanged.
    :param multicode: bool, whether to return every code of the character.
        If True, return all codes listed for the character.
        If False (default), return only the first listed code (the table
        is expected to list the longest code first).
    :return: str or list.
        A list of codes when ``multicode`` is True and the character is in
        the code table; otherwise a single str. A character that is not in
        the table is returned as-is.
    """
    if not han:
        # ord('') raises TypeError, and there is nothing to convert anyway.
        return han
    if len(han) > 1:
        return conbin_wubi(han)

    # The table is keyed by code point; each value is a comma-separated
    # list of codes for that character.
    entry = WUBI_86_DICT.get(ord(han))
    if entry is None:
        return han
    codes = entry.split(',')
    return codes if multicode else codes[0]
34 |
35 |
def conbin_wubi(hans):
    """Convert a phrase to its Wubi phrase code.

    The phrase code is assembled from the single-character codes:

    * 1 character: the character's own code;
    * 2 characters: the first two letters of each character's code;
    * 3 characters: the first letter of the first two characters, then the
      first two letters of the third;
    * 4 or more characters: the first letter of the first three characters
      and of the last character.

    :param hans: str, the phrase
    :return: str, the Wubi code; an empty string for an empty phrase
    """
    count = len(hans)
    if count == 0:
        # Previously fell through every branch and returned None.
        return ''
    if count == 1:
        return single_wubi(hans)

    # (character, number of leading code letters to keep) for each position.
    if count == 2:
        picks = ((hans[0], 2), (hans[1], 2))
    elif count == 3:
        picks = ((hans[0], 1), (hans[1], 1), (hans[2], 2))
    else:
        picks = ((hans[0], 1), (hans[1], 1), (hans[2], 1), (hans[-1], 1))
    return ''.join(single_wubi(ch)[:keep] for ch, keep in picks)
63 |
64 |
65 | # style=Style.WUBI_86,errors='default'
def wubi(hans, multicode=False, single=True):
    """Convert Chinese text to a list of Wubi codes.

    :param hans: str, a string of Chinese characters ('我爱你') or a single
        character ('滚')
    :param multicode: bool, only used when ``single`` is True.
        If True, each character yields all of its codes.
        If False (default), each character yields one code.
    :param single: bool, how the text is split before conversion.
        If True (default), every Chinese character is converted on its own.
        If False, each run of Chinese characters is converted as a phrase.
    :return: list, the converted items in input order; segments that do not
        match ``RE_HANS`` are passed through unchanged

    Usage::
        >>> from pywubi import wubi
        >>> wubi('我爱你')
        ['trnt', 'epdc', 'wqiy']
        >>> wubi('我爱你',multicode=True)
        [['trnt', 'trn', 'q'], ['epdc', 'epd', 'ep'], ['wqiy', 'wqi', 'wq']]
        >>> wubi('我爱你', single=False)
        ['tewq']
    """
    # Split per character, or into runs of Chinese / non-Chinese text.
    if single:
        pieces = single_seg(hans)
    else:
        pieces = combin_seg(hans)

    codes = []
    for piece in pieces:
        if not RE_HANS.match(piece):
            # Not Chinese text: keep the segment verbatim.
            codes.append(piece)
        elif single:
            codes.append(single_wubi(piece, multicode))
        else:
            codes.append(conbin_wubi(piece))
    return codes
107 |
108 |
109 | # if __name__ == '__main__':
110 | #
111 | # ret = wubi('我爱你')
112 | # print(ret)
113 | # ret = wubi('为',multicode=True)
114 | # print(ret)
115 | # ret = wubi('天气不错,我们去散步吧', single=True)
116 | #
117 | # print(ret)
118 |
--------------------------------------------------------------------------------
/pywubi/utlis.py:
--------------------------------------------------------------------------------
1 | # from __future__ import unicode_literals
2 |
3 | from pywubi.constants import RE_HANS
4 |
5 |
def combin_seg(chars):
    """Split text into alternating runs of Chinese and non-Chinese characters.

    Consecutive characters of the same kind (matching ``RE_HANS`` or not)
    are kept together as one segment.

    :param chars: str, the text to split
    :return: list of str, the segments in input order; an empty list for
        empty input
    """
    ret = []                 # finished segments
    current = ''             # segment being built
    current_is_han = None    # kind of the segment being built

    for c in chars:
        is_han = bool(RE_HANS.match(c))
        if current and is_han != current_is_han:
            # The kind of character changed: close the current segment.
            ret.append(current)
            current = ''
        current += c
        current_is_han = is_han

    if current:
        # Flush the last segment; nothing is added for empty input.
        ret.append(current)
    return ret
41 |
42 |
43 |
def single_seg(chars):
    """Split text into single Chinese characters and runs of other text.

    Every character matching ``RE_HANS`` becomes its own item; consecutive
    non-matching characters are grouped into one item.

    :param chars: str, the text to split
    :return: list of str, the items in input order
    """
    res = []
    pending = ''  # run of non-Chinese characters collected so far
    for c in chars:
        if RE_HANS.match(c):
            if pending:
                res.append(pending)
                pending = ''
            res.append(c)
        else:
            pending += c
    if pending:
        # Flush a trailing non-Chinese run (e.g. closing punctuation),
        # which would otherwise be dropped from the result.
        res.append(pending)
    return res
61 |
62 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Note: To use the 'upload' functionality of this file, you must:
5 | # $ pip install twine
6 |
7 | import io
8 | import os
9 | import re
10 | from setuptools import find_packages, setup, Command
11 |
12 |
13 | # What packages are required for this module to be executed?
14 | REQUIRED = [
15 | # 'requests', 'maya', 'records',
16 | ]
17 |
18 | # What packages are optional?
19 | EXTRAS = {
20 | # 'fancy feature': ['django'],
21 | }
22 |
23 | # The rest you shouldn't have to touch too much :)
24 | # ------------------------------------------------
25 | # Except, perhaps the License and Trove Classifiers!
26 | # If you do change the License, remember to change the Trove Classifier for that!
27 |
28 | current_dir = os.path.abspath(os.path.dirname(__file__))
29 |
def get_meta():
    """Collect the ``__dunder__`` metadata declared in ``pywubi/__init__.py``.

    The file is scanned as text (it is not imported) for assignments of the
    form ``__name__ = 'value'`` with a single-quoted value.

    :return: dict mapping each dunder name (e.g. ``'__version__'``) to its
        string value
    """
    # The named groups are required: they are read back with
    # match.group('name') / match.group('value') below.
    meta_re = re.compile(r"(?P<name>__\w+__) = '(?P<value>[^']+)'")
    init_path = os.path.join(current_dir, 'pywubi', '__init__.py')
    with open(init_path, encoding='utf8') as fp:
        return {
            match.group('name'): match.group('value')
            for match in meta_re.finditer(fp.read())
        }
38 |
# Use the README as the long description passed to setup(); fall back to the
# one-line summary when the README file is missing.
try:
    with io.open(os.path.join(current_dir, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = '汉字五笔转换模块/工具.'

# Packages to ship, listed explicitly instead of using find_packages().
packages = [
    'pywubi',
]

# Title, version, author and license are taken from pywubi/__init__.py.
meta_d = get_meta()
# Where the magic happens:
setup(
    name=meta_d['__title__'],
    version=meta_d['__version__'],
    description='汉字五笔转换模块/工具.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=meta_d['__author__'],
    author_email='sfyc23@gmail.com',
    license=meta_d['__license__'],
    python_requires='>=3.6.0',
    url='https://github.com/sfyc23/python-wubi',
    # packages=find_packages(exclude=('tests',)),
    packages=packages,
    # If your package is a single module, use this instead of 'packages':
    # py_modules=['mypackage'],

    # entry_points={
    #     'console_scripts': ['mycli=mymodule:cli'],
    # },
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    include_package_data=True,

    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    keywords='wubi, 五笔',

)
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------