import os


class ChineseCixing:
    """Look up stroke sequences, pinyin and radicals of Chinese characters.

    The stroke and radical tables are read from text files when an instance
    is created; pinyin conversion is delegated to ``pypinyin.lazy_pinyin``.
    """

    def __init__(self, strokefile=None, radicalfile=None):
        """Load the stroke and radical lookup tables.

        Args:
            strokefile: Path of the stroke table. Every non-blank line has
                the form ``key:item,item,...``; for each item the text
                before the first ``/`` must be a key of ``char_dict``.
                Defaults to ``strokes.txt`` in this module's directory.
            radicalfile: Path of the radical table. Every non-blank line
                has the form ``key:radical``. Defaults to ``radicals.txt``
                in this module's directory.

        Raises:
            OSError: If a table file cannot be opened.
            IndexError: If a non-blank line contains no ``:``.
            KeyError: If a stroke name is missing from ``char_dict``.
        """
        # os.path.dirname works with any path separator; splitting the
        # absolute path on '/' breaks on Windows and for files at the root.
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        if strokefile is None:
            strokefile = os.path.join(cur_dir, 'strokes.txt')
        if radicalfile is None:
            radicalfile = os.path.join(cur_dir, 'radicals.txt')

        # Stroke name -> stroke glyph.
        self.char_dict = {
            "点": "㇔",
            "横": "㇐",
            "横钩": "㇖",
            "横撇": "㇇",
            "横撇弯钩": "㇌",
            "横斜钩": "⺄",
            "横折": "㇕",
            "横折竖钩": "㇆",
            "横折提": "㇊",
            "横折弯": "㇍",
            "横折弯钩": "㇈",
            "横折折": "㇅",
            "横折折撇": "㇋",
            "横折折折": "㇎",
            "横折折折钩": "㇡",
            "捺": "㇏",
            "撇": "㇓",
            "撇点": "㇛",
            "撇折": "㇜",
            "竖": "㇑",
            "竖钩": "㇚",
            "竖提": "㇙",
            "竖弯": "㇄",
            "竖弯横钩": "㇟",
            "竖折": "㇗",
            "竖折撇": "ㄣ",
            "竖折折": "㇞",
            "竖折折钩": "㇉",
            "提": "㇀",
            "弯钩": "㇁",
            "卧钩": "㇃",
            "斜钩": "㇂",
        }

        # Key -> list of stroke glyphs, in the order given in the file.
        self.stroke_dict = {
            key: [self.char_dict[item.split('/')[0]]
                  for item in value.split(',')]
            for key, value in self._read_pairs(strokefile)
        }
        # Key -> radical string.
        self.radical_dict = dict(self._read_pairs(radicalfile))

    @staticmethod
    def _read_pairs(path):
        """Yield ``(key, value)`` for every non-blank ``key:value`` line."""
        # Explicit UTF-8 so decoding does not depend on the platform locale;
        # the context manager guarantees the file handle is closed.
        with open(path, encoding='utf-8') as handle:
            for raw_line in handle:
                line = raw_line.strip()
                if not line:
                    continue
                fields = line.split(':')
                yield fields[0], fields[1]

    def get_strokes(self, word):
        """Return the stroke sequence of each character in ``word``.

        Args:
            word: Text to decompose, processed one character at a time.

        Returns:
            A list with one string per character: the concatenated stroke
            glyphs when the character is in ``stroke_dict``, otherwise the
            character itself.
        """
        return [''.join(self.stroke_dict.get(ch, ch)) for ch in word]

    def get_pinyin(self, word):
        """Return the pinyin of ``word`` as produced by ``lazy_pinyin``.

        Args:
            word: Text to convert.

        Returns:
            The list returned by ``pypinyin.lazy_pinyin(word)``.
        """
        # Imported here so this method does not rely on a module-level import.
        from pypinyin import lazy_pinyin
        return lazy_pinyin(word)

    def get_radical(self, word):
        """Return the radical of each character in ``word``.

        Args:
            word: Text to look up, processed one character at a time.

        Returns:
            A list with one entry per character: its radical when the
            character is in ``radical_dict``, otherwise the character itself.
        """
        return [self.radical_dict.get(ch, ch) for ch in word]


def demo():
    """Print strokes, pinyin and radicals for a sample sentence."""
    word = '自然语言处理是皇冠上的一颗明珠'
    handler = ChineseCixing()

    print('strokes', handler.get_strokes(word))
    print('pinyins', handler.get_pinyin(word))
    print('radicals', handler.get_radical(word))


if __name__ == '__main__':
    demo()