├── .gitignore ├── CHANGES.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── setup.py └── zunda ├── __init__.py └── zunda.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 
-------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 0.1.3 (2019-11-30) 5 | ------------------------- 6 | 7 | - bugfix for installation on conda (thanks @Kensuke-Mitsuzawa) 8 | 9 | 0.1.2 (2019-02-24) 10 | ------------------------- 11 | 12 | - First release. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 IKEGAMI Yukino 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Zunda Python 2 | =================== 3 | 4 | |pyversion| |version| |license| 5 | 6 | Zunda: Japanese Enhanced Modality Analyzer client for Python. 7 | 8 | Zunda is an extended modality analyzer for Japanese. 9 | For details about Zunda, see https://jmizuno.github.io/zunda/ (written in Japanese). 10 | 11 | This module requires Zunda (https://github.com/jmizuno/zunda/releases), CaboCha (https://taku910.github.io/cabocha/), and MeCab (http://taku910.github.io/mecab/) to be installed. 12 | 13 | 14 | Contributions are welcome! 15 | 16 | 17 | Installation 18 | ============== 19 | 20 | :: 21 | 22 | # Install Zunda 23 | wget -O zunda-2.0b4.tar.gz https://github.com/jmizuno/zunda/archive/2.0b4.tar.gz 24 | tar xzf zunda-2.0b4.tar.gz 25 | rm zunda-2.0b4.tar.gz 26 | cd zunda-2.0b4 27 | ./configure 28 | make 29 | sudo make install 30 | cd ../ 31 | rm -rf zunda-2.0b4 32 | 33 | # Install zunda-python 34 | pip install zunda-python 35 | 36 | Example 37 | =========== 38 | 39 | .. 
code:: python 40 | 41 | import zunda 42 | parser = zunda.Parser() 43 | parser.parse('花子は太郎を食事に誘った裕子が嫌いだった') 44 | # => [{'assumptional': '0', 45 | 'authenticity': '成立', 46 | 'chunks': [{'func': 'に', 47 | 'head': '食事', 48 | 'link_from': [], 49 | 'link_to': 3, 50 | 'score': 1.883877, 51 | 'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ', 52 | 'funcexp': 'O', 53 | 'surface': '食事'}, 54 | {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ', 55 | 'funcexp': 'B:判断', 56 | 'surface': 'に'}]}], 57 | 'sentiment': '0', 58 | 'source': '筆者', 59 | 'tense': '非未来', 60 | 'type': '叙述', 61 | 'word': '食事', 62 | 'words': '食事に'}, 63 | {'assumptional': '0', 64 | 'authenticity': '成立', 65 | 'chunks': [{'func': 'を', 66 | 'head': '太郎', 67 | 'link_from': [], 68 | 'link_to': 3, 69 | 'score': 1.640671, 70 | 'words': [{'feature': '名詞,固有名詞,地域,一般,*,*,太郎,タロウ,タロー', 71 | 'funcexp': 'O', 72 | 'surface': '太郎'}, 73 | {'feature': '助詞,格助詞,一般,*,*,*,を,ヲ,ヲ', 'funcexp': 'O', 'surface': 'を'}]}, 74 | {'func': 'に', 75 | 'head': '食事', 76 | 'link_from': [], 77 | 'link_to': 3, 78 | 'score': 1.883877, 79 | 'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ', 80 | 'funcexp': 'O', 81 | 'surface': '食事'}, 82 | {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ', 'funcexp': 'B:判断', 'surface': 'に'}]}, 83 | {'func': 'た', 84 | 'head': '誘っ', 85 | 'link_from': [1, 2], 86 | 'link_to': 4, 87 | 'score': 1.565227, 88 | 'words': [{'feature': '動詞,自立,*,*,五段・ワ行促音便,連用タ接続,誘う,サソッ,サソッ', 89 | 'funcexp': 'O', 90 | 'surface': '誘っ'}, 91 | {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ', 92 | 'funcexp': 'B:完了', 93 | 'surface': 'た'}]}], 94 | 'sentiment': '0', 95 | 'source': '筆者', 96 | 'tense': '非未来', 97 | 'type': '叙述', 98 | 'word': '誘っ', 99 | 'words': '太郎を食事に誘った'}, 100 | {'assumptional': '0', 101 | 'authenticity': '成立', 102 | 'chunks': [{'func': 'は', 103 | 'head': '花子', 104 | 'link_from': [], 105 | 'link_to': 5, 106 | 'score': -1.81792, 107 | 'words': [{'feature': '名詞,固有名詞,人名,名,*,*,花子,ハナコ,ハナコ', 108 | 'funcexp': 'O', 109 | 'surface': '花子'}, 110 | {'feature': '助詞,係助詞,*,*,*,*,は,ハ,ワ', 
'funcexp': 'O', 'surface': 'は'}]}, 111 | {'func': 'が', 112 | 'head': '裕子', 113 | 'link_from': [3], 114 | 'link_to': 5, 115 | 'score': -1.81792, 116 | 'words': [{'feature': '名詞,固有名詞,人名,名,*,*,裕子,ユウコ,ユーコ', 117 | 'funcexp': 'O', 118 | 'surface': '裕子'}, 119 | {'feature': '助詞,格助詞,一般,*,*,*,が,ガ,ガ', 'funcexp': 'O', 'surface': 'が'}]}, 120 | {'func': 'た', 121 | 'head': '嫌い', 122 | 'link_from': [0, 4], 123 | 'link_to': -1, 124 | 'score': 0.0, 125 | 'words': [{'feature': '名詞,形容動詞語幹,*,*,*,*,嫌い,キライ,キライ', 126 | 'funcexp': 'O', 127 | 'surface': '嫌い'}, 128 | {'feature': '助動詞,*,*,*,特殊・ダ,連用タ接続,だ,ダッ,ダッ', 129 | 'funcexp': 'B:判断', 130 | 'surface': 'だっ'}, 131 | {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ', 132 | 'funcexp': 'B:完了', 133 | 'surface': 'た'}]}], 134 | 'sentiment': '0', 135 | 'source': '筆者', 136 | 'tense': '非未来', 137 | 'type': '叙述', 138 | 'word': '嫌い', 139 | 'words': '花子は裕子が嫌いだった'}] 140 | 141 | LICENSE 142 | ========= 143 | 144 | MIT License 145 | 146 | 147 | Copyright 148 | ============= 149 | 150 | Zunda Python 151 | (c) 2019- Yukino Ikegami. All Rights Reserved. 152 | 153 | Zunda (Original version) 154 | (c) 2013- @jmizuno 155 | 156 | ACKNOWLEDGEMENT 157 | ================= 158 | 159 | This module uses Zunda. 160 | I thank to @jmizuno and Tohoku University Inui-Okazaki Lab. 161 | 162 | 163 | .. |pyversion| image:: https://img.shields.io/pypi/pyversions/zunda-python.svg 164 | 165 | .. |version| image:: https://img.shields.io/pypi/v/zunda-python.svg 166 | :target: http://pypi.python.org/pypi/zunda-python/ 167 | :alt: latest version 168 | 169 | .. 
# -*- coding: utf-8 -*-
"""Packaging script for zunda-python.

Reads the package version from ``zunda/__init__.py`` and uses README.rst
followed by CHANGES.rst as the long description.
"""
from codecs import open
import os
import re
from setuptools import setup


def _read(*path_parts):
    """Return the UTF-8 decoded content of the file at ``path_parts``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read.
    """
    with open(os.path.join(*path_parts), 'r', encoding='utf8') as f:
        return f.read()


def _find_version():
    """Extract the ``__version__`` string from ``zunda/__init__.py``.

    Raises:
        RuntimeError: when no ``__version__ = "..."`` assignment is found.
    """
    match = re.compile(
        r'.*__version__ = "(.*?)"', re.S).match(_read('zunda', '__init__.py'))
    if match is None:
        raise RuntimeError('Unable to find __version__ in zunda/__init__.py')
    return match.group(1)


setup(
    name='zunda-python',
    packages=['zunda'],
    version=_find_version(),
    license='MIT License',
    platforms=['POSIX', 'Unix', 'MacOS'],
    description='Zunda: Japanese Enhanced Modality Analyzer client for Python',
    author='Yukino Ikegami',
    author_email='yknikgm@gmail.com',
    url='https://github.com/ikegami-yukino/zunda-python',
    keywords=['modality analyzer'],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Intended Audience :: Information Technology',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: Japanese',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Text Processing :: Linguistic'
    ],
    data_files=[('', ['README.rst', 'CHANGES.rst'])],
    long_description='%s\n\n%s' % (_read('README.rst'), _read('CHANGES.rst')),
)
class Parser(object):
    """Zunda: Japanese Enhanced Modality Analyzer

    Zunda is an extended modality analyzer for Japanese.
    Please see details in https://jmizuno.github.io/zunda/ (written in Japanese)
    And this module requires installing Zunda, which is available at https://github.com/jmizuno/zunda/releases

    The sentence is sent to the ``zunda`` executable on its standard input;
    no shell is involved, so the sentence may contain any characters.

    >>> import zunda
    >>> parser = zunda.Parser()
    >>> parser.parse('花子は太郎を食事に誘った裕子が嫌いだった')
    [{'assumptional': '0',
      'authenticity': '成立',
      'chunks': [{'func': 'に',
                  'head': '食事',
                  'link_from': [],
                  'link_to': 3,
                  'score': 1.883877,
                  'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
                             'funcexp': 'O',
                             'surface': '食事'},
                            {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ',
                             'funcexp': 'B:判断',
                             'surface': 'に'}]}],
      'sentiment': '0',
      'source': '筆者',
      'tense': '非未来',
      'type': '叙述',
      'word': '食事',
      'words': '食事に'},
     {'assumptional': '0',
      'authenticity': '成立',
      'chunks': [{'func': 'を',
                  'head': '太郎',
                  'link_from': [],
                  'link_to': 3,
                  'score': 1.640671,
                  'words': [{'feature': '名詞,固有名詞,地域,一般,*,*,太郎,タロウ,タロー',
                             'funcexp': 'O',
                             'surface': '太郎'},
                            {'feature': '助詞,格助詞,一般,*,*,*,を,ヲ,ヲ', 'funcexp': 'O', 'surface': 'を'}]},
                 {'func': 'に',
                  'head': '食事',
                  'link_from': [],
                  'link_to': 3,
                  'score': 1.883877,
                  'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
                             'funcexp': 'O',
                             'surface': '食事'},
                            {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ', 'funcexp': 'B:判断', 'surface': 'に'}]},
                 {'func': 'た',
                  'head': '誘っ',
                  'link_from': [1, 2],
                  'link_to': 4,
                  'score': 1.565227,
                  'words': [{'feature': '動詞,自立,*,*,五段・ワ行促音便,連用タ接続,誘う,サソッ,サソッ',
                             'funcexp': 'O',
                             'surface': '誘っ'},
                            {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
                             'funcexp': 'B:完了',
                             'surface': 'た'}]}],
      'sentiment': '0',
      'source': '筆者',
      'tense': '非未来',
      'type': '叙述',
      'word': '誘っ',
      'words': '太郎を食事に誘った'},
     {'assumptional': '0',
      'authenticity': '成立',
      'chunks': [{'func': 'は',
                  'head': '花子',
                  'link_from': [],
                  'link_to': 5,
                  'score': -1.81792,
                  'words': [{'feature': '名詞,固有名詞,人名,名,*,*,花子,ハナコ,ハナコ',
                             'funcexp': 'O',
                             'surface': '花子'},
                            {'feature': '助詞,係助詞,*,*,*,*,は,ハ,ワ', 'funcexp': 'O', 'surface': 'は'}]},
                 {'func': 'が',
                  'head': '裕子',
                  'link_from': [3],
                  'link_to': 5,
                  'score': -1.81792,
                  'words': [{'feature': '名詞,固有名詞,人名,名,*,*,裕子,ユウコ,ユーコ',
                             'funcexp': 'O',
                             'surface': '裕子'},
                            {'feature': '助詞,格助詞,一般,*,*,*,が,ガ,ガ', 'funcexp': 'O', 'surface': 'が'}]},
                 {'func': 'た',
                  'head': '嫌い',
                  'link_from': [0, 4],
                  'link_to': -1,
                  'score': 0.0,
                  'words': [{'feature': '名詞,形容動詞語幹,*,*,*,*,嫌い,キライ,キライ',
                             'funcexp': 'O',
                             'surface': '嫌い'},
                            {'feature': '助動詞,*,*,*,特殊・ダ,連用タ接続,だ,ダッ,ダッ',
                             'funcexp': 'B:判断',
                             'surface': 'だっ'},
                            {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
                             'funcexp': 'B:完了',
                             'surface': 'た'}]}],
      'sentiment': '0',
      'source': '筆者',
      'tense': '非未来',
      'type': '叙述',
      'word': '嫌い',
      'words': '花子は裕子が嫌いだった'}]
    """

    def __init__(self, zunda_args='', encoding='utf-8'):
        """
        Params:
            zunda_args (str) : extra command-line arguments for zunda,
                               written as they would be typed in a shell
            encoding (str) : character encoding used to talk to zunda
                             (default utf-8)
        """
        self.zunda_args = zunda_args
        self.encoding = encoding

    def _build_command(self):
        """Return the argv list used to start zunda.

        ``zunda_args`` is tokenised with shell-like quoting rules, but the
        result is handed to the OS directly and never interpreted by a shell.
        """
        import shlex

        return ['zunda'] + shlex.split(self.zunda_args or '')

    @staticmethod
    def _parse_sentence(lines):
        """Parse the output lines of one sentence (without its EOS line).

        Four kinds of line are recognised: ``#FUNCEXP`` (one comma-separated
        label per token), ``#EVENT`` (tab-separated event attributes, the
        first being the token index of the event word), ``* `` (chunk header)
        and tab-separated ``surface``/``feature`` token lines.

        Param:
            lines (iterable of str) : non-empty output lines of one sentence
        Return:
            events (list of dict)
        """
        events = []
        chunks = []
        funcexp = []  # stays empty when the output carries no #FUNCEXP line
        word_count = 0
        for line in lines:
            if line.startswith('#FUNCEXP'):
                funcexp = line.split('\t')[1].split(',')
            elif line.startswith('#EVENT'):
                event_info = line.split('\t')
                events.append({'word': int(event_info[1]),
                               # the source field looks like 'wr:筆者'
                               'source': event_info[2].split(':')[1],
                               'tense': event_info[3],
                               'assumptional': event_info[4],
                               'type': event_info[5],
                               'authenticity': event_info[6],
                               'sentiment': event_info[7], 'chunks': []})
            elif line.startswith('* '):
                # e.g. '* 0 5D 0/1 -1.81792': index, link target followed by
                # a one-letter suffix, head/func token offsets, score
                chunk_info = line.split(' ')
                (head, func) = chunk_info[3].split('/')[:2]
                chunks.append({'link_to': int(chunk_info[2][:-1]),
                               'link_from': [],
                               'head': int(head), 'func': int(func),
                               'score': float(chunk_info[4]), 'words': []})
            else:
                (surface, feature) = line.split('\t', 1)
                label = funcexp[word_count] if word_count < len(funcexp) else None
                chunks[-1]['words'].append({'surface': surface,
                                            'feature': feature,
                                            'funcexp': label})
                word_count += 1

        # Record incoming links and replace head/func offsets by surfaces.
        for (i, chunk) in enumerate(chunks):
            if chunk['link_to'] != -1:
                chunks[chunk['link_to']]['link_from'].append(i)
            words = chunk['words']
            chunk['head'] = words[chunk['head']]['surface']
            chunk['func'] = words[chunk['func']]['surface']

        # Flat token index -> (token, owning chunk); built once so that each
        # event is resolved by a direct lookup.
        tokens = [(word, chunk) for chunk in chunks for word in chunk['words']]
        for event in events:
            index = event['word']
            if not 0 <= index < len(tokens):
                continue  # unknown token index: leave the event unresolved
            (word, chunk) = tokens[index]
            event['word'] = word['surface']
            # The chunks depending on the event chunk come first, then the
            # event chunk itself.
            event['chunks'].extend(chunks[k] for k in chunk['link_from'])
            event['chunks'].append(chunk)
            event['words'] = ''.join(w['surface']
                                     for c in event['chunks']
                                     for w in c['words'])
        return events

    def _parse_zunda_return(self, zunda_return):
        """Convert the raw text printed by zunda into a list of events.

        The output is cut at every ``EOS`` line and each sentence is parsed
        on its own, because chunk and token indices are only resolved within
        one sentence. The events of all sentences are returned in order.

        Param:
            zunda_return (str) : decoded standard output of zunda
        Return:
            events (list of dict)
        """
        events = []
        sentence_lines = []
        for line in zunda_return.splitlines():
            if line == 'EOS':
                events.extend(self._parse_sentence(sentence_lines))
                sentence_lines = []
            elif line:
                sentence_lines.append(line)
        if sentence_lines:  # output that was not terminated by EOS
            events.extend(self._parse_sentence(sentence_lines))
        return events

    def parse(self, sentence):
        """Parse the sentence
        Param:
            sentence (str)
        Return:
            events (list of dict)
        Raises:
            OSError : when the zunda executable cannot be started
        """
        from subprocess import PIPE, Popen

        # Feed the sentence through stdin instead of building a shell
        # pipeline: quotes, '$', ';' and similar characters in the sentence
        # are then plain data and can neither break nor inject a command.
        payload = (sentence + '\n').encode(self.encoding)
        with Popen(self._build_command(), stdin=PIPE, stdout=PIPE) as proc:
            zunda_return = proc.communicate(payload)[0].decode(self.encoding)
        return self._parse_zunda_return(zunda_return)