├── .gitignore
├── CHANGES.rst
├── LICENSE
├── MANIFEST.in
├── README.rst
├── setup.py
└── zunda
    ├── __init__.py
    └── zunda.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
 1 | CHANGES
 2 | =======
 3 | 
 4 | 0.1.3 (2019-11-30)
 5 | -------------------------
 6 | 
 7 | - Bug fix for installation on conda (thanks @Kensuke-Mitsuzawa)
 8 | 
 9 | 0.1.2 (2019-02-24)
10 | -------------------------
11 | 
12 | - First release.
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 IKEGAMI Yukino
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Zunda Python
  2 | ===================
  3 | 
  4 | |pyversion| |version| |license|
  5 | 
  6 | Zunda: Japanese Enhanced Modality Analyzer client for Python.
  7 | 
  8 | Zunda is an extended modality analyzer for Japanese.
  9 | For details about Zunda, see https://jmizuno.github.io/zunda/ (written in Japanese).
 10 | 
 11 | This module requires Zunda (available at https://github.com/jmizuno/zunda/releases), CaboCha (https://taku910.github.io/cabocha/), and MeCab (http://taku910.github.io/mecab/) to be installed.
 12 | 
 13 | 
 14 | Contributions are welcome!
 15 | 
 16 | 
 17 | Installation
 18 | ==============
 19 | 
 20 | ::
 21 | 
 22 |  # Install Zunda
 23 |  wget -O zunda-2.0b4.tar.gz https://github.com/jmizuno/zunda/archive/2.0b4.tar.gz
 24 |  tar xzf zunda-2.0b4.tar.gz
 25 |  rm zunda-2.0b4.tar.gz
 26 |  cd zunda-2.0b4
 27 |  ./configure
 28 |  make
 29 |  sudo make install
 30 |  cd ../
 31 |  rm -rf zunda-2.0b4
 32 | 
 33 |  # Install zunda-python
 34 |  pip install zunda-python
 35 | 
 36 | Example
 37 | ===========
 38 | 
 39 | .. code:: python
 40 | 
 41 |     import zunda
 42 |     parser = zunda.Parser()
 43 |     parser.parse('花子は太郎を食事に誘った裕子が嫌いだった')
 44 |     # => [{'assumptional': '0',
 45 |       'authenticity': '成立',
 46 |       'chunks': [{'func': 'に',
 47 |         'head': '食事',
 48 |         'link_from': [],
 49 |         'link_to': 3,
 50 |         'score': 1.883877,
 51 |         'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
 52 |           'funcexp': 'O',
 53 |           'surface': '食事'},
 54 |          {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ',
 55 |           'funcexp': 'B:判断',
 56 |           'surface': 'に'}]}],
 57 |       'sentiment': '0',
 58 |       'source': '筆者',
 59 |       'tense': '非未来',
 60 |       'type': '叙述',
 61 |       'word': '食事',
 62 |       'words': '食事に'},
 63 |      {'assumptional': '0',
 64 |       'authenticity': '成立',
 65 |       'chunks': [{'func': 'を',
 66 |         'head': '太郎',
 67 |         'link_from': [],
 68 |         'link_to': 3,
 69 |         'score': 1.640671,
 70 |         'words': [{'feature': '名詞,固有名詞,地域,一般,*,*,太郎,タロウ,タロー',
 71 |           'funcexp': 'O',
 72 |           'surface': '太郎'},
 73 |          {'feature': '助詞,格助詞,一般,*,*,*,を,ヲ,ヲ', 'funcexp': 'O', 'surface': 'を'}]},
 74 |        {'func': 'に',
 75 |         'head': '食事',
 76 |         'link_from': [],
 77 |         'link_to': 3,
 78 |         'score': 1.883877,
 79 |         'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
 80 |           'funcexp': 'O',
 81 |           'surface': '食事'},
 82 |          {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ', 'funcexp': 'B:判断', 'surface': 'に'}]},
 83 |        {'func': 'た',
 84 |         'head': '誘っ',
 85 |         'link_from': [1, 2],
 86 |         'link_to': 4,
 87 |         'score': 1.565227,
 88 |         'words': [{'feature': '動詞,自立,*,*,五段・ワ行促音便,連用タ接続,誘う,サソッ,サソッ',
 89 |           'funcexp': 'O',
 90 |           'surface': '誘っ'},
 91 |          {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
 92 |           'funcexp': 'B:完了',
 93 |           'surface': 'た'}]}],
 94 |       'sentiment': '0',
 95 |       'source': '筆者',
 96 |       'tense': '非未来',
 97 |       'type': '叙述',
 98 |       'word': '誘っ',
 99 |       'words': '太郎を食事に誘った'},
100 |      {'assumptional': '0',
101 |       'authenticity': '成立',
102 |       'chunks': [{'func': 'は',
103 |         'head': '花子',
104 |         'link_from': [],
105 |         'link_to': 5,
106 |         'score': -1.81792,
107 |         'words': [{'feature': '名詞,固有名詞,人名,名,*,*,花子,ハナコ,ハナコ',
108 |           'funcexp': 'O',
109 |           'surface': '花子'},
110 |          {'feature': '助詞,係助詞,*,*,*,*,は,ハ,ワ', 'funcexp': 'O', 'surface': 'は'}]},
111 |        {'func': 'が',
112 |         'head': '裕子',
113 |         'link_from': [3],
114 |         'link_to': 5,
115 |         'score': -1.81792,
116 |         'words': [{'feature': '名詞,固有名詞,人名,名,*,*,裕子,ユウコ,ユーコ',
117 |           'funcexp': 'O',
118 |           'surface': '裕子'},
119 |          {'feature': '助詞,格助詞,一般,*,*,*,が,ガ,ガ', 'funcexp': 'O', 'surface': 'が'}]},
120 |        {'func': 'た',
121 |         'head': '嫌い',
122 |         'link_from': [0, 4],
123 |         'link_to': -1,
124 |         'score': 0.0,
125 |         'words': [{'feature': '名詞,形容動詞語幹,*,*,*,*,嫌い,キライ,キライ',
126 |           'funcexp': 'O',
127 |           'surface': '嫌い'},
128 |          {'feature': '助動詞,*,*,*,特殊・ダ,連用タ接続,だ,ダッ,ダッ',
129 |           'funcexp': 'B:判断',
130 |           'surface': 'だっ'},
131 |          {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
132 |           'funcexp': 'B:完了',
133 |           'surface': 'た'}]}],
134 |       'sentiment': '0',
135 |       'source': '筆者',
136 |       'tense': '非未来',
137 |       'type': '叙述',
138 |       'word': '嫌い',
139 |       'words': '花子は裕子が嫌いだった'}]
140 | 
141 | LICENSE
142 | =========
143 | 
144 | MIT License
145 | 
146 | 
147 | Copyright
148 | =============
149 | 
150 | Zunda Python
151 | (c) 2019- Yukino Ikegami. All Rights Reserved.
152 | 
153 | Zunda (Original version)
154 | (c) 2013- @jmizuno
155 | 
156 | ACKNOWLEDGEMENT
157 | =================
158 | 
159 | This module uses Zunda.
160 | I thank @jmizuno and the Tohoku University Inui-Okazaki Lab.
161 | 
162 | 
163 | .. |pyversion| image:: https://img.shields.io/pypi/pyversions/zunda-python.svg
164 | 
165 | .. |version| image:: https://img.shields.io/pypi/v/zunda-python.svg
166 |     :target: http://pypi.python.org/pypi/zunda-python/
167 |     :alt: latest version
168 | 
169 | .. |license| image:: https://img.shields.io/pypi/l/zunda-python.svg
170 |     :target: http://pypi.python.org/pypi/zunda-python/
171 |     :alt: license
172 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from codecs import open
 3 | import os
 4 | import re
 5 | from setuptools import setup
 6 | 
 7 | with open(os.path.join('zunda', '__init__.py'), 'r', encoding='utf8') as f:
 8 |     version = re.compile(
 9 |         r'.*__version__ = "(.*?)"', re.S).match(f.read()).group(1)
10 | 
11 | setup(
12 |     name='zunda-python',
13 |     packages=['zunda'],
14 |     version=version,
15 |     license='MIT License',
16 |     platforms=['POSIX', 'Unix', 'MacOS'],
17 |     description='Zunda: Japanese Enhanced Modality Analyzer client for Python',
18 |     author='Yukino Ikegami',
19 |     author_email='yknikgm@gmail.com',
20 |     url='https://github.com/ikegami-yukino/zunda-python',
21 |     keywords=['modality analyzer'],
22 |     classifiers=[
23 |         'Development Status :: 3 - Alpha',
24 |         'Intended Audience :: Developers',
25 |         'Intended Audience :: Information Technology',
26 |         'Intended Audience :: Science/Research',
27 |         'License :: OSI Approved :: MIT License',
28 |         'Natural Language :: Japanese',
29 |         'Programming Language :: Python :: 3.3',
30 |         'Programming Language :: Python :: 3.4',
31 |         'Programming Language :: Python :: 3.5',
32 |         'Programming Language :: Python :: 3.6',
33 |         'Programming Language :: Python :: 3.7',
34 |         'Topic :: Text Processing :: Linguistic'
35 |         ],
36 |     data_files=[('', ['README.rst', 'CHANGES.rst'])],
37 |     long_description='%s\n\n%s' % (open('README.rst', encoding='utf8').read(),
38 |                                    open('CHANGES.rst', encoding='utf8').read()),
39 | )
40 | 


--------------------------------------------------------------------------------
/zunda/__init__.py:
--------------------------------------------------------------------------------
1 | from .zunda import Parser
2 | 
# Names exported by ``from zunda import *``.
__all__ = ["Parser"]

# NOTE: setup.py extracts this value with the regex ``__version__ = "(.*?)"``,
# so it has to remain a plain double-quoted string literal.
__version__ = "0.1.3"

# Tuple form of ``__version__`` (major, minor, patch).
VERSION = tuple(int(part) for part in __version__.split("."))
6 | 


--------------------------------------------------------------------------------
/zunda/zunda.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
import shlex
from subprocess import Popen, PIPE
  3 | 
  4 | 
  5 | class Parser(object):
  6 |     """Zunda: Japanese Enhanced Modality Analyzer
  7 | 
  8 |     Zunda is an extended modality analyzer for Japanese.
  9 |     Please see details in https://jmizuno.github.io/zunda/ (written in Japanese)
 10 |     And this module requires installing Zunda, which is available at https://github.com/jmizuno/zunda/releases
 11 | 
 12 |     >>> import zunda
 13 |     >>> parser = zunda.Parser()
 14 |     >>> parser.parse('花子は太郎を食事に誘った裕子が嫌いだった')
 15 |     [{'assumptional': '0',
 16 |       'authenticity': '成立',
 17 |       'chunks': [{'func': 'に',
 18 |         'head': '食事',
 19 |         'link_from': [],
 20 |         'link_to': 3,
 21 |         'score': 1.883877,
 22 |         'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
 23 |           'funcexp': 'O',
 24 |           'surface': '食事'},
 25 |          {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ',
 26 |           'funcexp': 'B:判断',
 27 |           'surface': 'に'}]}],
 28 |       'sentiment': '0',
 29 |       'source': '筆者',
 30 |       'tense': '非未来',
 31 |       'type': '叙述',
 32 |       'word': '食事',
 33 |       'words': '食事に'},
 34 |      {'assumptional': '0',
 35 |       'authenticity': '成立',
 36 |       'chunks': [{'func': 'を',
 37 |         'head': '太郎',
 38 |         'link_from': [],
 39 |         'link_to': 3,
 40 |         'score': 1.640671,
 41 |         'words': [{'feature': '名詞,固有名詞,地域,一般,*,*,太郎,タロウ,タロー',
 42 |           'funcexp': 'O',
 43 |           'surface': '太郎'},
 44 |          {'feature': '助詞,格助詞,一般,*,*,*,を,ヲ,ヲ', 'funcexp': 'O', 'surface': 'を'}]},
 45 |        {'func': 'に',
 46 |         'head': '食事',
 47 |         'link_from': [],
 48 |         'link_to': 3,
 49 |         'score': 1.883877,
 50 |         'words': [{'feature': '名詞,サ変接続,*,*,*,*,食事,ショクジ,ショクジ',
 51 |           'funcexp': 'O',
 52 |           'surface': '食事'},
 53 |          {'feature': '助詞,格助詞,一般,*,*,*,に,ニ,ニ', 'funcexp': 'B:判断', 'surface': 'に'}]},
 54 |        {'func': 'た',
 55 |         'head': '誘っ',
 56 |         'link_from': [1, 2],
 57 |         'link_to': 4,
 58 |         'score': 1.565227,
 59 |         'words': [{'feature': '動詞,自立,*,*,五段・ワ行促音便,連用タ接続,誘う,サソッ,サソッ',
 60 |           'funcexp': 'O',
 61 |           'surface': '誘っ'},
 62 |          {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
 63 |           'funcexp': 'B:完了',
 64 |           'surface': 'た'}]}],
 65 |       'sentiment': '0',
 66 |       'source': '筆者',
 67 |       'tense': '非未来',
 68 |       'type': '叙述',
 69 |       'word': '誘っ',
 70 |       'words': '太郎を食事に誘った'},
 71 |      {'assumptional': '0',
 72 |       'authenticity': '成立',
 73 |       'chunks': [{'func': 'は',
 74 |         'head': '花子',
 75 |         'link_from': [],
 76 |         'link_to': 5,
 77 |         'score': -1.81792,
 78 |         'words': [{'feature': '名詞,固有名詞,人名,名,*,*,花子,ハナコ,ハナコ',
 79 |           'funcexp': 'O',
 80 |           'surface': '花子'},
 81 |          {'feature': '助詞,係助詞,*,*,*,*,は,ハ,ワ', 'funcexp': 'O', 'surface': 'は'}]},
 82 |        {'func': 'が',
 83 |         'head': '裕子',
 84 |         'link_from': [3],
 85 |         'link_to': 5,
 86 |         'score': -1.81792,
 87 |         'words': [{'feature': '名詞,固有名詞,人名,名,*,*,裕子,ユウコ,ユーコ',
 88 |           'funcexp': 'O',
 89 |           'surface': '裕子'},
 90 |          {'feature': '助詞,格助詞,一般,*,*,*,が,ガ,ガ', 'funcexp': 'O', 'surface': 'が'}]},
 91 |        {'func': 'た',
 92 |         'head': '嫌い',
 93 |         'link_from': [0, 4],
 94 |         'link_to': -1,
 95 |         'score': 0.0,
 96 |         'words': [{'feature': '名詞,形容動詞語幹,*,*,*,*,嫌い,キライ,キライ',
 97 |           'funcexp': 'O',
 98 |           'surface': '嫌い'},
 99 |          {'feature': '助動詞,*,*,*,特殊・ダ,連用タ接続,だ,ダッ,ダッ',
100 |           'funcexp': 'B:判断',
101 |           'surface': 'だっ'},
102 |          {'feature': '助動詞,*,*,*,特殊・タ,基本形,た,タ,タ',
103 |           'funcexp': 'B:完了',
104 |           'surface': 'た'}]}],
105 |       'sentiment': '0',
106 |       'source': '筆者',
107 |       'tense': '非未来',
108 |       'type': '叙述',
109 |       'word': '嫌い',
110 |       'words': '花子は裕子が嫌いだった'}]
111 |     """
112 | 
113 |     def __init__(self, zunda_args='', encoding='utf-8'):
114 |         """
115 |         Params:
116 |             zunda_args (str) : argument for zunda
117 |             encoding (str) : character encoding (default utf-8)
118 |         """
119 |         self.zunda_args = zunda_args
120 |         self.encoding = encoding
121 | 
122 |     def _parse_zunda_return(self, zunda_return):
123 |         events = []
124 |         chunks = []
125 |         word_count = 0
126 |         for line in zunda_return.splitlines()[:-1]:  # The last line is EOS
127 |             if not line:
128 |                 continue
129 |             elif line.startswith('#FUNCEXP'):
130 |                 funcexp_str = line.split('\t')[1]
131 |                 funcexp = funcexp_str.split(',')
132 |             elif line.startswith('#EVENT'):
133 |                 event_info = line.split('\t')
134 |                 event = {'word': int(event_info[1]), 'source': event_info[2].split(':')[1],
135 |                          'tense': event_info[3], 'assumptional': event_info[4],
136 |                          'type': event_info[5], 'authenticity': event_info[6],
137 |                          'sentiment': event_info[7], 'chunks': []}
138 |                 events.append(event)
139 |             elif line.startswith('* '):
140 |                 chunk_info = line.split(' ')
141 |                 chunk = {'link_to': int(chunk_info[2][:-1]), 'link_from': [],
142 |                          'head': int(chunk_info[3].split('/')[0]),
143 |                          'func': int(chunk_info[3].split('/')[1]),
144 |                          'score': float(chunk_info[4]), 'words': []}
145 |                 chunks.append(chunk)
146 |             else:
147 |                 (surface, feature) = line.split('\t')
148 |                 chunks[-1]['words'].append({'surface': surface, 'feature': feature,
149 |                                             'funcexp': funcexp[word_count]})
150 |                 word_count += 1
151 | 
152 |         for (i, chunk) in enumerate(chunks):
153 |             if chunk['link_to'] != -1:
154 |                 chunks[chunk['link_to']]['link_from'].append(i)
155 |             chunks[i]['head'] = chunk['words'][chunks[i]['head']]['surface']
156 |             chunks[i]['func'] = chunk['words'][chunks[i]['func']]['surface']
157 | 
158 |         word_count = 0
159 |         for (i, event) in enumerate(events):
160 |             for (j, chunk) in enumerate(chunks):
161 |                 for word in chunk['words']:
162 |                     if event['word'] == word_count:
163 |                         events[i]['word'] = word['surface']
164 |                         for link_chunk in chunk['link_from']:
165 |                             events[i]['chunks'].append(chunks[link_chunk])
166 |                         events[i]['chunks'].append(chunk)
167 |                         events[i]['words'] = ''.join([word['surface'] for chunk in events[i]['chunks'] for word in chunk['words']])
168 |                     word_count += 1
169 |             word_count = 0
170 |         return events
171 | 
172 |     def parse(self, sentence):
173 |         """Parse the sentence
174 |         Param:
175 |             sentence (str)
176 |         Return:
177 |             events (list of dict)
178 |         """
179 |         cmd = 'echo %s| zunda %s' % (sentence, self.zunda_args)
180 |         with Popen(cmd, shell=True, stdout=PIPE) as proc:
181 |             zunda_return = proc.communicate()[0].decode(self.encoding)
182 |         events = self._parse_zunda_return(zunda_return)
183 |         return events
184 | 


--------------------------------------------------------------------------------